ABBYY
FlexiCapture Engine is a Software Development Kit (SDK) for
extracting data from many types of documents, such as PDFs, images,
and scanned documents. It is one of the best solutions for developers
who need to extract data from letters, invoices, forms, identity cards, etc.
Besides this, it can also perform high-quality image processing
tasks.
Note
[ ABBYY FlexiCapture Engine is not an open-source application; you
need a license to use it. You can get a trial
license by sending a request from your business email address. ]
FlexiLayout
Studio
ABBYY
FlexiLayout Studio is an application with which you can extract
text from structured, semi-structured and unstructured documents.
The application contains different tools for extracting the data from
your document, such as Block, Element, Group, Checkmark, etc. Using
this application we can mark the area from which we have to get the
data. Once you have marked all the required text and values, you can
export an AFL file (.afl extension).
Getting
text from any complex document using FlexiLayout Studio.
1.
Install FlexiLayout Studio, which you get from the ABBYY SDK
technology.
- Open the application and create a project as shown below.
- Add your file (image, PDF, scan, etc.) using the Add Image button, as in the following figure.
4.
Double-click your file to display it in the document area.
5.
Create a document as a Training set. (See Image below)
6. Now,
let us suppose we have to get the Policy Number, Claim No, Insured Name,
etc. from the document (see the image above). A FlexiLayout section
contains blocks and elements as marks which indicate the marked areas
on your document. So, create a block and an element as required.
7. For
creating an element, use the element tool and select the area which you
want to capture by dragging the tool (see the picture below). For data
separation you can create a group and add elements to the created group,
as in the following image.
9. Create a relation for an element for accessing the element value by
its name in block section
You
have to create an element relation with all its value elements. The
value element will be referenced by the block, which will be
accessible from different programming languages, such as C#, VB, C++,
etc.
You
have to repeat the above task to reference every block from an
element. The following are the final element and block set through which
we will access the field values.
- Now export the project and save the AFL (ABBYY FlexiLayout) file with any name you want.
Above
was the AFL project creation task to access the data from any type of
document. Now, we will use this AFL file in a C# program to
access the data.
Use of FlexiLayout File
in Windows or Web application to access the data.
- Create a Windows or Web application in which you want to access the data. Here we are creating a windows application.
- Open Visual Studio
- Go to the File menu and choose New Project.
- Select Windows form application, Type the Name of project and click OK button.
- Design the form as follows. We are using two text boxes, each with its own button, to locate the FlexiLayout file (.afl extension) and the document whose data we have to extract.
- Go to the code-behind and add FCEngine.dll to your References folder (from the ABBYY SDK, which you get from ABBYY).
- Write the following code in your form's code-behind and run the application.
using
FCEngine;
using
System;
using
System.Collections.Generic;
using
System.ComponentModel;
using
System.Data;
using
System.Drawing;
using
System.IO;
using
System.Runtime.InteropServices;
using
System.Text;
using
System.Windows.Forms;
namespace
AflToDocDiff
{
public partial class Form1 : Form
{
// Engine instance; assigned from loadEngine() in button1_Click and reset to null by unloadEngine().
IEngine engine = null;
// Processor that receives the document definition and the page images in button1_Click.
IFlexiCaptureProcessor processor;
// Document definition created from the file selected in txtAflFilePath.
IDocumentDefinition definition;
// NOTE(review): appears to be used only by the disabled AFL experiment in button1_Click.
IDocumentDefinition definition1;
// Accumulates one "name:value" line per field that has a value.
StringBuilder sb;
// Lookup used by tuneRecognitionParams: the outer key is the name passed to that method,
// the inner key is a field name. Nothing in this file populates it.
private Dictionary<string, Dictionary<string,
FieldParams>> fieldToType =
new Dictionary<string, Dictionary<string,
FieldParams>>();
////private string[] imageFiles;
////private int currentImageFileIndex = 0;
////private Image processedImage;
/// <summary>
/// Creates the form. InitializeComponent() is not defined in this part of the
/// partial class (presumably designer-generated — confirm).
/// </summary>
public Form1()
{
InitializeComponent();
}
/// <summary>
/// Initializes the native engine with the developer serial number from FceConfig.
/// </summary>
/// <returns>The engine instance handed back by InitializeEngine.</returns>
/// <remarks>
/// The int returned by InitializeEngine is passed to Marshal.ThrowExceptionForHR,
/// so a failure result surfaces as an exception.
/// </remarks>
private IEngine loadEngine()
{
    IEngine loadedEngine;
    Marshal.ThrowExceptionForHR(InitializeEngine(FceConfig.GetDeveloperSN(), out loadedEngine));
    return loadedEngine;
}
/// <summary>
/// Clears the caller's engine reference and then deinitializes the native engine.
/// </summary>
/// <param name="engine">Reference that is set to null before deinitialization.</param>
/// <remarks>
/// The int returned by DeinitializeEngine is passed to Marshal.ThrowExceptionForHR,
/// so a failure result surfaces as an exception.
/// </remarks>
private void unloadEngine(ref IEngine engine)
{
    // The reference is dropped first, then the native side is shut down.
    engine = null;
    Marshal.ThrowExceptionForHR(DeinitializeEngine());
}
// Native engine start-up exported by the DLL at FceConfig.DllPath.
// loadEngine passes the int result to Marshal.ThrowExceptionForHR and receives
// the engine through the out parameter.
[DllImport(FceConfig.DllPath, CharSet =
CharSet.Unicode), PreserveSig]
internal static extern int InitializeEngine(String
devSN, out IEngine engine);
// Native engine shutdown exported by the same DLL.
// unloadEngine passes the int result to Marshal.ThrowExceptionForHR.
[DllImport(FceConfig.DllPath, CharSet =
CharSet.Unicode), PreserveSig]
internal static extern int DeinitializeEngine();
// P/Invoke holder for gdi32.dll. No caller is visible in this file.
class Gdi32
{
// Binds to the "DeleteObject" export of gdi32.dll.
// NOTE(review): the Win32 prototype returns BOOL and takes an HGDIOBJ; here the
// return is declared as IntPtr and the parameter is named hDc — confirm before
// relying on the return value.
[DllImport("gdi32.dll", EntryPoint =
"DeleteObject")]
internal static extern IntPtr DeleteObject(IntPtr
hDc);
}
// Empty placeholder: it has no body and no caller in this file.
private void CreateDocumentDef()
{
}
// IDocumentDefinition[] newDocumentDefinitions;
//string[] templateNames;
/// <summary>
/// Runs recognition for the files chosen on the form.
/// Loads the engine, builds a document definition from the file in txtAflFilePath
/// (.xfd, .afl or .fcdot), adds every page of the file in txtFormPath to a
/// FlexiCapture processor, recognizes the next document and passes it to
/// buildDocumentView. Any failure is reported in a message box. The processor
/// reference is dropped and the engine is unloaded before the handler returns.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    try
    {
        engine = loadEngine();

        //definition1 = engine.CreateDocumentDefinitionFromAFL(txtAflFilePath.Text, "English");
        //tuneRecognitionParams("1", definition1);
        //((ICustomStorage)definition1).SaveToFile(@"D:\\AflToDocDiff\\AflToDocDiff\\FcdotDoc\\Test1.fcdot");

        if (processor == null)
        {
            string definitionPath = txtAflFilePath.Text;

            // Compare in lower case so that "FORM.AFL" is accepted like "form.afl".
            string extension = (Path.GetExtension(definitionPath) ?? string.Empty).ToLowerInvariant();

            switch (extension)
            {
                case ".xfd":
                    definition = engine.CreateDocumentDefinitionFromXFD(definitionPath, "English");
                    break;
                case ".afl":
                    definition = engine.CreateDocumentDefinitionFromAFL(definitionPath, "English");
                    break;
                case ".fcdot":
                    // An empty definition is created and then filled from the saved file.
                    definition = engine.CreateDocumentDefinition();
                    ((ICustomStorage)definition).LoadFromFile(definitionPath);
                    break;
                default:
                    throw new NotSupportedException(
                        "Unsupported definition file type '" + extension +
                        "'. Expected .xfd, .afl or .fcdot.");
            }

            processor = engine.CreateFlexiCaptureProcessor();
            processor.AddDocumentDefinition(definition);
        }
        else
        {
            // Keep the existing processor: it is used below to add the page images.
            processor.ResetProcessing();
        }

        var imageTools = engine.CreateImageProcessingTools();
        var file = imageTools.OpenImageFile(txtFormPath.Text);
        int pageCount = file.PagesCount;
        for (int i = 0; i < pageCount; i++)
        {
            processor.AddImage(file.OpenImagePage(i));
        }

        IDocument document = processor.RecognizeNextDocument();
        if (document != null && document.DocumentDefinition != null)
        {
            sb = new StringBuilder();
            buildDocumentView(document);
            //File.WriteAllText(@"C:\Users\GT-PC-15\Desktop\AFL\Result\RF" + DateTime.Now.ToString("yyyyMMddTHHmmss") + ".txt", sb.ToString());
        }
    }
    catch (Exception ex)
    {
        // Report the failure to the user instead of discarding it.
        MessageBox.Show(this, ex.Message, "Recognition failed",
            MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    finally
    {
        // Drop the processor reference first so it does not outlive the engine.
        processor = null;

        // Skip deinitialization when loadEngine never produced an engine.
        // NOTE(review): assumes a failed InitializeEngine needs no DeinitializeEngine — confirm.
        if (engine != null)
        {
            unloadEngine(ref engine);
        }
    }
}
/// <summary>
/// Applies the recognition settings registered in fieldToType under
/// <paramref name="name"/> to the matching fields of the first section of
/// <paramref name="definition"/>. Does nothing when no settings are registered
/// for that name. Calls definition.Check() if at least one field was changed.
/// </summary>
/// <param name="name">Key into fieldToType.</param>
/// <param name="definition">Document definition whose first section's fields are tuned.</param>
/// <exception cref="FormatException">
/// A text field has a RegExp but its Params string contains no space separating
/// the letter set from the regular expression.
/// </exception>
private void tuneRecognitionParams(string name,
    IDocumentDefinition definition)
{
    Dictionary<string, FieldParams> paramsByField;
    if (!fieldToType.TryGetValue(name, out paramsByField))
    {
        return;
    }

    bool modified = false;
    var fields = definition.Sections[0].Fields;
    for (int i = 0; i < fields.Count; i++)
    {
        var field = fields[i];

        FieldParams fieldParams;
        if (!paramsByField.TryGetValue(field.Name, out fieldParams))
        {
            continue;
        }

        var textParams = field.RecognitionParams.AsTextParams();
        if (fieldParams.Type == FieldValueTypeEnum.FVT_Text)
        {
            // NOTE(review): the guard tests RegExp but the values are read from Params — confirm this is intended.
            if (fieldParams.RegExp != null)
            {
                // Params is split at the first space: letter set before it, regular expression after it.
                int pos = fieldParams.Params.IndexOf(' ');
                if (pos < 0)
                {
                    throw new FormatException(
                        "Recognition parameters for field '" + field.Name +
                        "' must be a letter set and a regular expression separated by a space.");
                }

                string letters = fieldParams.Params.Substring(0, pos);
                string regExp = fieldParams.Params.Substring(pos + 1);

                var newLanguage = textParams.CreateEmbeddedLanguage(LanguageTypeEnum.LT_Simple, null);
                newLanguage.AsSimpleLanguage().set_LetterSet(LanguageLetterSetEnum.LLS_Alphabet, letters);
                newLanguage.AsSimpleLanguage().RegularExpression = regExp;
                textParams.Language = newLanguage;
            }
        }
        else
        {
            var newLanguage = textParams.CreateEmbeddedLanguageByDataType(fieldParams.Type);
            textParams.Language = newLanguage;
        }

        textParams.TextType = fieldParams.TextType;
        textParams.CaseRecognitionMode = fieldParams.CaseType;
        modified = true;
    }

    if (modified)
    {
        definition.Check();
    }
}
/// <summary>
/// Shows openFileDialog1 and, when the user confirms, copies the chosen file
/// name into txtAflFilePath.
/// </summary>
private void button2_Click(object sender, EventArgs e)
{
    // Leave the text box untouched unless the dialog was confirmed.
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    txtAflFilePath.Text = openFileDialog1.FileName;
}
/// <summary>
/// Shows openFileDialog2 and, when the user confirms, copies the chosen file
/// name into txtFormPath.
/// </summary>
private void button3_Click(object sender, EventArgs e)
{
    // Leave the text box untouched unless the dialog was confirmed.
    if (openFileDialog2.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    txtFormPath.Text = openFileDialog2.FileName;
}
/// <summary>
/// Walks the fields of the document's first section by handing its children
/// to addDocumentNodeChildren.
/// </summary>
/// <param name="document">Recognized document; only Sections[0] is read.</param>
private void buildDocumentView(IDocument document)
{
    IField rootSection = document.Sections[0];
    IFields topLevelFields = rootSection.Children;
    addDocumentNodeChildren(topLevelFields);
}
/// <summary>
/// Passes each field of <paramref name="children"/> to addDocumentNode, in index order.
/// </summary>
/// <param name="children">Field collection to visit.</param>
private void addDocumentNodeChildren(IFields children)
{
    int index = 0;
    while (index < children.Count)
    {
        addDocumentNode(children[index]);
        index++;
    }
}
/// <summary>
/// Records one field and then descends into it.
/// When the field has a value, a "name:value" line is appended to sb.
/// lblFormName is then set to everything accumulated so far. Finally the
/// field's instances are visited, or its children when it has no instances.
/// </summary>
/// <param name="documentNode">Field to record and descend into.</param>
private void addDocumentNode(IField documentNode)
{
    // The click handler creates sb; create it here as well so the method
    // does not fail when it is reached by another route.
    if (sb == null)
    {
        sb = new StringBuilder();
    }

    IFieldValue value = documentNode.Value;
    if (value != null)
    {
        sb.AppendLine(string.Format("{0}:{1}",
            documentNode.Name, value.AsString));
    }

    // The label always shows the accumulated field list.
    lblFormName.Text = sb.ToString();

    if (documentNode.Instances != null)
    {
        addDocumentNodeInstances(documentNode.Instances);
    }
    else if (documentNode.Children != null)
    {
        addDocumentNodeChildren(documentNode.Children);
    }
}
/// <summary>
/// For every instance that has children, passes those children to
/// addDocumentNodeChildren. Instances without children are skipped.
/// </summary>
/// <param name="instances">Field instances to visit, in index order.</param>
private void addDocumentNodeInstances(IFieldInstances
    instances)
{
    for (int index = 0; index < instances.Count; index++)
    {
        var instanceChildren = instances[index].Children;
        if (instanceChildren == null)
        {
            continue;
        }

        addDocumentNodeChildren(instanceChildren);
    }
}
}
}
Output :
No comments:
Post a Comment