The sample project is a desktop application, which makes the task hard: I have to understand code that is tied to desktop components.
I need code for a "console application" project.
My code works, a little :S
But it is not efficient. I know a better way exists, but I can't get it to work.
Forgive my poor English.
So, the code... this is my best solution so far:
part 1
Code: Select all
// Setup fragment: loads the scanned image and the form template before OCR.
// NOTE(review): path, fileName, ext, fator, pathTemplate, template and extTemplate
// are declared outside this snippet.

// List that will hold the image(s) to process.
List<VintasoftImage> images = new List<VintasoftImage>();
// Open the source image from the concatenated path, file name and extension.
VintasoftImage imagem = new VintasoftImage(path + fileName + ext);
// Render at 96 * fator DPI in both directions, with high-quality bicubic interpolation and smoothing.
imagem.RenderingSettings = new RenderingSettings(new Resolution(96 * fator, 96 * fator), InterpolationMode.HighQualityBicubic, SmoothingMode.HighQuality);
// NOTE(review): no type is declared here, so this presumably assigns to a field or
// property named FormRecognitionManager that is declared elsewhere - confirm.
FormRecognitionManager = new FormRecognitionManager();
// Load the form templates from the template document.
FormRecognitionManager.FormTemplates.LoadFromDocument(pathTemplate + template + extTemplate);
images.Add(imagem);
// The same template file is read a second time here, deserialized as a FormDocumentTemplate.
FormDocumentTemplate FormDocumentTemplate = FormDocumentTemplate.Deserialize(pathTemplate + template + extTemplate);
method
Code: Select all
/// <summary>
/// Runs Tesseract OCR on the image stored in <paramref name="filename"/>, restricted to the
/// field rectangles taken from the first item (a field group) of the first page of the
/// form template, and returns the recognized text.
/// </summary>
/// <param name="FormDocumentTemplate">
/// Form template; <c>Pages[0].Items[0]</c> must be a <c>FormFieldTemplateGroup</c>.
/// The bounding box of every field in that group becomes one recognition region.
/// </param>
/// <param name="language">OCR language used for the engine settings and for each region.</param>
/// <param name="filename">Path of the image file to recognize.</param>
/// <returns>The text of the recognized page.</returns>
/// <exception cref="ArgumentNullException"><paramref name="FormDocumentTemplate"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The first item of the first template page is not a <c>FormFieldTemplateGroup</c>.
/// </exception>
public static String PreprocessAndOcrImages(FormDocumentTemplate FormDocumentTemplate, OcrLanguage language, string filename)
{
    if (FormDocumentTemplate == null)
    {
        throw new ArgumentNullException("FormDocumentTemplate");
    }

    Vintasoft.Imaging.FormsProcessing.FormRecognition.FormFieldTemplateGroup grupo =
        FormDocumentTemplate.Pages[0].Items[0] as Vintasoft.Imaging.FormsProcessing.FormRecognition.FormFieldTemplateGroup;
    if (grupo == null)
    {
        // Fail with a clear message instead of a NullReferenceException further down.
        throw new InvalidOperationException(
            "The first item of the first template page is not a FormFieldTemplateGroup.");
    }

    // One recognition region per template field, scaled by the same factor
    // that is applied to the rendering resolution below.
    List<RecognitionRegion> regions = new List<RecognitionRegion>();
    foreach (FormFieldTemplate campo in grupo.Items)
    {
        // Multiply first, then truncate: casting before scaling would drop the
        // fractional part of the coordinate and amplify the error by fator.
        int x = (int)(campo.BoundingBox.X * fator);
        int y = (int)(campo.BoundingBox.Y * fator);
        int w = (int)(campo.BoundingBox.Width * fator);
        int h = (int)(campo.BoundingBox.Height * fator);
        regions.Add(new RecognitionRegion(new RegionOfInterest(x, y, w, h), language));
    }

    System.Text.StringBuilder resultado = new System.Text.StringBuilder();

    ImageCollection images = new ImageCollection();
    try
    {
        // Load the image and render it at 96 * fator DPI.
        VintasoftImage imagem = new VintasoftImage(filename);
        imagem.RenderingSettings = new RenderingSettings(
            new Resolution(96 * fator, 96 * fator),
            InterpolationMode.HighQualityBicubic,
            SmoothingMode.HighQuality);
        images.Add(imagem);

        using (TesseractOcr tesseractOcr = new TesseractOcr(TesseractOcrDllDirectory))
        {
            OcrEngineManager engineManager = new OcrEngineManager(tesseractOcr);
            OcrEngineSettings settings = new OcrEngineSettings(language);

            foreach (VintasoftImage image in images)
            {
                // Preprocess in place with the binarization step switched off.
                OcrPreprocessingCommand preprocessing = new OcrPreprocessingCommand();
                preprocessing.Binarization = null;
                preprocessing.ExecuteInPlace(image);

                // Recognize only the template-defined regions, not the whole page.
                OcrPage page = engineManager.Recognize(image, settings, regions);

                // Blank line retained from the original console output.
                Console.WriteLine();

                resultado.Append(page.GetText());
            }
        }
    }
    finally
    {
        // Release the image(s) even when preprocessing or recognition throws.
        images.ClearAndDisposeItems();
        images.Dispose();
    }

    return resultado.ToString();
}