VintaSoft Imaging .NET SDK 12.3: Documentation for .NET developer
Vintasoft.Imaging.Ocr.Results Namespace / OcrResultsEditor Class
Members Object Syntax Example Hierarchy Requirements SeeAlso
In This Topic
    OcrResultsEditor Class
    In This Topic
    Represents an editor of OCR results.
    Object Model
    OcrResultsEditor
    Syntax
    'Declaration
    
    Public Class OcrResultsEditor
    
    
    public class OcrResultsEditor
    
    
    public __gc class OcrResultsEditor
    
    
    public ref class OcrResultsEditor
    
    
    Example

    This C#/VB.NET code shows how to recognize text in the images of a file and remove words with low confidence from the OCR results.

    
    ''' <summary>
    ''' Recognizes text in every image of a file,
    ''' removes words with low confidence from the recognized text and
    ''' returns the remaining text.
    ''' </summary>
    ''' <param name="filename">The name of the file containing image to OCR.</param>
    ''' <returns>
    ''' The filtered text of all images in the file; the text of each image
    ''' is followed by a line break.
    ''' </returns>
    Public Function RecognizeTextAndFilterRecognitionResult(filename As String) As String
        ' words with confidence below this value are removed from the result
        Const MIN_CONFIDENCE As Single = 75F

        ' create image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' add images from file to image collection
                images.Add(filename)

                ' create tesseract OCR engine
                Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                    ' initialize the engine for English text
                    Dim settings As New Vintasoft.Imaging.Ocr.OcrEngineSettings(Vintasoft.Imaging.Ocr.OcrLanguage.English)
                    tesseractOcr.Init(settings)

                    ' accumulates the filtered text of all images
                    Dim result As New System.Text.StringBuilder()

                    ' for each image in image collection
                    For Each image As Vintasoft.Imaging.VintasoftImage In images
                        ' recognize the image
                        Dim page As Vintasoft.Imaging.Ocr.Results.OcrPage = tesseractOcr.Recognize(image)

                        ' get all words in recognized text
                        Dim ocrObjects As Vintasoft.Imaging.Ocr.Results.OcrObject() = page.GetObjects(Vintasoft.Imaging.Ocr.OcrObjectType.Word)

                        ' collect the words whose confidence is less than minimum confidence
                        Dim removeObjects As New System.Collections.Generic.List(Of Vintasoft.Imaging.Ocr.Results.OcrObject)()
                        For Each word As Vintasoft.Imaging.Ocr.Results.OcrObject In ocrObjects
                            If word.Confidence < MIN_CONFIDENCE Then
                                removeObjects.Add(word)
                            End If
                        Next

                        ' remove words with low confidence from the page and
                        ' validate the edited recognition results
                        Dim editor As New Vintasoft.Imaging.Ocr.Results.OcrResultsEditor(page)
                        editor.RemoveObjects(removeObjects.ToArray())
                        editor.ValidateResults()

                        ' add the filtered text of the page, followed by a line break, to result
                        result.AppendLine(page.GetText())
                    Next

                    ' return result
                    Return result.ToString()
                End Using
            Finally
                ' dispose images and clear image collection;
                ' done in Finally so that it also runs when loading or recognition fails
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Function
    
    
    
    /// <summary>
    /// Recognizes text in every image of a file,
    /// removes words with low confidence from the recognized text and
    /// returns the remaining text.
    /// </summary>
    /// <param name="filename">The name of the file containing image to OCR.</param>
    /// <returns>
    /// The filtered text of all images in the file; the text of each image
    /// is followed by a line break.
    /// </returns>
    public string RecognizeTextAndFilterRecognitionResult(string filename)
    {
        // words with confidence below this value are removed from the result
        const float MIN_CONFIDENCE = 75.0f;

        // create image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // add images from file to image collection
                images.Add(filename);

                // create tesseract OCR engine
                using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr =
                    new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
                {
                    // initialize the engine for English text
                    Vintasoft.Imaging.Ocr.OcrEngineSettings settings =
                        new Vintasoft.Imaging.Ocr.OcrEngineSettings(Vintasoft.Imaging.Ocr.OcrLanguage.English);
                    tesseractOcr.Init(settings);

                    // accumulates the filtered text of all images
                    System.Text.StringBuilder result = new System.Text.StringBuilder();

                    // for each image in image collection
                    foreach (Vintasoft.Imaging.VintasoftImage image in images)
                    {
                        // recognize the image
                        Vintasoft.Imaging.Ocr.Results.OcrPage page = tesseractOcr.Recognize(image);

                        // get all words in recognized text
                        Vintasoft.Imaging.Ocr.Results.OcrObject[] ocrObjects = page.GetObjects(
                            Vintasoft.Imaging.Ocr.OcrObjectType.Word);

                        // collect the words whose confidence is less than minimum confidence
                        System.Collections.Generic.List<Vintasoft.Imaging.Ocr.Results.OcrObject> removeObjects =
                            new System.Collections.Generic.List<Vintasoft.Imaging.Ocr.Results.OcrObject>();
                        foreach (Vintasoft.Imaging.Ocr.Results.OcrObject word in ocrObjects)
                        {
                            if (word.Confidence < MIN_CONFIDENCE)
                            {
                                removeObjects.Add(word);
                            }
                        }

                        // remove words with low confidence from the page and
                        // validate the edited recognition results
                        Vintasoft.Imaging.Ocr.Results.OcrResultsEditor editor =
                            new Vintasoft.Imaging.Ocr.Results.OcrResultsEditor(page);
                        editor.RemoveObjects(removeObjects.ToArray());
                        editor.ValidateResults();

                        // add the filtered text of the page, followed by a line break, to result
                        result.AppendLine(page.GetText());
                    }

                    // return result
                    return result.ToString();
                }
            }
            finally
            {
                // dispose images and clear image collection;
                // done in finally so that it also runs when loading or recognition fails
                images.ClearAndDisposeItems();
            }
        }
    }
    
    

    Inheritance Hierarchy

    System.Object
       Vintasoft.Imaging.Ocr.Results.OcrResultsEditor

    Requirements

    Target Platforms: .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5

    See Also