В этом разделе
            
            Представляет абстрактный базовый класс, определяющий алгоритм текстового поиска.
            
            
Объектная модель
Синтаксис
            
            
            
            'Declaration
Public MustInherit Class TextSearchEngine
 
            
            public abstract class TextSearchEngine
 
            
            public __gc abstract class TextSearchEngine
 
            
            public ref class TextSearchEngine abstract
 
	 
	
         
Пример
Ниже приведён пример кода на C#/VB.NET, который демонстрирует, как создать собственный алгоритм текстового поиска для поиска цифр на странице PDF документа.
    
	
	    
	    
''' <summary>
''' Walks through all pages of PDF document and prints to the console
''' the text and the rectangle of every digit sequence found on each page.
''' </summary>
''' <param name="document">PDF document whose pages are scanned for digits.</param>
Public Sub SearchDigitsInTextOfPdfDocumentUsingTextSearchEngine(document As Vintasoft.Imaging.Pdf.PdfDocument)
    System.Console.WriteLine("Searching the digits in text of PDF document.")
    Dim lastPageIndex As Integer = document.Pages.Count - 1
    For pageIndex As Integer = 0 To lastPageIndex
        ' collect digit regions of the current page
        Dim digitRegions As Vintasoft.Imaging.Text.TextRegion() = AdvancedDigitsSearchOnPdfPage(document.Pages(pageIndex))
        ' nothing to print for this page
        If digitRegions Is Nothing Then
            Continue For
        End If
        ' print one line per found region
        For Each digitRegion As Vintasoft.Imaging.Text.TextRegion In digitRegions
            Dim message As String = String.Format("- Text={0}, Rectangle={1}", digitRegion.TextContent, digitRegion.Rectangle)
            System.Console.WriteLine(message)
        Next
    Next
    System.Console.WriteLine("Searching the digits in text of PDF document is finished.")
End Sub
''' <summary>
''' Finds all digit sequences in the text region of PDF page.
''' </summary>
''' <param name="page">PDF page whose text region is searched.</param>
''' <returns>
''' An array with one text region per found digit sequence;
''' the array is empty if nothing was found.
''' </returns>
Public Function AdvancedDigitsSearchOnPdfPage(page As Vintasoft.Imaging.Pdf.Tree.PdfPage) As Vintasoft.Imaging.Text.TextRegion()
    Dim foundRegions As New System.Collections.Generic.List(Of Vintasoft.Imaging.Text.TextRegion)()
    Dim engine As New DigitsSearchEngine()
    Dim searchPosition As Integer = 0
    ' first search attempt
    Dim match As Vintasoft.Imaging.Text.TextRegion = page.TextRegion.FindText(engine, searchPosition, False)
    While match IsNot Nothing
        ' remember the result
        foundRegions.Add(match)
        ' shift the search position past the found text
        searchPosition += match.TextContent.Length
        ' next search attempt
        match = page.TextRegion.FindText(engine, searchPosition, False)
    End While
    Return foundRegions.ToArray()
End Function
''' <summary>
''' Text search engine that finds a sequence of consecutive digits in a string.
''' </summary>
Private Class DigitsSearchEngine
    Inherits Vintasoft.Imaging.Text.TextSearchEngine
    ''' <summary>
    ''' Searches the first sequence of consecutive digits in the analyzed range
    ''' [startIndex, startIndex + length) of the source string.
    ''' </summary>
    ''' <param name="sourceString">Source string where digits must be searched.</param>
    ''' <param name="startIndex">The zero-based index, in the sourceString, of the first analyzed character.</param>
    ''' <param name="length">The number of characters, in the sourceString, to analyze.</param>
    ''' <param name="rightToLeft">
    ''' If True, the range is scanned from its last character to its first one and
    ''' the digit sequence closest to the end of range is returned;
    ''' otherwise the range is scanned from its first character to its last one.
    ''' </param>
    ''' <returns>
    ''' Vintasoft.Imaging.Text.TextSearchResult object created from the start index
    ''' and the length of found digit sequence; Nothing if the range has no digits.
    ''' </returns>
    Public Overrides Function Find(sourceString As String, startIndex As Integer, length As Integer, rightToLeft As Boolean) As Vintasoft.Imaging.Text.TextSearchResult
        ' bounds of the analyzed range: rangeStart is inclusive, rangeEnd is exclusive
        Dim rangeStart As Integer = startIndex
        Dim rangeEnd As Integer = startIndex + length
        ' bounds of the found digit sequence: start is inclusive, end is exclusive
        Dim startDigitIndex As Integer = -1
        Dim endDigitIndex As Integer = -1
        If rightToLeft Then
            ' scan backwards, but never leave the analyzed range
            For index As Integer = rangeEnd - 1 To rangeStart Step -1
                Dim isDigit As Boolean = Char.IsDigit(sourceString(index))
                If isDigit AndAlso endDigitIndex = -1 Then
                    ' the last digit of sequence is found
                    endDigitIndex = index + 1
                ElseIf Not isDigit AndAlso endDigitIndex <> -1 Then
                    ' the first non-digit before the sequence is found
                    startDigitIndex = index + 1
                    Exit For
                End If
            Next
            ' the sequence reaches the beginning of analyzed range
            If endDigitIndex <> -1 AndAlso startDigitIndex = -1 Then
                startDigitIndex = rangeStart
            End If
        Else
            ' scan forwards inside the analyzed range
            For index As Integer = rangeStart To rangeEnd - 1
                Dim isDigit As Boolean = Char.IsDigit(sourceString(index))
                If isDigit AndAlso startDigitIndex = -1 Then
                    ' the first digit of sequence is found
                    startDigitIndex = index
                ElseIf Not isDigit AndAlso startDigitIndex <> -1 Then
                    ' the first non-digit after the sequence is found
                    endDigitIndex = index
                    Exit For
                End If
            Next
            ' the sequence reaches the end of analyzed range
            If startDigitIndex <> -1 AndAlso endDigitIndex = -1 Then
                endDigitIndex = rangeEnd
            End If
        End If
        ' if digit is not found
        If startDigitIndex = -1 Then
            Return Nothing
        End If
        ' return the text search result
        Return New Vintasoft.Imaging.Text.TextSearchResult(startDigitIndex, endDigitIndex - startDigitIndex)
    End Function
End Class
	     
	 
 
    
	
	    
	    
/// <summary>
/// Walks through all pages of PDF document and prints to the console
/// the text and the rectangle of every digit sequence found on each page.
/// </summary>
/// <param name="document">PDF document whose pages are scanned for digits.</param>
public void SearchDigitsInTextOfPdfDocumentUsingTextSearchEngine(Vintasoft.Imaging.Pdf.PdfDocument document)
{
    System.Console.WriteLine("Searching the digits in text of PDF document.");
    for (int pageIndex = 0; pageIndex < document.Pages.Count; pageIndex++)
    {
        // collect digit regions of the current page
        Vintasoft.Imaging.Text.TextRegion[] digitRegions =
            AdvancedDigitsSearchOnPdfPage(document.Pages[pageIndex]);
        // nothing to print for this page
        if (digitRegions == null)
        {
            continue;
        }
        // print one line per found region
        foreach (Vintasoft.Imaging.Text.TextRegion digitRegion in digitRegions)
        {
            string message = string.Format("- Text={0}, Rectangle={1}",
                digitRegion.TextContent,
                digitRegion.Rectangle);
            System.Console.WriteLine(message);
        }
    }
    System.Console.WriteLine("Searching the digits in text of PDF document is finished.");
}
/// <summary>
/// Finds all digit sequences in the text region of PDF page.
/// </summary>
/// <param name="page">PDF page whose text region is searched.</param>
/// <returns>
/// An array with one text region per found digit sequence;
/// the array is empty if nothing was found.
/// </returns>
public Vintasoft.Imaging.Text.TextRegion[] AdvancedDigitsSearchOnPdfPage(
    Vintasoft.Imaging.Pdf.Tree.PdfPage page)
{
    System.Collections.Generic.List<Vintasoft.Imaging.Text.TextRegion> foundRegions =
        new System.Collections.Generic.List<Vintasoft.Imaging.Text.TextRegion>();
    DigitsSearchEngine engine = new DigitsSearchEngine();
    int searchPosition = 0;
    while (true)
    {
        // search the next digit sequence
        Vintasoft.Imaging.Text.TextRegion match =
            page.TextRegion.FindText(engine, ref searchPosition, false);
        // no more matches on this page
        if (match == null)
        {
            break;
        }
        // remember the result
        foundRegions.Add(match);
        // shift the search position past the found text
        searchPosition += match.TextContent.Length;
    }
    return foundRegions.ToArray();
}
/// <summary>
/// Text search engine that finds a sequence of consecutive digits in a string.
/// </summary>
class DigitsSearchEngine : Vintasoft.Imaging.Text.TextSearchEngine
{
    /// <summary>
    /// Searches the first sequence of consecutive digits in the analyzed range
    /// [startIndex, startIndex + length) of the source string.
    /// </summary>
    /// <param name="sourceString">Source string where digits must be searched.</param>
    /// <param name="startIndex">The zero-based index, in the sourceString, of the first analyzed character.</param>
    /// <param name="length">The number of characters, in the sourceString, to analyze.</param>
    /// <param name="rightToLeft">
    /// If <b>true</b>, the range is scanned from its last character to its first one and
    /// the digit sequence closest to the end of range is returned;
    /// otherwise the range is scanned from its first character to its last one.
    /// </param>
    /// <returns>
    /// Vintasoft.Imaging.Text.TextSearchResult object created from the start index
    /// and the length of found digit sequence; null if the range has no digits.
    /// </returns>
    public override Vintasoft.Imaging.Text.TextSearchResult Find(
        string sourceString, int startIndex, int length, bool rightToLeft)
    {
        // bounds of the analyzed range: rangeStart is inclusive, rangeEnd is exclusive
        int rangeStart = startIndex;
        int rangeEnd = startIndex + length;
        // bounds of the found digit sequence: start is inclusive, end is exclusive
        int startDigitIndex = -1;
        int endDigitIndex = -1;

        if (rightToLeft)
        {
            // scan backwards, but never leave the analyzed range
            for (int index = rangeEnd - 1; index >= rangeStart; index--)
            {
                bool isDigit = char.IsDigit(sourceString[index]);
                if (isDigit && endDigitIndex == -1)
                {
                    // the last digit of sequence is found
                    endDigitIndex = index + 1;
                }
                else if (!isDigit && endDigitIndex != -1)
                {
                    // the first non-digit before the sequence is found
                    startDigitIndex = index + 1;
                    break;
                }
            }
            // the sequence reaches the beginning of analyzed range
            if (endDigitIndex != -1 && startDigitIndex == -1)
            {
                startDigitIndex = rangeStart;
            }
        }
        else
        {
            // scan forwards inside the analyzed range
            for (int index = rangeStart; index < rangeEnd; index++)
            {
                bool isDigit = char.IsDigit(sourceString[index]);
                if (isDigit && startDigitIndex == -1)
                {
                    // the first digit of sequence is found
                    startDigitIndex = index;
                }
                else if (!isDigit && startDigitIndex != -1)
                {
                    // the first non-digit after the sequence is found
                    endDigitIndex = index;
                    break;
                }
            }
            // the sequence reaches the end of analyzed range
            if (startDigitIndex != -1 && endDigitIndex == -1)
            {
                endDigitIndex = rangeEnd;
            }
        }

        // if digit is not found
        if (startDigitIndex == -1)
        {
            return null;
        }

        // return the text search result
        return new Vintasoft.Imaging.Text.TextSearchResult(
            startDigitIndex, endDigitIndex - startDigitIndex);
    }
}
	     
	 
 
 
Иерархия наследования
System.Object
   Vintasoft.Imaging.Text.TextSearchEngine
 
Требования
Целевые платформы: .NET 9; .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5
 
Смотрите также