
[DefaultProperty("Language")] public class SearchablePdfGenerator : Vintasoft.Imaging.Processing.ProcessingCommand<TTarget>, IProcessingCommand<TTarget>
[DefaultProperty("Language")] public __gc class SearchablePdfGenerator : public Vintasoft.Imaging.Processing.ProcessingCommand<TTarget*>*, IProcessingCommand<TTarget>
[DefaultProperty("Language")] public ref class SearchablePdfGenerator : public Vintasoft.Imaging.Processing.ProcessingCommand<TTarget^>^, IProcessingCommand<TTarget>
'Declaration <DefaultPropertyAttribute("Language")> Public Class SearchablePdfGenerator Inherits Vintasoft.Imaging.Processing.ProcessingCommand(Of TTarget) Implements IProcessingCommand(Of TTarget)
Вот C#/VB.NET код, который демонстрирует, как преобразовать файл изображения в PDF документ с возможностью поиска:
''' <summary>
''' Converts an image file to a searchable PDF document.
''' </summary>
''' <param name="sourceFilePath">Path of the source image file.</param>
''' <param name="pageCreationMode">The PDF page creation mode.</param>
''' <param name="ocrLanguage">The OCR language used for text recognition.</param>
''' <param name="pdfFilename">Path of the destination PDF file.</param>
''' <remarks>
''' The source file is loaded into an image collection, recognized with the
''' Tesseract OCR engine, and written to a new PDF 1.6 document created with
''' <c>FileMode.Create</c>. The loaded images are cleared and disposed in a
''' <c>Finally</c> block once generation ends, whether it succeeded or not.
''' </remarks>
Public Shared Sub ConvertImagesToSearchablePdf( _
        sourceFilePath As String, _
        pageCreationMode As Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode, _
        ocrLanguage As Vintasoft.Imaging.Ocr.OcrLanguage, _
        pdfFilename As String)
    Using sourceImages As New Vintasoft.Imaging.ImageCollection()
        ' load the source file into the collection
        sourceImages.Add(sourceFilePath)

        Try
            Using ocrEngine As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                ' wire the OCR engine into a searchable PDF generator
                Dim engineManager As New Vintasoft.Imaging.Ocr.OcrEngineManager(ocrEngine)
                Dim generator As New Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator(engineManager)
                generator.SourceImages = sourceImages
                generator.PageCreationMode = pageCreationMode

                ' symbol regions correction is switched on only for the "TextOverImage" mode
                Dim ocrSettings As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(ocrLanguage) With { _
                    .RecognitionRegionType = Vintasoft.Imaging.Ocr.RecognitionRegionType.RecognizePageWithPageSegmentationAndOrientationDetection, _
                    .UseSymbolRegionsCorrection = (pageCreationMode = Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode.TextOverImage) _
                }
                generator.OcrEngineSettings = ocrSettings

                ' the "Text" mode additionally needs a text color and a font name
                If pageCreationMode = Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode.Text Then
                    generator.TextColor = System.Drawing.Color.Black
                    generator.TextOnlyFontName = "Arial"
                End If

                ' save the document periodically while pages are being added
                AddHandler generator.PdfPageAdded, AddressOf Command_PdfPageAdded
                ' preprocess every image before it is recognized
                AddHandler generator.ImageProcessingStarted, AddressOf PdfGenerator_ImageProcessingStarted

                Using pdfDocument As New Vintasoft.Imaging.Pdf.PdfDocument( _
                        pdfFilename, _
                        System.IO.FileMode.Create, _
                        Vintasoft.Imaging.Pdf.PdfFormat.Pdf_16)
                    ' generate the pages, then persist the final state of the document
                    generator.Execute(pdfDocument)
                    pdfDocument.SaveChanges()
                End Using
            End Using
        Finally
            ' release the images that were loaded from the source file
            sourceImages.ClearAndDisposeItems()
        End Try
    End Using
End Sub

''' <summary>
''' Handles the <see cref="Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator.ImageProcessingStarted"/> event:
''' deskews and auto-orients a copy of the image and substitutes it for the image being processed.
''' </summary>
''' <param name="sender">The event source (not used).</param>
''' <param name="e">The event data; its <c>Image</c> property is replaced by this handler.</param>
Private Shared Sub PdfGenerator_ImageProcessingStarted( _
        sender As Object, _
        e As Vintasoft.Imaging.Pdf.Ocr.OcrImageProcessingEventArgs)
    ' work on a clone so that the source image itself is never modified
    Dim workingCopy As Vintasoft.Imaging.VintasoftImage = _
        DirectCast(e.Image.Clone(), Vintasoft.Imaging.VintasoftImage)

    Try
        Dim deskew As New Vintasoft.Imaging.ImageProcessing.Document.DeskewCommand()
        deskew.ExecuteInPlace(workingCopy)

        Dim orientationFix As New Vintasoft.Imaging.ImageProcessing.Document.AutoTextOrientationCommand()
        orientationFix.ExecuteInPlace(workingCopy)

        ' hand the preprocessed copy over (the processing command will dispose image after use)
        e.Image = workingCopy
    Catch
        ' preprocessing failed: release the copy and skip this image
        workingCopy.Dispose()
        e.Image = Nothing
    End Try
End Sub

''' <summary>
''' Handles the <see cref="Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator.PdfPageAdded"/> event:
''' saves the PDF document each time its page count reaches a multiple of 10.
''' </summary>
''' <param name="sender">The event source (not used).</param>
''' <param name="e">The event data that exposes the PDF document being generated.</param>
Private Shared Sub Command_PdfPageAdded( _
        sender As Object, _
        e As Vintasoft.Imaging.Pdf.Ocr.PdfPageAddedEventArgs)
    ' nothing to do unless the page count is a multiple of 10
    If e.Document.Pages.Count Mod 10 <> 0 Then
        Return
    End If

    e.Document.SaveChanges()
End Sub
/// <summary>
/// Converts an image file to a searchable PDF document.
/// </summary>
/// <param name="sourceFilePath">Path of the source image file.</param>
/// <param name="pageCreationMode">The PDF page creation mode.</param>
/// <param name="ocrLanguage">The OCR language used for text recognition.</param>
/// <param name="pdfFilename">Path of the destination PDF file.</param>
/// <remarks>
/// The source file is loaded into an image collection, recognized with the
/// Tesseract OCR engine, and written to a new PDF 1.6 document created with
/// <c>FileMode.Create</c>. The loaded images are cleared and disposed in a
/// <c>finally</c> block once generation ends, whether it succeeded or not.
/// </remarks>
public static void ConvertImagesToSearchablePdf(
    string sourceFilePath,
    Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode pageCreationMode,
    Vintasoft.Imaging.Ocr.OcrLanguage ocrLanguage,
    string pdfFilename)
{
    using (var sourceImages = new Vintasoft.Imaging.ImageCollection())
    {
        // load the source file into the collection
        sourceImages.Add(sourceFilePath);

        try
        {
            using (var ocrEngine = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
            {
                // wire the OCR engine into a searchable PDF generator
                var engineManager = new Vintasoft.Imaging.Ocr.OcrEngineManager(ocrEngine);
                var generator = new Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator(engineManager);
                generator.SourceImages = sourceImages;
                generator.PageCreationMode = pageCreationMode;

                // symbol regions correction is switched on only for the "TextOverImage" mode
                var ocrSettings = new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcrSettings(ocrLanguage)
                {
                    RecognitionRegionType =
                        Vintasoft.Imaging.Ocr.RecognitionRegionType.RecognizePageWithPageSegmentationAndOrientationDetection,
                    UseSymbolRegionsCorrection =
                        pageCreationMode == Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode.TextOverImage
                };
                generator.OcrEngineSettings = ocrSettings;

                // the "Text" mode additionally needs a text color and a font name
                if (pageCreationMode == Vintasoft.Imaging.Pdf.Ocr.PdfPageCreationMode.Text)
                {
                    generator.TextColor = System.Drawing.Color.Black;
                    generator.TextOnlyFontName = "Arial";
                }

                // save the document periodically while pages are being added
                generator.PdfPageAdded += Command_PdfPageAdded;
                // preprocess every image before it is recognized
                generator.ImageProcessingStarted += PdfGenerator_ImageProcessingStarted;

                using (var pdfDocument = new Vintasoft.Imaging.Pdf.PdfDocument(
                    pdfFilename,
                    System.IO.FileMode.Create,
                    Vintasoft.Imaging.Pdf.PdfFormat.Pdf_16))
                {
                    // generate the pages, then persist the final state of the document
                    generator.Execute(pdfDocument);
                    pdfDocument.SaveChanges();
                }
            }
        }
        finally
        {
            // release the images that were loaded from the source file
            sourceImages.ClearAndDisposeItems();
        }
    }
}

/// <summary>
/// Handles the <see cref="Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator.ImageProcessingStarted"/> event:
/// deskews and auto-orients a copy of the image and substitutes it for the image being processed.
/// </summary>
/// <param name="sender">The event source (not used).</param>
/// <param name="e">The event data; its <c>Image</c> property is replaced by this handler.</param>
private static void PdfGenerator_ImageProcessingStarted(
    object sender,
    Vintasoft.Imaging.Pdf.Ocr.OcrImageProcessingEventArgs e)
{
    // work on a clone so that the source image itself is never modified
    var workingCopy = (Vintasoft.Imaging.VintasoftImage)e.Image.Clone();

    try
    {
        var deskew = new Vintasoft.Imaging.ImageProcessing.Document.DeskewCommand();
        deskew.ExecuteInPlace(workingCopy);

        var orientationFix = new Vintasoft.Imaging.ImageProcessing.Document.AutoTextOrientationCommand();
        orientationFix.ExecuteInPlace(workingCopy);

        // hand the preprocessed copy over (the processing command will dispose image after use)
        e.Image = workingCopy;
    }
    catch
    {
        // preprocessing failed: release the copy and skip this image
        workingCopy.Dispose();
        e.Image = null;
    }
}

/// <summary>
/// Handles the <see cref="Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator.PdfPageAdded"/> event:
/// saves the PDF document each time its page count reaches a multiple of 10.
/// </summary>
/// <param name="sender">The event source (not used).</param>
/// <param name="e">The event data that exposes the PDF document being generated.</param>
private static void Command_PdfPageAdded(
    object sender,
    Vintasoft.Imaging.Pdf.Ocr.PdfPageAddedEventArgs e)
{
    // nothing to do unless the page count is a multiple of 10
    if (e.Document.Pages.Count % 10 != 0)
    {
        return;
    }

    e.Document.SaveChanges();
}
System.Object
Vintasoft.Imaging.Processing.ProcessingCommand<Vintasoft.Imaging.Pdf.PdfDocument>
Vintasoft.Imaging.Pdf.Ocr.SearchablePdfGenerator
Целевые платформы: .NET 9; .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5