VintaSoft Imaging .NET SDK 14.0: Документация для .NET разработчика
В этом разделе
    DOCX: Как сконвертировать DOCX/DOC-файл в TXT-файл?
    В этом разделе
    Вот C#/VB.NET код, который демонстрирует, как преобразовать DOCX/DOC-файл в TXT-файл:
    /// <summary>
    /// Converts a DOCX file to a TXT file: the text content of every page is collected,
    /// preceded by a "Page Number" header, and the result is written to a text file.
    /// </summary>
    /// <param name="docxFilePath">Path to the source DOCX file.</param>
    /// <param name="txtFilePath">Path to the TXT file that receives the extracted text.</param>
    public void ConvertDocxToTxt(string docxFilePath, string txtFilePath)
    {
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // open DOCX file (inside the try block so the finally clause
                // also runs if opening fails)
                images.Add(docxFilePath);

                // 1-based number of the page that is currently being processed
                int pageNumber = 0;

                // document text content
                System.Text.StringBuilder content = new System.Text.StringBuilder();

                // for each page of DOCX file
                foreach (Vintasoft.Imaging.VintasoftImage image in images)
                {
                    pageNumber++;

                    // write page number followed by an empty line
                    content.AppendFormat("\tPage Number: {0}", pageNumber);
                    content.AppendLine();
                    content.AppendLine();

                    // find text region metadata
                    Vintasoft.Imaging.Metadata.TextRegionMetadata textRegionMetadata =
                        image.Metadata.MetadataTree.FindChildNode<Vintasoft.Imaging.Metadata.TextRegionMetadata>();

                    // if current page has text content
                    if (textRegionMetadata != null)
                    {
                        // get text region
                        Vintasoft.Imaging.Text.TextRegion textRegion = textRegionMetadata.GetTextRegion();

                        // if text region exists and has text content
                        if (textRegion != null && textRegion.TextContent != null)
                        {
                            // write page text content
                            content.Append(textRegion.TextContent);
                        }
                    }

                    // add page separator after every page except the last one;
                    // pageNumber is the number of the CURRENT page here, so the
                    // separator between the last two pages is not skipped
                    if (pageNumber < images.Count)
                    {
                        content.AppendLine();
                        content.AppendLine();
                        content.AppendLine();
                    }
                }

                // write DOCX file text content to a TXT file
                System.IO.File.WriteAllText(txtFilePath, content.ToString());
            }
            finally
            {
                // clear and dispose images
                images.ClearAndDisposeItems();
            }
        }
    }
    
    ''' <summary>
    ''' Converts a DOCX file to a TXT file: the text content of every page is collected,
    ''' preceded by a "Page Number" header, and the result is written to a text file.
    ''' </summary>
    ''' <param name="docxFilePath">Path to the source DOCX file.</param>
    ''' <param name="txtFilePath">Path to the TXT file that receives the extracted text.</param>
    Public Sub ConvertDocxToTxt(docxFilePath As String, txtFilePath As String)
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' open DOCX file (inside the Try block so the Finally clause
                ' also runs if opening fails)
                images.Add(docxFilePath)

                ' 1-based number of the page that is currently being processed
                Dim pageNumber As Integer = 0

                ' document text content
                Dim content As New System.Text.StringBuilder()

                ' for each page of DOCX file
                For Each image As Vintasoft.Imaging.VintasoftImage In images
                    ' increment first, then format: the previous
                    ' Math.Max(Interlocked.Increment(...), ...) expression evaluated to the
                    ' already incremented value, so page headers started at 2
                    pageNumber += 1

                    ' write page number followed by an empty line
                    content.AppendFormat(vbTab & "Page Number: {0}", pageNumber)
                    content.AppendLine()
                    content.AppendLine()

                    ' find text region metadata
                    Dim textRegionMetadata As Vintasoft.Imaging.Metadata.TextRegionMetadata = image.Metadata.MetadataTree.FindChildNode(Of Vintasoft.Imaging.Metadata.TextRegionMetadata)()

                    ' if current page has text content
                    If textRegionMetadata IsNot Nothing Then
                        ' get text region
                        Dim textRegion As Vintasoft.Imaging.Text.TextRegion = textRegionMetadata.GetTextRegion()

                        ' if text region exists and has text content
                        If textRegion IsNot Nothing AndAlso textRegion.TextContent IsNot Nothing Then
                            ' write page text content
                            content.Append(textRegion.TextContent)
                        End If
                    End If

                    ' add page separator after every page except the last one;
                    ' pageNumber is the number of the CURRENT page here, so the
                    ' separator between the last two pages is not skipped
                    If pageNumber < images.Count Then
                        content.AppendLine()
                        content.AppendLine()
                        content.AppendLine()
                    End If
                Next

                ' write DOCX file text content to a TXT file
                System.IO.File.WriteAllText(txtFilePath, content.ToString())
            Finally
                ' clear and dispose images
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Sub