Home
Search
 
What's New
Index
Books
Links
Q & A
Newsletter
Banners
 
Feedback
Tip Jar
 
C# Helper...
 
XML RSS Feed
Follow VBHelper on Twitter
 
 
 
MSDN Visual Basic Community
 
 
 
 
 
TitleList the words contained in a Word document
Description
Keywordswords, list words, Word, Word document, Office, automation
CategoriesFiles and Directories, Algorithms, Office
 
Subroutine GetFileWords opens the file, grabs its contents, and calls the GetWords function. Functions GrabTextFile and GrabWordFile get the textual content of text files and Word files respectively.

GetWords replaces all characters that are not numbers or letters with spaces and then uses Split to move the words into an array. Next it copies the words into a Collection using each word as its own key value. The collection doesn't allow duplicate keys so this makes a unique Collection of words. The function then copies the collection's strings into an array and calls subroutine Quicksort to sort the strings. It finishes by copying the sorted words into a string separated by vbCrLf characters.

 
' Return a sorted list of the words in this file.
Private Function GetFileWords(ByVal file_name As String) As _
    String
Dim file_contents As String

    If Right$(file_name, 4) = ".doc" Then
        ' Read as a Word document.
        file_contents = GrabWordFile(file_name)
    Else
        file_contents = GrabTextFile(file_name)
    End If

    GetFileWords = GetWords(file_contents)
End Function

' Return a text file's contents.
Private Function GrabTextFile(ByVal file_name As String) As _
    String
Dim fnum As Integer

    fnum = FreeFile
    Open file_name For Input As #fnum
    GrabTextFile = Input$(LOF(fnum), #fnum)
    Close #fnum
End Function

' Read the text contents of a Word file.
Private Function GrabWordFile(ByVal file_name As String) As _
    String
Const OPEN_FORMAT_AUTO = 0
Dim word_server As Object ' Word.Application

    ' Start the Word server.
    Set word_server = CreateObject("Word.Application")

    ' Open the file.
    word_server.Documents.Open _
        FileName:=file_name, _
        ConfirmConversions:=False, _
        ReadOnly:=False, _
        AddToRecentFiles:=False, _
        PasswordDocument:="", _
        PasswordTemplate:="", _
        Revert:=False, _
        WritePasswordDocument:="", _
        WritePasswordTemplate:="", _
        Format:=OPEN_FORMAT_AUTO

    ' Return the document's text.
    GrabWordFile = word_server.ActiveDocument.Content.Text

    ' Close the document without prompting.
    word_server.ActiveDocument.Close False
    word_server.Quit
End Function

' Return a sorted list of the string's words.
Private Function GetWords(ByVal file_contents As String) As _
    String
Dim i As Integer
Dim ch As String
Dim word_array() As String
Dim word_col As Collection
Dim Word As String
Dim result As String

    ' Replace separator characters with spaces.
    For i = 1 To Len(file_contents)
        ' See if this character is a letter or number.
        ch = Mid$(file_contents, i, 1)
        If Not ( _
            (ch >= "A" And ch <= "Z") Or _
            (ch >= "a" And ch <= "z") Or _
            (ch >= "0" And ch <= "9") _
        ) Then
            ' Not a letter. Replace with space.
            Mid$(file_contents, i, 1) = " "
        End If
    Next i
    file_contents = LCase$(file_contents)

    ' Split the words.
    word_array = Split(file_contents)

    ' Add the words to the word collection.
    Set word_col = New Collection
    On Error Resume Next
    For i = LBound(word_array) To UBound(word_array)
        Word = word_array(i)
        If Len(Word) > 0 Then word_col.Add Word, Word
    Next i
    On Error GoTo 0

    ' Convert the collection into an array.
    ReDim word_array(1 To word_col.Count)
    For i = 1 To word_col.Count
        word_array(i) = word_col(i)
    Next i

    ' Sort the array.
    Quicksort word_array, 1, word_col.Count

    ' Generate the result string.
    result = ""
    For i = 1 To word_col.Count
        result = result & vbCrLf & word_array(i)
    Next i

    GetWords = Mid$(result, Len(vbCrLf) + 1)
End Function

' Use Quicksort to sort a list of strings.
'
' This code is from the book "Ready-to-Run
' Visual Basic Algorithms" by Rod Stephens.
' http://www.vb-helper.com/vba.htm
Private Sub Quicksort(list() As String, ByVal min As Long, _
    ByVal max As Long)
Dim mid_value As String
Dim hi As Long
Dim lo As Long
Dim i As Long

    ' If there is 0 or 1 item in the list,
    ' this sublist is sorted.
    If min >= max Then Exit Sub

    ' Pick a dividing value.
    i = Int((max - min + 1) * Rnd + min)
    mid_value = list(i)

    ' Swap the dividing value to the front.
    list(i) = list(min)

    lo = min
    hi = max
    Do
        ' Look down from hi for a value < mid_value.
        Do While list(hi) >= mid_value
            hi = hi - 1
            If hi <= lo Then Exit Do
        Loop
        If hi <= lo Then
            list(lo) = mid_value
            Exit Do
        End If

        ' Swap the lo and hi values.
        list(lo) = list(hi)

        ' Look up from lo for a value >= mid_value.
        lo = lo + 1
        Do While list(lo) < mid_value
            lo = lo + 1
            If lo >= hi Then Exit Do
        Loop
        If lo >= hi Then
            lo = hi
            list(hi) = mid_value
            Exit Do
        End If

        ' Swap the lo and hi values.
        list(hi) = list(lo)
    Loop

    ' Sort the two sublists.
    Quicksort list, min, lo - 1
    Quicksort list, lo + 1, max
End Sub
 
 
Copyright © 1997-2010 Rocky Mountain Computer Consulting, Inc.   All rights reserved.
  Updated