Attribute VB_Name = "TextDiff" Option Explicit Public Const DIFF_INSERT = "+" Public Const DIFF_DELETE = "-" Public Const DIFF_COPY = "=" Dim D() As Integer Private Function Min(a As Integer, b As Integer, c As Integer) As Integer Dim result As Integer result = a If b < result Then result = b If c < result Then result = c Min = result End Function ' Based on article http://www.merriampark.com/ld.htm ' Calculates string "similarity". The value returned is number of insert/delete operations needed to transform ' string 's' into string 't' Public Function LevenshteinDistance(ByVal s As String, ByVal t As String) As Integer Dim n As Integer ' length of s Dim m As Integer ' length of t Dim i As Integer Dim j As Integer Dim cs As String Dim ct As String Dim cost As Integer ' Step 1 n = Len(s) m = Len(t) ReDim D(0 To n, 0 To m) As Integer ' Step 2 For i = 0 To n D(i, 0) = i Next i For j = 0 To m D(0, j) = j Next j ' Step 3 For i = 1 To n cs = Mid(s, i, 1) ' Step 4 For j = 1 To m ct = Mid(t, j, 1) ' Step 5 If cs = ct Then cost = 0 Else cost = 1 End If ' Step 6 D(i, j) = Min(D(i - 1, j) + 1, D(i, j - 1) + 1, D(i - 1, j - 1) + cost) Next j Next i ' Step 7 LevenshteinDistance = D(n, m) End Function Private Function CanGoTo(ByVal i As Integer, ByVal j As Integer, ByVal di As Integer, ByVal dj As Integer, ByRef nexti As Integer, ByRef nextj As Integer, ByRef nextcost As Integer) As Boolean Dim ni As Integer Dim nj As Integer ni = i + di nj = j + dj CanGoTo = False If (ni >= 0) And (nj >= 0) Then If D(ni, nj) <= nextcost Then nextcost = D(ni, nj) nexti = ni nextj = nj CanGoTo = True End If End If End Function Private Function GetCodeAt(ByVal commands As String, index As Integer) As String GetCodeAt = Mid(commands, 1 + ((index - 1) * 2), 2) End Function Private Sub SetCodeAt(ByRef commands As String, index As Integer, ByVal value As String) Dim base As Integer base = 1 + ((index - 1) * 2) commands = Left(commands, base - 1) & value & Mid(commands, base + 2) End Sub Private Function RemoveCodeAt(ByRef commands As String, index As Integer) As String Dim base As Integer base = 1 + ((index - 1) * 2) RemoveCodeAt = Mid(commands, base, 2) commands = Left(commands, base - 1) & Mid(commands, base + 2) End Function Private Sub InsertCodeAt(ByRef commands As String, index As Integer, ByVal value As String) Dim base As Integer base = 1 + ((index - 1) * 2) commands = Left(commands, base - 1) & value & Mid(commands, base) End Sub Private Function OperationAt(ByVal commands As String, index As Integer) As String OperationAt = Mid(GetCodeAt(commands, index), 1, 1) End Function Private Sub MoveCode(ByRef commands As String, oldIndex As Integer, newIndex As Integer) If oldIndex = newIndex Then Exit Sub Dim value As String value = RemoveCodeAt(commands, oldIndex) Call InsertCodeAt(commands, newIndex, value) End Sub ' Based on article http://somethinkodd.com/oddthinking/2006/01/16/comparing-strings-an-analysis-of-diff-algorithms/ ' Returns sequence of command-value character pairs. ' Valid commands are DIFF_INSERT, DIFF_DELETE, DIFF_COPY Public Function Diff(ByVal u As String, ByVal v As String) As String Dim i As Integer Dim j As Integer Dim result As String Dim ni As Integer Dim nj As Integer Dim cost As Integer Dim operation As String Dim count As Integer ' calculate the matrix Call LevenshteinDistance(u, v) i = Len(u) j = Len(v) result = "" count = 0 ' A minimising path on a D matrix is one which at a given element travels up, left or diagonally up-and-left ' to the minimum of its "preceding" neighbours. If more than one neighbour has an equal value, the choice ' is arbitrary. The path continues until it reaches the top left corner. While (i > 0) Or (j > 0) cost = 32000 ' some very big number ' If the path travels from an element D[i,j] to its neighbour above it, D[i-1,j], this represents ' the deletion of a symbol from the string u at position i. If CanGoTo(i, j, -1, 0, ni, nj, cost) Then operation = DIFF_DELETE & Mid(u, i, 1) End If ' If the path travels from an element D[i,j] to its left neighbour, D[i,j-1], ' this represents the insertion of a symbol v[j] in the string u at position i. If CanGoTo(i, j, 0, -1, ni, nj, cost) Then operation = DIFF_INSERT & Mid(v, j, 1) End If ' If the path travels from an element D[i,j] to its upper-left neighbour, D[i-1,j-1], and u[i] = v[j], ' then this means that the two strings match at this point, and no operation is required. ' As character replacement (as opposed to deletion and insertion) is not allowed in this model, ' the path may not travel to the upper-left neighbour if u[i] <> v[j]. If (i > 0) And (j > 0) Then If (Mid(u, i, 1) = Mid(v, j, 1)) Then If CanGoTo(i, j, -1, -1, ni, nj, cost) Then operation = DIFF_COPY & Mid(u, i, 1) End If End If End If result = operation & result count = count + 1 i = ni j = nj Wend ' at this point we have the 'result' ready ' now we will group delete/insert operations to make it nicer ' we will translate -a+A-b+B-c+C sequences ' into -a-b-c+A+B+C Dim c As Integer Dim base As Integer Dim size As Integer c = 1 While c <= count - 1 If (OperationAt(result, c + 0) = DIFF_DELETE) And _ (OperationAt(result, c + 1) = DIFF_INSERT) Then ' start of the sequence base = c size = 1 Do If (OperationAt(result, base + (size * 2) + 0) = DIFF_DELETE) And _ (OperationAt(result, base + (size * 2) + 1) = DIFF_INSERT) Then size = size + 1 Else Exit Do End If Loop ' now 'size' is the number of consecutive delete/insert pairs ' 'base' is the start index Dim newIndex As Integer Dim oldIndex As Integer newIndex = base + 1 oldIndex = base + 2 While size > 1 Call MoveCode(result, oldIndex, newIndex) newIndex = newIndex + 1 oldIndex = oldIndex + 2 size = size - 1 Wend c = newIndex Else c = c + 1 End If Wend ' we will translate -a+B+C-d sequences ' into -a-d+B+C c = 1 While c <= count - 1 If (OperationAt(result, c + 0) = DIFF_DELETE) And _ (OperationAt(result, c + 1) = DIFF_INSERT) Then ' possibly start of insert... delete pattern base = c size = 1 Do If (OperationAt(result, base + size + 1) = DIFF_INSERT) Then size = size + 1 Else Exit Do End If Loop ' now 'size' is the number of consecutive inserts If (OperationAt(result, base + size + 1) = DIFF_DELETE) Then ' we have delete operation at 'base' and at 'base + size + 1' ' in between there are inserts. lets merge those blocks of deletes Do Call MoveCode(result, base + size + 1, base + 1) base = base + 1 If OperationAt(result, base + size + 1) <> DIFF_DELETE Then c = base + size + 1 Exit Do End If Loop Else c = base + size + 1 End If Else c = c + 1 End If Wend Diff = result End Function