Tento zdrojový kód je k dispozici ke stažení zde: TextDiff.bas(~8 KB)
1: | Attribute VB_Name = "TextDiff" |
2: | Option Explicit |
3: | Public Const DIFF_INSERT = "+" |
4: | Public Const DIFF_DELETE = "-" |
5: | Public Const DIFF_COPY = "=" |
6: | Dim D() As Integer |
7: | Private Function Min(a As Integer, b As Integer, c As Integer) As Integer |
8: | Dim result As Integer |
9: | result = a |
10: | If b < result Then result = b |
11: | If c < result Then result = c |
12: | Min = result |
13: | End Function |
14: | ' Based on article http://www.merriampark.com/ld.htm |
15: | ' Calculates string "similarity". The value returned is number of insert/delete operations needed to transform |
16: | ' string 's' into string 't' |
17: | Public Function LevenshteinDistance(ByVal s As String, ByVal t As String) As Integer |
18: | Dim n As Integer ' length of s |
19: | Dim m As Integer ' length of t |
20: | Dim i As Integer |
21: | Dim j As Integer |
22: | Dim cs As String |
23: | Dim ct As String |
24: | Dim cost As Integer |
25: | |
26: | ' Step 1 |
27: | n = Len(s) |
28: | m = Len(t) |
29: | ReDim D(0 To n, 0 To m) As Integer |
30: | |
31: | ' Step 2 |
32: | For i = 0 To n |
33: | D(i, 0) = i |
34: | Next i |
35: | |
36: | For j = 0 To m |
37: | D(0, j) = j |
38: | Next j |
39: | |
40: | ' Step 3 |
41: | For i = 1 To n |
42: | cs = Mid(s, i, 1) |
43: | |
44: | ' Step 4 |
45: | For j = 1 To m |
46: | ct = Mid(t, j, 1) |
47: | |
48: | ' Step 5 |
49: | If cs = ct Then |
50: | cost = 0 |
51: | Else |
52: | cost = 1 |
53: | End If |
54: | |
55: | ' Step 6 |
56: | D(i, j) = Min(D(i - 1, j) + 1, D(i, j - 1) + 1, D(i - 1, j - 1) + cost) |
57: | Next j |
58: | Next i |
59: | |
60: | ' Step 7 |
61: | LevenshteinDistance = D(n, m) |
62: | End Function |
63: | Private Function CanGoTo(ByVal i As Integer, ByVal j As Integer, ByVal di As Integer, ByVal dj As Integer, ByRef nexti As Integer, ByRef nextj As Integer, ByRef nextcost As Integer) As Boolean |
64: | Dim ni As Integer |
65: | Dim nj As Integer |
66: | ni = i + di |
67: | nj = j + dj |
68: | |
69: | CanGoTo = False |
70: | |
71: | If (ni >= 0) And (nj >= 0) Then |
72: | If D(ni, nj) <= nextcost Then |
73: | nextcost = D(ni, nj) |
74: | nexti = ni |
75: | nextj = nj |
76: | CanGoTo = True |
77: | End If |
78: | End If |
79: | End Function |
80: | Private Function GetCodeAt(ByVal commands As String, index As Integer) As String |
81: | GetCodeAt = Mid(commands, 1 + ((index - 1) * 2), 2) |
82: | End Function |
83: | Private Sub SetCodeAt(ByRef commands As String, index As Integer, ByVal value As String) |
84: | Dim base As Integer |
85: | base = 1 + ((index - 1) * 2) |
86: | commands = Left(commands, base - 1) & value & Mid(commands, base + 2) |
87: | End Sub |
88: | Private Function RemoveCodeAt(ByRef commands As String, index As Integer) As String |
89: | Dim base As Integer |
90: | base = 1 + ((index - 1) * 2) |
91: | RemoveCodeAt = Mid(commands, base, 2) |
92: | commands = Left(commands, base - 1) & Mid(commands, base + 2) |
93: | End Function |
94: | Private Sub InsertCodeAt(ByRef commands As String, index As Integer, ByVal value As String) |
95: | Dim base As Integer |
96: | base = 1 + ((index - 1) * 2) |
97: | commands = Left(commands, base - 1) & value & Mid(commands, base) |
98: | End Sub |
99: | Private Function OperationAt(ByVal commands As String, index As Integer) As String |
100: | OperationAt = Mid(GetCodeAt(commands, index), 1, 1) |
101: | End Function |
102: | Private Sub MoveCode(ByRef commands As String, oldIndex As Integer, newIndex As Integer) |
103: | If oldIndex = newIndex Then Exit Sub |
104: | |
105: | Dim value As String |
106: | value = RemoveCodeAt(commands, oldIndex) |
107: | Call InsertCodeAt(commands, newIndex, value) |
108: | End Sub |
109: | ' Based on article http://somethinkodd.com/oddthinking/2006/01/16/comparing-strings-an-analysis-of-diff-algorithms/ |
110: | ' Returns sequence of command-value character pairs. |
111: | ' Valid commands are DIFF_INSERT, DIFF_DELETE, DIFF_COPY |
112: | Public Function Diff(ByVal u As String, ByVal v As String) As String |
113: | Dim i As Integer |
114: | Dim j As Integer |
115: | Dim result As String |
116: | |
117: | Dim ni As Integer |
118: | Dim nj As Integer |
119: | Dim cost As Integer |
120: | Dim operation As String |
121: | Dim count As Integer |
122: | |
123: | ' calculate the matrix |
124: | Call LevenshteinDistance(u, v) |
125: | i = Len(u) |
126: | j = Len(v) |
127: | result = "" |
128: | count = 0 |
129: | |
130: | ' A minimising path on a D matrix is one which at a given element travels up, left or diagonally up-and-left |
131: | ' to the minimum of its "preceding" neighbours. If more than one neighbour has an equal value, the choice |
132: | ' is arbitrary. The path continues until it reaches the top left corner. |
133: | While (i > 0) Or (j > 0) |
134: | cost = 32000 ' some very big number |
135: | |
136: | ' If the path travels from an element D[i,j] to its neighbour above it, D[i-1,j], this represents |
137: | ' the deletion of a symbol from the string u at position i. |
138: | If CanGoTo(i, j, -1, 0, ni, nj, cost) Then |
139: | operation = DIFF_DELETE & Mid(u, i, 1) |
140: | End If |
141: | |
142: | ' If the path travels from an element D[i,j] to its left neighbour, D[i,j-1], |
143: | ' this represents the insertion of a symbol v[j] in the string u at position i. |
144: | If CanGoTo(i, j, 0, -1, ni, nj, cost) Then |
145: | operation = DIFF_INSERT & Mid(v, j, 1) |
146: | End If |
147: | |
148: | ' If the path travels from an element D[i,j] to its upper-left neighbour, D[i-1,j-1], and u[i] = v[j], |
149: | ' then this means that the two strings match at this point, and no operation is required. |
150: | ' As character replacement (as opposed to deletion and insertion) is not allowed in this model, |
151: | ' the path may not travel to the upper-left neighbour if u[i] <> v[j]. |
152: | If (i > 0) And (j > 0) Then |
153: | If (Mid(u, i, 1) = Mid(v, j, 1)) Then |
154: | If CanGoTo(i, j, -1, -1, ni, nj, cost) Then |
155: | operation = DIFF_COPY & Mid(u, i, 1) |
156: | End If |
157: | End If |
158: | End If |
159: | |
160: | result = operation & result |
161: | count = count + 1 |
162: | i = ni |
163: | j = nj |
164: | Wend |
165: | |
166: | ' at this point we have the 'result' ready |
167: | ' now we will group delete/insert operations to make it nicer |
168: | |
169: | ' we will translate -a+A-b+B-c+C sequences |
170: | ' into -a-b-c+A+B+C |
171: | Dim c As Integer |
172: | Dim base As Integer |
173: | Dim size As Integer |
174: | c = 1 |
175: | While c <= count - 1 |
176: | If (OperationAt(result, c + 0) = DIFF_DELETE) And _ |
177: | (OperationAt(result, c + 1) = DIFF_INSERT) Then |
178: | ' start of the sequence |
179: | base = c |
180: | size = 1 |
181: | Do |
182: | If (OperationAt(result, base + (size * 2) + 0) = DIFF_DELETE) And _ |
183: | (OperationAt(result, base + (size * 2) + 1) = DIFF_INSERT) Then |
184: | size = size + 1 |
185: | Else |
186: | Exit Do |
187: | End If |
188: | Loop |
189: | ' now 'size' is the number of consecutive delete/insert pairs |
190: | ' 'base' is the start index |
191: | Dim newIndex As Integer |
192: | Dim oldIndex As Integer |
193: | newIndex = base + 1 |
194: | oldIndex = base + 2 |
195: | While size > 1 |
196: | Call MoveCode(result, oldIndex, newIndex) |
197: | newIndex = newIndex + 1 |
198: | oldIndex = oldIndex + 2 |
199: | size = size - 1 |
200: | Wend |
201: | c = newIndex |
202: | Else |
203: | c = c + 1 |
204: | End If |
205: | Wend |
206: | |
207: | ' we will translate -a+B+C-d sequences |
208: | ' into -a-d+B+C |
209: | c = 1 |
210: | While c <= count - 1 |
211: | If (OperationAt(result, c + 0) = DIFF_DELETE) And _ |
212: | (OperationAt(result, c + 1) = DIFF_INSERT) Then |
213: | ' possibly start of insert... delete pattern |
214: | base = c |
215: | size = 1 |
216: | Do |
217: | If (OperationAt(result, base + size + 1) = DIFF_INSERT) Then |
218: | size = size + 1 |
219: | Else |
220: | Exit Do |
221: | End If |
222: | Loop |
223: | |
224: | ' now 'size' is the number of consecutive inserts |
225: | If (OperationAt(result, base + size + 1) = DIFF_DELETE) Then |
226: | ' we have delete operation at 'base' and at 'base + size + 1' |
227: | ' in between there are inserts. lets merge those blocks of deletes |
228: | Do |
229: | Call MoveCode(result, base + size + 1, base + 1) |
230: | base = base + 1 |
231: | If OperationAt(result, base + size + 1) <> DIFF_DELETE Then |
232: | c = base + size + 1 |
233: | Exit Do |
234: | End If |
235: | Loop |
236: | Else |
237: | c = base + size + 1 |
238: | End If |
239: | Else |
240: | c = c + 1 |
241: | End If |
242: | Wend |
243: | |
244: | Diff = result |
245: | End Function |