Mercurial Hosting > nabble
comparison src/fschmidt/util/diff/Diff.java @ 68:00520880ad02
add fschmidt source
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 05 Oct 2025 17:24:15 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:9d0fefce6985 | 68:00520880ad02 |
---|---|
1 /* | |
2 Copyright (c) 2008 Franklin Schmidt <fschmidt@gmail.com> | |
3 | |
4 Permission is hereby granted, free of charge, to any person obtaining a copy | |
5 of this software and associated documentation files (the "Software"), to deal | |
6 in the Software without restriction, including without limitation the rights | |
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
8 copies of the Software, and to permit persons to whom the Software is | |
9 furnished to do so, subject to the following conditions: | |
10 | |
11 The above copyright notice and this permission notice shall be included in | |
12 all copies or substantial portions of the Software. | |
13 | |
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
20 THE SOFTWARE. | |
21 */ | |
22 | |
23 package fschmidt.util.diff; | |
24 | |
25 import java.util.*; | |
26 | |
27 /* | |
28 taken from http://www.incava.org/projects/java-diff/ | |
29 */ | |
30 | |
31 /** | |
32 * Compares two collections, returning a list of the additions, changes, and | |
33 * deletions between them. A <code>Comparator</code> may be passed as an | |
34 * argument to the constructor, and will thus be used. If not provided, the | |
35 * initial value in the <code>a</code> ("from") collection will be looked at to | |
36 * see if it supports the <code>Comparable</code> interface. If so, its | |
37 * <code>equals</code> and <code>compareTo</code> methods will be invoked on the | |
38 * instances in the "from" and "to" collections; otherwise, for speed, hash | |
39 * codes from the objects will be used instead for comparison. | |
40 * | |
41 * <p>The file FileDiff.java shows an example usage of this class, in an | |
42 * application similar to the Unix "diff" program.</p> | |
43 */ | |
44 public class Diff | |
45 { | |
46 /** | |
47 * The source array, AKA the "from" values. | |
48 */ | |
49 protected Object[] a; | |
50 | |
51 /** | |
52 * The target array, AKA the "to" values. | |
53 */ | |
54 protected Object[] b; | |
55 | |
56 /** | |
57 * The list of differences, as <code>Difference</code> instances. | |
58 */ | |
59 protected List<Difference> diffs = new ArrayList<Difference>(); | |
60 | |
61 /** | |
62 * The pending, uncommitted difference. | |
63 */ | |
64 private Difference pending; | |
65 | |
66 /** | |
67 * The comparator used, if any. | |
68 */ | |
69 private Comparator<Object> comparator; | |
70 | |
71 /** | |
72 * The thresholds. | |
73 */ | |
74 private TreeMap<Integer,Integer> thresh; | |
75 | |
76 /** | |
77 * Constructs the Diff object for the two arrays, using the given comparator. | |
78 */ | |
79 public Diff(Object[] a, Object[] b, Comparator<Object> comp) | |
80 { | |
81 this.a = a; | |
82 this.b = b; | |
83 this.comparator = comp; | |
84 this.thresh = null; // created in getLongestCommonSubsequences | |
85 } | |
86 | |
87 /** | |
88 * Constructs the Diff object for the two arrays, using the default | |
89 * comparison mechanism between the objects, such as <code>equals</code> and | |
90 * <code>compareTo</code>. | |
91 */ | |
92 public Diff(Object[] a, Object[] b) | |
93 { | |
94 this(a, b, null); | |
95 } | |
96 | |
97 /** | |
98 * Constructs the Diff object for the two collections, using the given | |
99 * comparator. | |
100 */ | |
101 public Diff(Collection a, Collection b, Comparator<Object> comp) | |
102 { | |
103 this(a.toArray(), b.toArray(), comp); | |
104 } | |
105 | |
106 /** | |
107 * Constructs the Diff object for the two collections, using the default | |
108 * comparison mechanism between the objects, such as <code>equals</code> and | |
109 * <code>compareTo</code>. | |
110 */ | |
111 public Diff(Collection a, Collection b) | |
112 { | |
113 this(a, b, null); | |
114 } | |
115 | |
116 /** | |
117 * Runs diff and returns the results. | |
118 */ | |
119 public List<Difference> diff() | |
120 { | |
121 traverseSequences(); | |
122 | |
123 // add the last difference, if pending: | |
124 if (pending != null) { | |
125 diffs.add(pending); | |
126 } | |
127 | |
128 return diffs; | |
129 } | |
130 | |
131 /** | |
132 * Traverses the sequences, seeking the longest common subsequences, | |
133 * invoking the methods <code>finishedA</code>, <code>finishedB</code>, | |
134 * <code>onANotB</code>, and <code>onBNotA</code>. | |
135 */ | |
136 protected void traverseSequences() | |
137 { | |
138 Integer[] matches = getLongestCommonSubsequences(); | |
139 | |
140 int lastA = a.length - 1; | |
141 int lastB = b.length - 1; | |
142 int bi = 0; | |
143 int ai; | |
144 | |
145 int lastMatch = matches.length - 1; | |
146 | |
147 for (ai = 0; ai <= lastMatch; ++ai) { | |
148 Integer bLine = matches[ai]; | |
149 | |
150 if (bLine == null) { | |
151 onANotB(ai, bi); | |
152 } | |
153 else { | |
154 while (bi < bLine.intValue()) { | |
155 onBNotA(ai, bi++); | |
156 } | |
157 | |
158 onMatch(ai, bi++); | |
159 } | |
160 } | |
161 | |
162 boolean calledFinishA = false; | |
163 boolean calledFinishB = false; | |
164 | |
165 while (ai <= lastA || bi <= lastB) { | |
166 | |
167 // last A? | |
168 if (ai == lastA + 1 && bi <= lastB) { | |
169 if (!calledFinishA && callFinishedA()) { | |
170 finishedA(lastA); | |
171 calledFinishA = true; | |
172 } | |
173 else { | |
174 while (bi <= lastB) { | |
175 onBNotA(ai, bi++); | |
176 } | |
177 } | |
178 } | |
179 | |
180 // last B? | |
181 if (bi == lastB + 1 && ai <= lastA) { | |
182 if (!calledFinishB && callFinishedB()) { | |
183 finishedB(lastB); | |
184 calledFinishB = true; | |
185 } | |
186 else { | |
187 while (ai <= lastA) { | |
188 onANotB(ai++, bi); | |
189 } | |
190 } | |
191 } | |
192 | |
193 if (ai <= lastA) { | |
194 onANotB(ai++, bi); | |
195 } | |
196 | |
197 if (bi <= lastB) { | |
198 onBNotA(ai, bi++); | |
199 } | |
200 } | |
201 } | |
202 | |
203 /** | |
204 * Override and return true in order to have <code>finishedA</code> invoked | |
205 * at the last element in the <code>a</code> array. | |
206 */ | |
207 protected boolean callFinishedA() | |
208 { | |
209 return false; | |
210 } | |
211 | |
212 /** | |
213 * Override and return true in order to have <code>finishedB</code> invoked | |
214 * at the last element in the <code>b</code> array. | |
215 */ | |
216 protected boolean callFinishedB() | |
217 { | |
218 return false; | |
219 } | |
220 | |
221 /** | |
222 * Invoked at the last element in <code>a</code>, if | |
223 * <code>callFinishedA</code> returns true. | |
224 */ | |
225 protected void finishedA(int lastA) | |
226 { | |
227 } | |
228 | |
229 /** | |
230 * Invoked at the last element in <code>b</code>, if | |
231 * <code>callFinishedB</code> returns true. | |
232 */ | |
233 protected void finishedB(int lastB) | |
234 { | |
235 } | |
236 | |
237 /** | |
238 * Invoked for elements in <code>a</code> and not in <code>b</code>. | |
239 */ | |
240 protected void onANotB(int ai, int bi) | |
241 { | |
242 if (pending == null) { | |
243 pending = new Difference(ai, ai, bi, -1); | |
244 } | |
245 else { | |
246 pending.setDeleted(ai); | |
247 } | |
248 } | |
249 | |
250 /** | |
251 * Invoked for elements in <code>b</code> and not in <code>a</code>. | |
252 */ | |
253 protected void onBNotA(int ai, int bi) | |
254 { | |
255 if (pending == null) { | |
256 pending = new Difference(ai, -1, bi, bi); | |
257 } | |
258 else { | |
259 pending.setAdded(bi); | |
260 } | |
261 } | |
262 | |
263 /** | |
264 * Invoked for elements matching in <code>a</code> and <code>b</code>. | |
265 */ | |
266 protected void onMatch(int ai, int bi) | |
267 { | |
268 if (pending == null) { | |
269 // no current pending | |
270 } | |
271 else { | |
272 diffs.add(pending); | |
273 pending = null; | |
274 } | |
275 } | |
276 | |
277 /** | |
278 * Compares the two objects, using the comparator provided with the | |
279 * constructor, if any. | |
280 */ | |
281 protected boolean equals(Object x, Object y) | |
282 { | |
283 return comparator == null ? x.equals(y) : comparator.compare(x, y) == 0; | |
284 } | |
285 | |
286 /** | |
287 * Returns an array of the longest common subsequences. | |
288 */ | |
289 public Integer[] getLongestCommonSubsequences() | |
290 { | |
291 int aStart = 0; | |
292 int aEnd = a.length - 1; | |
293 | |
294 int bStart = 0; | |
295 int bEnd = b.length - 1; | |
296 | |
297 TreeMap<Integer,Integer> matches = new TreeMap<Integer,Integer>(); | |
298 | |
299 while (aStart <= aEnd && bStart <= bEnd && equals(a[aStart], b[bStart])) { | |
300 matches.put(new Integer(aStart++), new Integer(bStart++)); | |
301 } | |
302 | |
303 while (aStart <= aEnd && bStart <= bEnd && equals(a[aEnd], b[bEnd])) { | |
304 matches.put(new Integer(aEnd--), new Integer(bEnd--)); | |
305 } | |
306 | |
307 Map<Object,List<Integer>> bMatches = null; | |
308 if (comparator == null) { | |
309 if (a.length > 0 && a[0] instanceof Comparable) { | |
310 // this uses the Comparable interface | |
311 bMatches = new TreeMap<Object,List<Integer>>(); | |
312 } | |
313 else { | |
314 // this just uses hashCode() | |
315 bMatches = new HashMap<Object,List<Integer>>(); | |
316 } | |
317 } | |
318 else { | |
319 // we don't really want them sorted, but this is the only Map | |
320 // implementation (as of JDK 1.4) that takes a comparator. | |
321 bMatches = new TreeMap<Object,List<Integer>>(comparator); | |
322 } | |
323 | |
324 for (int bi = bStart; bi <= bEnd; ++bi) { | |
325 Object element = b[bi]; | |
326 Object key = element; | |
327 List<Integer> positions = bMatches.get(key); | |
328 if (positions == null) { | |
329 positions = new ArrayList<Integer>(); | |
330 bMatches.put(key, positions); | |
331 } | |
332 positions.add(new Integer(bi)); | |
333 } | |
334 | |
335 thresh = new TreeMap<Integer,Integer>(); | |
336 Map<Object,Object[]> links = new HashMap<Object,Object[]>(); | |
337 | |
338 for (int i = aStart; i <= aEnd; ++i) { | |
339 Object aElement = a[i]; // keygen here. | |
340 List<Integer> positions = bMatches.get(aElement); | |
341 | |
342 if (positions != null) { | |
343 Integer k = new Integer(0); | |
344 ListIterator<Integer> pit = positions.listIterator(positions.size()); | |
345 while (pit.hasPrevious()) { | |
346 Integer j = pit.previous(); | |
347 | |
348 k = insert(j, k); | |
349 | |
350 if (k == null) { | |
351 // nothing | |
352 } | |
353 else { | |
354 Object value = k.intValue() > 0 ? links.get(new Integer(k.intValue() - 1)) : null; | |
355 links.put(k, new Object[] { value, new Integer(i), j }); | |
356 } | |
357 } | |
358 } | |
359 } | |
360 | |
361 if (thresh.size() > 0) { | |
362 Integer ti = thresh.lastKey(); | |
363 Object[] link = links.get(ti); | |
364 while (link != null) { | |
365 Integer x = (Integer)link[1]; | |
366 Integer y = (Integer)link[2]; | |
367 matches.put(x, y); | |
368 link = (Object[])link[0]; | |
369 } | |
370 } | |
371 | |
372 return toArray(matches); | |
373 } | |
374 | |
375 /** | |
376 * Converts the map (indexed by java.lang.Integers) into an array. | |
377 */ | |
378 protected static Integer[] toArray(TreeMap map) | |
379 { | |
380 int size = map.size() == 0 ? 0 : 1 + ((Integer)map.lastKey()).intValue(); | |
381 Integer[] ary = new Integer[size]; | |
382 Iterator it = map.keySet().iterator(); | |
383 | |
384 while (it.hasNext()) { | |
385 Integer idx = (Integer)it.next(); | |
386 Integer val = (Integer)map.get(idx); | |
387 ary[idx.intValue()] = val; | |
388 } | |
389 return ary; | |
390 } | |
391 | |
392 /** | |
393 * Returns whether the integer is not zero (including if it is not null). | |
394 */ | |
395 protected static boolean isNonzero(Integer i) | |
396 { | |
397 return i != null && i.intValue() != 0; | |
398 } | |
399 | |
400 /** | |
401 * Returns whether the value in the map for the given index is greater than | |
402 * the given value. | |
403 */ | |
404 protected boolean isGreaterThan(Integer index, Integer val) | |
405 { | |
406 Integer lhs = thresh.get(index); | |
407 return lhs != null && val != null && lhs.compareTo(val) > 0; | |
408 } | |
409 | |
410 /** | |
411 * Returns whether the value in the map for the given index is less than | |
412 * the given value. | |
413 */ | |
414 protected boolean isLessThan(Integer index, Integer val) | |
415 { | |
416 Integer lhs = thresh.get(index); | |
417 return lhs != null && (val == null || lhs.compareTo(val) < 0); | |
418 } | |
419 | |
420 /** | |
421 * Returns the value for the greatest key in the map. | |
422 */ | |
423 protected Integer getLastValue() | |
424 { | |
425 return thresh.get(thresh.lastKey()); | |
426 } | |
427 | |
428 /** | |
429 * Adds the given value to the "end" of the threshold map, that is, with the | |
430 * greatest index/key. | |
431 */ | |
432 protected void append(Integer value) | |
433 { | |
434 Integer addIdx = null; | |
435 if (thresh.size() == 0) { | |
436 addIdx = new Integer(0); | |
437 } | |
438 else { | |
439 Integer lastKey = thresh.lastKey(); | |
440 addIdx = new Integer(lastKey.intValue() + 1); | |
441 } | |
442 thresh.put(addIdx, value); | |
443 } | |
444 | |
445 /** | |
446 * Inserts the given values into the threshold map. | |
447 */ | |
448 protected Integer insert(Integer j, Integer k) | |
449 { | |
450 if (isNonzero(k) && isGreaterThan(k, j) && isLessThan(new Integer(k.intValue() - 1), j)) { | |
451 thresh.put(k, j); | |
452 } | |
453 else { | |
454 int hi = -1; | |
455 | |
456 if (isNonzero(k)) { | |
457 hi = k.intValue(); | |
458 } | |
459 else if (thresh.size() > 0) { | |
460 hi = thresh.lastKey().intValue(); | |
461 } | |
462 | |
463 // off the end? | |
464 if (hi == -1 || j.compareTo(getLastValue()) > 0) { | |
465 append(j); | |
466 k = new Integer(hi + 1); | |
467 } | |
468 else { | |
469 // binary search for insertion point: | |
470 int lo = 0; | |
471 | |
472 while (lo <= hi) { | |
473 int index = (hi + lo) / 2; | |
474 Integer val = thresh.get(new Integer(index)); | |
475 int cmp = j.compareTo(val); | |
476 | |
477 if (cmp == 0) { | |
478 return null; | |
479 } | |
480 else if (cmp > 0) { | |
481 lo = index + 1; | |
482 } | |
483 else { | |
484 hi = index - 1; | |
485 } | |
486 } | |
487 | |
488 thresh.put(new Integer(lo), j); | |
489 k = new Integer(lo); | |
490 } | |
491 } | |
492 | |
493 return k; | |
494 } | |
495 | |
496 } |