comparison src/fschmidt/util/diff/Diff.java @ 68:00520880ad02

add fschmidt source
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 05 Oct 2025 17:24:15 -0600
parents
children
comparison
equal deleted inserted replaced
67:9d0fefce6985 68:00520880ad02
1 /*
2 Copyright (c) 2008 Franklin Schmidt <fschmidt@gmail.com>
3
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22
23 package fschmidt.util.diff;
24
25 import java.util.*;
26
27 /*
28 taken from http://www.incava.org/projects/java-diff/
29 */
30
31 /**
32 * Compares two collections, returning a list of the additions, changes, and
33 * deletions between them. A <code>Comparator</code> may be passed as an
34 * argument to the constructor, and will thus be used. If not provided, the
35 * initial value in the <code>a</code> ("from") collection will be looked at to
36 * see if it supports the <code>Comparable</code> interface. If so, its
37 * <code>equals</code> and <code>compareTo</code> methods will be invoked on the
38 * instances in the "from" and "to" collections; otherwise, for speed, hash
39 * codes from the objects will be used instead for comparison.
40 *
41 * <p>The file FileDiff.java shows an example usage of this class, in an
42 * application similar to the Unix "diff" program.</p>
43 */
44 public class Diff
45 {
46 /**
47 * The source array, AKA the "from" values.
48 */
49 protected Object[] a;
50
51 /**
52 * The target array, AKA the "to" values.
53 */
54 protected Object[] b;
55
56 /**
57 * The list of differences, as <code>Difference</code> instances.
58 */
59 protected List<Difference> diffs = new ArrayList<Difference>();
60
61 /**
62 * The pending, uncommitted difference.
63 */
64 private Difference pending;
65
66 /**
67 * The comparator used, if any.
68 */
69 private Comparator<Object> comparator;
70
71 /**
72 * The thresholds.
73 */
74 private TreeMap<Integer,Integer> thresh;
75
76 /**
77 * Constructs the Diff object for the two arrays, using the given comparator.
78 */
79 public Diff(Object[] a, Object[] b, Comparator<Object> comp)
80 {
81 this.a = a;
82 this.b = b;
83 this.comparator = comp;
84 this.thresh = null; // created in getLongestCommonSubsequences
85 }
86
87 /**
88 * Constructs the Diff object for the two arrays, using the default
89 * comparison mechanism between the objects, such as <code>equals</code> and
90 * <code>compareTo</code>.
91 */
92 public Diff(Object[] a, Object[] b)
93 {
94 this(a, b, null);
95 }
96
97 /**
98 * Constructs the Diff object for the two collections, using the given
99 * comparator.
100 */
101 public Diff(Collection a, Collection b, Comparator<Object> comp)
102 {
103 this(a.toArray(), b.toArray(), comp);
104 }
105
106 /**
107 * Constructs the Diff object for the two collections, using the default
108 * comparison mechanism between the objects, such as <code>equals</code> and
109 * <code>compareTo</code>.
110 */
111 public Diff(Collection a, Collection b)
112 {
113 this(a, b, null);
114 }
115
116 /**
117 * Runs diff and returns the results.
118 */
119 public List<Difference> diff()
120 {
121 traverseSequences();
122
123 // add the last difference, if pending:
124 if (pending != null) {
125 diffs.add(pending);
126 }
127
128 return diffs;
129 }
130
131 /**
132 * Traverses the sequences, seeking the longest common subsequences,
133 * invoking the methods <code>finishedA</code>, <code>finishedB</code>,
134 * <code>onANotB</code>, and <code>onBNotA</code>.
135 */
136 protected void traverseSequences()
137 {
138 Integer[] matches = getLongestCommonSubsequences();
139
140 int lastA = a.length - 1;
141 int lastB = b.length - 1;
142 int bi = 0;
143 int ai;
144
145 int lastMatch = matches.length - 1;
146
147 for (ai = 0; ai <= lastMatch; ++ai) {
148 Integer bLine = matches[ai];
149
150 if (bLine == null) {
151 onANotB(ai, bi);
152 }
153 else {
154 while (bi < bLine.intValue()) {
155 onBNotA(ai, bi++);
156 }
157
158 onMatch(ai, bi++);
159 }
160 }
161
162 boolean calledFinishA = false;
163 boolean calledFinishB = false;
164
165 while (ai <= lastA || bi <= lastB) {
166
167 // last A?
168 if (ai == lastA + 1 && bi <= lastB) {
169 if (!calledFinishA && callFinishedA()) {
170 finishedA(lastA);
171 calledFinishA = true;
172 }
173 else {
174 while (bi <= lastB) {
175 onBNotA(ai, bi++);
176 }
177 }
178 }
179
180 // last B?
181 if (bi == lastB + 1 && ai <= lastA) {
182 if (!calledFinishB && callFinishedB()) {
183 finishedB(lastB);
184 calledFinishB = true;
185 }
186 else {
187 while (ai <= lastA) {
188 onANotB(ai++, bi);
189 }
190 }
191 }
192
193 if (ai <= lastA) {
194 onANotB(ai++, bi);
195 }
196
197 if (bi <= lastB) {
198 onBNotA(ai, bi++);
199 }
200 }
201 }
202
203 /**
204 * Override and return true in order to have <code>finishedA</code> invoked
205 * at the last element in the <code>a</code> array.
206 */
207 protected boolean callFinishedA()
208 {
209 return false;
210 }
211
212 /**
213 * Override and return true in order to have <code>finishedB</code> invoked
214 * at the last element in the <code>b</code> array.
215 */
216 protected boolean callFinishedB()
217 {
218 return false;
219 }
220
221 /**
222 * Invoked at the last element in <code>a</code>, if
223 * <code>callFinishedA</code> returns true.
224 */
225 protected void finishedA(int lastA)
226 {
227 }
228
229 /**
230 * Invoked at the last element in <code>b</code>, if
231 * <code>callFinishedB</code> returns true.
232 */
233 protected void finishedB(int lastB)
234 {
235 }
236
237 /**
238 * Invoked for elements in <code>a</code> and not in <code>b</code>.
239 */
240 protected void onANotB(int ai, int bi)
241 {
242 if (pending == null) {
243 pending = new Difference(ai, ai, bi, -1);
244 }
245 else {
246 pending.setDeleted(ai);
247 }
248 }
249
250 /**
251 * Invoked for elements in <code>b</code> and not in <code>a</code>.
252 */
253 protected void onBNotA(int ai, int bi)
254 {
255 if (pending == null) {
256 pending = new Difference(ai, -1, bi, bi);
257 }
258 else {
259 pending.setAdded(bi);
260 }
261 }
262
263 /**
264 * Invoked for elements matching in <code>a</code> and <code>b</code>.
265 */
266 protected void onMatch(int ai, int bi)
267 {
268 if (pending == null) {
269 // no current pending
270 }
271 else {
272 diffs.add(pending);
273 pending = null;
274 }
275 }
276
277 /**
278 * Compares the two objects, using the comparator provided with the
279 * constructor, if any.
280 */
281 protected boolean equals(Object x, Object y)
282 {
283 return comparator == null ? x.equals(y) : comparator.compare(x, y) == 0;
284 }
285
286 /**
287 * Returns an array of the longest common subsequences.
288 */
289 public Integer[] getLongestCommonSubsequences()
290 {
291 int aStart = 0;
292 int aEnd = a.length - 1;
293
294 int bStart = 0;
295 int bEnd = b.length - 1;
296
297 TreeMap<Integer,Integer> matches = new TreeMap<Integer,Integer>();
298
299 while (aStart <= aEnd && bStart <= bEnd && equals(a[aStart], b[bStart])) {
300 matches.put(new Integer(aStart++), new Integer(bStart++));
301 }
302
303 while (aStart <= aEnd && bStart <= bEnd && equals(a[aEnd], b[bEnd])) {
304 matches.put(new Integer(aEnd--), new Integer(bEnd--));
305 }
306
307 Map<Object,List<Integer>> bMatches = null;
308 if (comparator == null) {
309 if (a.length > 0 && a[0] instanceof Comparable) {
310 // this uses the Comparable interface
311 bMatches = new TreeMap<Object,List<Integer>>();
312 }
313 else {
314 // this just uses hashCode()
315 bMatches = new HashMap<Object,List<Integer>>();
316 }
317 }
318 else {
319 // we don't really want them sorted, but this is the only Map
320 // implementation (as of JDK 1.4) that takes a comparator.
321 bMatches = new TreeMap<Object,List<Integer>>(comparator);
322 }
323
324 for (int bi = bStart; bi <= bEnd; ++bi) {
325 Object element = b[bi];
326 Object key = element;
327 List<Integer> positions = bMatches.get(key);
328 if (positions == null) {
329 positions = new ArrayList<Integer>();
330 bMatches.put(key, positions);
331 }
332 positions.add(new Integer(bi));
333 }
334
335 thresh = new TreeMap<Integer,Integer>();
336 Map<Object,Object[]> links = new HashMap<Object,Object[]>();
337
338 for (int i = aStart; i <= aEnd; ++i) {
339 Object aElement = a[i]; // keygen here.
340 List<Integer> positions = bMatches.get(aElement);
341
342 if (positions != null) {
343 Integer k = new Integer(0);
344 ListIterator<Integer> pit = positions.listIterator(positions.size());
345 while (pit.hasPrevious()) {
346 Integer j = pit.previous();
347
348 k = insert(j, k);
349
350 if (k == null) {
351 // nothing
352 }
353 else {
354 Object value = k.intValue() > 0 ? links.get(new Integer(k.intValue() - 1)) : null;
355 links.put(k, new Object[] { value, new Integer(i), j });
356 }
357 }
358 }
359 }
360
361 if (thresh.size() > 0) {
362 Integer ti = thresh.lastKey();
363 Object[] link = links.get(ti);
364 while (link != null) {
365 Integer x = (Integer)link[1];
366 Integer y = (Integer)link[2];
367 matches.put(x, y);
368 link = (Object[])link[0];
369 }
370 }
371
372 return toArray(matches);
373 }
374
375 /**
376 * Converts the map (indexed by java.lang.Integers) into an array.
377 */
378 protected static Integer[] toArray(TreeMap map)
379 {
380 int size = map.size() == 0 ? 0 : 1 + ((Integer)map.lastKey()).intValue();
381 Integer[] ary = new Integer[size];
382 Iterator it = map.keySet().iterator();
383
384 while (it.hasNext()) {
385 Integer idx = (Integer)it.next();
386 Integer val = (Integer)map.get(idx);
387 ary[idx.intValue()] = val;
388 }
389 return ary;
390 }
391
392 /**
393 * Returns whether the integer is not zero (including if it is not null).
394 */
395 protected static boolean isNonzero(Integer i)
396 {
397 return i != null && i.intValue() != 0;
398 }
399
400 /**
401 * Returns whether the value in the map for the given index is greater than
402 * the given value.
403 */
404 protected boolean isGreaterThan(Integer index, Integer val)
405 {
406 Integer lhs = thresh.get(index);
407 return lhs != null && val != null && lhs.compareTo(val) > 0;
408 }
409
410 /**
411 * Returns whether the value in the map for the given index is less than
412 * the given value.
413 */
414 protected boolean isLessThan(Integer index, Integer val)
415 {
416 Integer lhs = thresh.get(index);
417 return lhs != null && (val == null || lhs.compareTo(val) < 0);
418 }
419
420 /**
421 * Returns the value for the greatest key in the map.
422 */
423 protected Integer getLastValue()
424 {
425 return thresh.get(thresh.lastKey());
426 }
427
428 /**
429 * Adds the given value to the "end" of the threshold map, that is, with the
430 * greatest index/key.
431 */
432 protected void append(Integer value)
433 {
434 Integer addIdx = null;
435 if (thresh.size() == 0) {
436 addIdx = new Integer(0);
437 }
438 else {
439 Integer lastKey = thresh.lastKey();
440 addIdx = new Integer(lastKey.intValue() + 1);
441 }
442 thresh.put(addIdx, value);
443 }
444
445 /**
446 * Inserts the given values into the threshold map.
447 */
448 protected Integer insert(Integer j, Integer k)
449 {
450 if (isNonzero(k) && isGreaterThan(k, j) && isLessThan(new Integer(k.intValue() - 1), j)) {
451 thresh.put(k, j);
452 }
453 else {
454 int hi = -1;
455
456 if (isNonzero(k)) {
457 hi = k.intValue();
458 }
459 else if (thresh.size() > 0) {
460 hi = thresh.lastKey().intValue();
461 }
462
463 // off the end?
464 if (hi == -1 || j.compareTo(getLastValue()) > 0) {
465 append(j);
466 k = new Integer(hi + 1);
467 }
468 else {
469 // binary search for insertion point:
470 int lo = 0;
471
472 while (lo <= hi) {
473 int index = (hi + lo) / 2;
474 Integer val = thresh.get(new Integer(index));
475 int cmp = j.compareTo(val);
476
477 if (cmp == 0) {
478 return null;
479 }
480 else if (cmp > 0) {
481 lo = index + 1;
482 }
483 else {
484 hi = index - 1;
485 }
486 }
487
488 thresh.put(new Integer(lo), j);
489 k = new Integer(lo);
490 }
491 }
492
493 return k;
494 }
495
496 }