68
|
1 /*
|
|
2 Copyright (c) 2008 Franklin Schmidt <fschmidt@gmail.com>
|
|
3
|
|
4 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5 of this software and associated documentation files (the "Software"), to deal
|
|
6 in the Software without restriction, including without limitation the rights
|
|
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
8 copies of the Software, and to permit persons to whom the Software is
|
|
9 furnished to do so, subject to the following conditions:
|
|
10
|
|
11 The above copyright notice and this permission notice shall be included in
|
|
12 all copies or substantial portions of the Software.
|
|
13
|
|
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
20 THE SOFTWARE.
|
|
21 */
|
|
22
|
|
23 package fschmidt.util.diff;
|
|
24
|
|
25 import java.util.*;
|
|
26
|
|
27 /*
|
|
28 taken from http://www.incava.org/projects/java-diff/
|
|
29 */
|
|
30
|
|
31 /**
|
|
32 * Compares two collections, returning a list of the additions, changes, and
|
|
33 * deletions between them. A <code>Comparator</code> may be passed as an
|
|
34 * argument to the constructor, and will thus be used. If not provided, the
|
|
35 * initial value in the <code>a</code> ("from") collection will be looked at to
|
|
36 * see if it supports the <code>Comparable</code> interface. If so, its
|
|
37 * <code>equals</code> and <code>compareTo</code> methods will be invoked on the
|
|
38 * instances in the "from" and "to" collections; otherwise, for speed, hash
|
|
39 * codes from the objects will be used instead for comparison.
|
|
40 *
|
|
41 * <p>The file FileDiff.java shows an example usage of this class, in an
|
|
42 * application similar to the Unix "diff" program.</p>
|
|
43 */
|
|
44 public class Diff
|
|
45 {
|
|
46 /**
|
|
47 * The source array, AKA the "from" values.
|
|
48 */
|
|
49 protected Object[] a;
|
|
50
|
|
51 /**
|
|
52 * The target array, AKA the "to" values.
|
|
53 */
|
|
54 protected Object[] b;
|
|
55
|
|
56 /**
|
|
57 * The list of differences, as <code>Difference</code> instances.
|
|
58 */
|
|
59 protected List<Difference> diffs = new ArrayList<Difference>();
|
|
60
|
|
61 /**
|
|
62 * The pending, uncommitted difference.
|
|
63 */
|
|
64 private Difference pending;
|
|
65
|
|
66 /**
|
|
67 * The comparator used, if any.
|
|
68 */
|
|
69 private Comparator<Object> comparator;
|
|
70
|
|
71 /**
|
|
72 * The thresholds.
|
|
73 */
|
|
74 private TreeMap<Integer,Integer> thresh;
|
|
75
|
|
76 /**
|
|
77 * Constructs the Diff object for the two arrays, using the given comparator.
|
|
78 */
|
|
79 public Diff(Object[] a, Object[] b, Comparator<Object> comp)
|
|
80 {
|
|
81 this.a = a;
|
|
82 this.b = b;
|
|
83 this.comparator = comp;
|
|
84 this.thresh = null; // created in getLongestCommonSubsequences
|
|
85 }
|
|
86
|
|
87 /**
|
|
88 * Constructs the Diff object for the two arrays, using the default
|
|
89 * comparison mechanism between the objects, such as <code>equals</code> and
|
|
90 * <code>compareTo</code>.
|
|
91 */
|
|
92 public Diff(Object[] a, Object[] b)
|
|
93 {
|
|
94 this(a, b, null);
|
|
95 }
|
|
96
|
|
97 /**
|
|
98 * Constructs the Diff object for the two collections, using the given
|
|
99 * comparator.
|
|
100 */
|
|
101 public Diff(Collection a, Collection b, Comparator<Object> comp)
|
|
102 {
|
|
103 this(a.toArray(), b.toArray(), comp);
|
|
104 }
|
|
105
|
|
106 /**
|
|
107 * Constructs the Diff object for the two collections, using the default
|
|
108 * comparison mechanism between the objects, such as <code>equals</code> and
|
|
109 * <code>compareTo</code>.
|
|
110 */
|
|
111 public Diff(Collection a, Collection b)
|
|
112 {
|
|
113 this(a, b, null);
|
|
114 }
|
|
115
|
|
116 /**
|
|
117 * Runs diff and returns the results.
|
|
118 */
|
|
119 public List<Difference> diff()
|
|
120 {
|
|
121 traverseSequences();
|
|
122
|
|
123 // add the last difference, if pending:
|
|
124 if (pending != null) {
|
|
125 diffs.add(pending);
|
|
126 }
|
|
127
|
|
128 return diffs;
|
|
129 }
|
|
130
|
|
131 /**
|
|
132 * Traverses the sequences, seeking the longest common subsequences,
|
|
133 * invoking the methods <code>finishedA</code>, <code>finishedB</code>,
|
|
134 * <code>onANotB</code>, and <code>onBNotA</code>.
|
|
135 */
|
|
136 protected void traverseSequences()
|
|
137 {
|
|
138 Integer[] matches = getLongestCommonSubsequences();
|
|
139
|
|
140 int lastA = a.length - 1;
|
|
141 int lastB = b.length - 1;
|
|
142 int bi = 0;
|
|
143 int ai;
|
|
144
|
|
145 int lastMatch = matches.length - 1;
|
|
146
|
|
147 for (ai = 0; ai <= lastMatch; ++ai) {
|
|
148 Integer bLine = matches[ai];
|
|
149
|
|
150 if (bLine == null) {
|
|
151 onANotB(ai, bi);
|
|
152 }
|
|
153 else {
|
|
154 while (bi < bLine.intValue()) {
|
|
155 onBNotA(ai, bi++);
|
|
156 }
|
|
157
|
|
158 onMatch(ai, bi++);
|
|
159 }
|
|
160 }
|
|
161
|
|
162 boolean calledFinishA = false;
|
|
163 boolean calledFinishB = false;
|
|
164
|
|
165 while (ai <= lastA || bi <= lastB) {
|
|
166
|
|
167 // last A?
|
|
168 if (ai == lastA + 1 && bi <= lastB) {
|
|
169 if (!calledFinishA && callFinishedA()) {
|
|
170 finishedA(lastA);
|
|
171 calledFinishA = true;
|
|
172 }
|
|
173 else {
|
|
174 while (bi <= lastB) {
|
|
175 onBNotA(ai, bi++);
|
|
176 }
|
|
177 }
|
|
178 }
|
|
179
|
|
180 // last B?
|
|
181 if (bi == lastB + 1 && ai <= lastA) {
|
|
182 if (!calledFinishB && callFinishedB()) {
|
|
183 finishedB(lastB);
|
|
184 calledFinishB = true;
|
|
185 }
|
|
186 else {
|
|
187 while (ai <= lastA) {
|
|
188 onANotB(ai++, bi);
|
|
189 }
|
|
190 }
|
|
191 }
|
|
192
|
|
193 if (ai <= lastA) {
|
|
194 onANotB(ai++, bi);
|
|
195 }
|
|
196
|
|
197 if (bi <= lastB) {
|
|
198 onBNotA(ai, bi++);
|
|
199 }
|
|
200 }
|
|
201 }
|
|
202
|
|
203 /**
|
|
204 * Override and return true in order to have <code>finishedA</code> invoked
|
|
205 * at the last element in the <code>a</code> array.
|
|
206 */
|
|
207 protected boolean callFinishedA()
|
|
208 {
|
|
209 return false;
|
|
210 }
|
|
211
|
|
212 /**
|
|
213 * Override and return true in order to have <code>finishedB</code> invoked
|
|
214 * at the last element in the <code>b</code> array.
|
|
215 */
|
|
216 protected boolean callFinishedB()
|
|
217 {
|
|
218 return false;
|
|
219 }
|
|
220
|
|
221 /**
|
|
222 * Invoked at the last element in <code>a</code>, if
|
|
223 * <code>callFinishedA</code> returns true.
|
|
224 */
|
|
225 protected void finishedA(int lastA)
|
|
226 {
|
|
227 }
|
|
228
|
|
229 /**
|
|
230 * Invoked at the last element in <code>b</code>, if
|
|
231 * <code>callFinishedB</code> returns true.
|
|
232 */
|
|
233 protected void finishedB(int lastB)
|
|
234 {
|
|
235 }
|
|
236
|
|
237 /**
|
|
238 * Invoked for elements in <code>a</code> and not in <code>b</code>.
|
|
239 */
|
|
240 protected void onANotB(int ai, int bi)
|
|
241 {
|
|
242 if (pending == null) {
|
|
243 pending = new Difference(ai, ai, bi, -1);
|
|
244 }
|
|
245 else {
|
|
246 pending.setDeleted(ai);
|
|
247 }
|
|
248 }
|
|
249
|
|
250 /**
|
|
251 * Invoked for elements in <code>b</code> and not in <code>a</code>.
|
|
252 */
|
|
253 protected void onBNotA(int ai, int bi)
|
|
254 {
|
|
255 if (pending == null) {
|
|
256 pending = new Difference(ai, -1, bi, bi);
|
|
257 }
|
|
258 else {
|
|
259 pending.setAdded(bi);
|
|
260 }
|
|
261 }
|
|
262
|
|
263 /**
|
|
264 * Invoked for elements matching in <code>a</code> and <code>b</code>.
|
|
265 */
|
|
266 protected void onMatch(int ai, int bi)
|
|
267 {
|
|
268 if (pending == null) {
|
|
269 // no current pending
|
|
270 }
|
|
271 else {
|
|
272 diffs.add(pending);
|
|
273 pending = null;
|
|
274 }
|
|
275 }
|
|
276
|
|
277 /**
|
|
278 * Compares the two objects, using the comparator provided with the
|
|
279 * constructor, if any.
|
|
280 */
|
|
281 protected boolean equals(Object x, Object y)
|
|
282 {
|
|
283 return comparator == null ? x.equals(y) : comparator.compare(x, y) == 0;
|
|
284 }
|
|
285
|
|
286 /**
|
|
287 * Returns an array of the longest common subsequences.
|
|
288 */
|
|
289 public Integer[] getLongestCommonSubsequences()
|
|
290 {
|
|
291 int aStart = 0;
|
|
292 int aEnd = a.length - 1;
|
|
293
|
|
294 int bStart = 0;
|
|
295 int bEnd = b.length - 1;
|
|
296
|
|
297 TreeMap<Integer,Integer> matches = new TreeMap<Integer,Integer>();
|
|
298
|
|
299 while (aStart <= aEnd && bStart <= bEnd && equals(a[aStart], b[bStart])) {
|
|
300 matches.put(new Integer(aStart++), new Integer(bStart++));
|
|
301 }
|
|
302
|
|
303 while (aStart <= aEnd && bStart <= bEnd && equals(a[aEnd], b[bEnd])) {
|
|
304 matches.put(new Integer(aEnd--), new Integer(bEnd--));
|
|
305 }
|
|
306
|
|
307 Map<Object,List<Integer>> bMatches = null;
|
|
308 if (comparator == null) {
|
|
309 if (a.length > 0 && a[0] instanceof Comparable) {
|
|
310 // this uses the Comparable interface
|
|
311 bMatches = new TreeMap<Object,List<Integer>>();
|
|
312 }
|
|
313 else {
|
|
314 // this just uses hashCode()
|
|
315 bMatches = new HashMap<Object,List<Integer>>();
|
|
316 }
|
|
317 }
|
|
318 else {
|
|
319 // we don't really want them sorted, but this is the only Map
|
|
320 // implementation (as of JDK 1.4) that takes a comparator.
|
|
321 bMatches = new TreeMap<Object,List<Integer>>(comparator);
|
|
322 }
|
|
323
|
|
324 for (int bi = bStart; bi <= bEnd; ++bi) {
|
|
325 Object element = b[bi];
|
|
326 Object key = element;
|
|
327 List<Integer> positions = bMatches.get(key);
|
|
328 if (positions == null) {
|
|
329 positions = new ArrayList<Integer>();
|
|
330 bMatches.put(key, positions);
|
|
331 }
|
|
332 positions.add(new Integer(bi));
|
|
333 }
|
|
334
|
|
335 thresh = new TreeMap<Integer,Integer>();
|
|
336 Map<Object,Object[]> links = new HashMap<Object,Object[]>();
|
|
337
|
|
338 for (int i = aStart; i <= aEnd; ++i) {
|
|
339 Object aElement = a[i]; // keygen here.
|
|
340 List<Integer> positions = bMatches.get(aElement);
|
|
341
|
|
342 if (positions != null) {
|
|
343 Integer k = new Integer(0);
|
|
344 ListIterator<Integer> pit = positions.listIterator(positions.size());
|
|
345 while (pit.hasPrevious()) {
|
|
346 Integer j = pit.previous();
|
|
347
|
|
348 k = insert(j, k);
|
|
349
|
|
350 if (k == null) {
|
|
351 // nothing
|
|
352 }
|
|
353 else {
|
|
354 Object value = k.intValue() > 0 ? links.get(new Integer(k.intValue() - 1)) : null;
|
|
355 links.put(k, new Object[] { value, new Integer(i), j });
|
|
356 }
|
|
357 }
|
|
358 }
|
|
359 }
|
|
360
|
|
361 if (thresh.size() > 0) {
|
|
362 Integer ti = thresh.lastKey();
|
|
363 Object[] link = links.get(ti);
|
|
364 while (link != null) {
|
|
365 Integer x = (Integer)link[1];
|
|
366 Integer y = (Integer)link[2];
|
|
367 matches.put(x, y);
|
|
368 link = (Object[])link[0];
|
|
369 }
|
|
370 }
|
|
371
|
|
372 return toArray(matches);
|
|
373 }
|
|
374
|
|
375 /**
|
|
376 * Converts the map (indexed by java.lang.Integers) into an array.
|
|
377 */
|
|
378 protected static Integer[] toArray(TreeMap map)
|
|
379 {
|
|
380 int size = map.size() == 0 ? 0 : 1 + ((Integer)map.lastKey()).intValue();
|
|
381 Integer[] ary = new Integer[size];
|
|
382 Iterator it = map.keySet().iterator();
|
|
383
|
|
384 while (it.hasNext()) {
|
|
385 Integer idx = (Integer)it.next();
|
|
386 Integer val = (Integer)map.get(idx);
|
|
387 ary[idx.intValue()] = val;
|
|
388 }
|
|
389 return ary;
|
|
390 }
|
|
391
|
|
392 /**
|
|
393 * Returns whether the integer is not zero (including if it is not null).
|
|
394 */
|
|
395 protected static boolean isNonzero(Integer i)
|
|
396 {
|
|
397 return i != null && i.intValue() != 0;
|
|
398 }
|
|
399
|
|
400 /**
|
|
401 * Returns whether the value in the map for the given index is greater than
|
|
402 * the given value.
|
|
403 */
|
|
404 protected boolean isGreaterThan(Integer index, Integer val)
|
|
405 {
|
|
406 Integer lhs = thresh.get(index);
|
|
407 return lhs != null && val != null && lhs.compareTo(val) > 0;
|
|
408 }
|
|
409
|
|
410 /**
|
|
411 * Returns whether the value in the map for the given index is less than
|
|
412 * the given value.
|
|
413 */
|
|
414 protected boolean isLessThan(Integer index, Integer val)
|
|
415 {
|
|
416 Integer lhs = thresh.get(index);
|
|
417 return lhs != null && (val == null || lhs.compareTo(val) < 0);
|
|
418 }
|
|
419
|
|
420 /**
|
|
421 * Returns the value for the greatest key in the map.
|
|
422 */
|
|
423 protected Integer getLastValue()
|
|
424 {
|
|
425 return thresh.get(thresh.lastKey());
|
|
426 }
|
|
427
|
|
428 /**
|
|
429 * Adds the given value to the "end" of the threshold map, that is, with the
|
|
430 * greatest index/key.
|
|
431 */
|
|
432 protected void append(Integer value)
|
|
433 {
|
|
434 Integer addIdx = null;
|
|
435 if (thresh.size() == 0) {
|
|
436 addIdx = new Integer(0);
|
|
437 }
|
|
438 else {
|
|
439 Integer lastKey = thresh.lastKey();
|
|
440 addIdx = new Integer(lastKey.intValue() + 1);
|
|
441 }
|
|
442 thresh.put(addIdx, value);
|
|
443 }
|
|
444
|
|
445 /**
|
|
446 * Inserts the given values into the threshold map.
|
|
447 */
|
|
448 protected Integer insert(Integer j, Integer k)
|
|
449 {
|
|
450 if (isNonzero(k) && isGreaterThan(k, j) && isLessThan(new Integer(k.intValue() - 1), j)) {
|
|
451 thresh.put(k, j);
|
|
452 }
|
|
453 else {
|
|
454 int hi = -1;
|
|
455
|
|
456 if (isNonzero(k)) {
|
|
457 hi = k.intValue();
|
|
458 }
|
|
459 else if (thresh.size() > 0) {
|
|
460 hi = thresh.lastKey().intValue();
|
|
461 }
|
|
462
|
|
463 // off the end?
|
|
464 if (hi == -1 || j.compareTo(getLastValue()) > 0) {
|
|
465 append(j);
|
|
466 k = new Integer(hi + 1);
|
|
467 }
|
|
468 else {
|
|
469 // binary search for insertion point:
|
|
470 int lo = 0;
|
|
471
|
|
472 while (lo <= hi) {
|
|
473 int index = (hi + lo) / 2;
|
|
474 Integer val = thresh.get(new Integer(index));
|
|
475 int cmp = j.compareTo(val);
|
|
476
|
|
477 if (cmp == 0) {
|
|
478 return null;
|
|
479 }
|
|
480 else if (cmp > 0) {
|
|
481 lo = index + 1;
|
|
482 }
|
|
483 else {
|
|
484 hi = index - 1;
|
|
485 }
|
|
486 }
|
|
487
|
|
488 thresh.put(new Integer(lo), j);
|
|
489 k = new Integer(lo);
|
|
490 }
|
|
491 }
|
|
492
|
|
493 return k;
|
|
494 }
|
|
495
|
|
496 }
|