Mercurial Hosting > nabble
comparison src/fschmidt/util/diff/Diff.java @ 68:00520880ad02
add fschmidt source
| author | Franklin Schmidt <fschmidt@gmail.com> |
|---|---|
| date | Sun, 05 Oct 2025 17:24:15 -0600 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 67:9d0fefce6985 | 68:00520880ad02 |
|---|---|
| 1 /* | |
| 2 Copyright (c) 2008 Franklin Schmidt <fschmidt@gmail.com> | |
| 3 | |
| 4 Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 5 of this software and associated documentation files (the "Software"), to deal | |
| 6 in the Software without restriction, including without limitation the rights | |
| 7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 8 copies of the Software, and to permit persons to whom the Software is | |
| 9 furnished to do so, subject to the following conditions: | |
| 10 | |
| 11 The above copyright notice and this permission notice shall be included in | |
| 12 all copies or substantial portions of the Software. | |
| 13 | |
| 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 20 THE SOFTWARE. | |
| 21 */ | |
| 22 | |
| 23 package fschmidt.util.diff; | |
| 24 | |
| 25 import java.util.*; | |
| 26 | |
| 27 /* | |
| 28 taken from http://www.incava.org/projects/java-diff/ | |
| 29 */ | |
| 30 | |
| 31 /** | |
| 32 * Compares two collections, returning a list of the additions, changes, and | |
| 33 * deletions between them. A <code>Comparator</code> may be passed as an | |
| 34 * argument to the constructor, and will thus be used. If not provided, the | |
| 35 * initial value in the <code>a</code> ("from") collection will be looked at to | |
| 36 * see if it supports the <code>Comparable</code> interface. If so, its | |
| 37 * <code>equals</code> and <code>compareTo</code> methods will be invoked on the | |
| 38 * instances in the "from" and "to" collections; otherwise, for speed, hash | |
| 39 * codes from the objects will be used instead for comparison. | |
| 40 * | |
| 41 * <p>The file FileDiff.java shows an example usage of this class, in an | |
| 42 * application similar to the Unix "diff" program.</p> | |
| 43 */ | |
| 44 public class Diff | |
| 45 { | |
| 46 /** | |
| 47 * The source array, AKA the "from" values. | |
| 48 */ | |
| 49 protected Object[] a; | |
| 50 | |
| 51 /** | |
| 52 * The target array, AKA the "to" values. | |
| 53 */ | |
| 54 protected Object[] b; | |
| 55 | |
| 56 /** | |
| 57 * The list of differences, as <code>Difference</code> instances. | |
| 58 */ | |
| 59 protected List<Difference> diffs = new ArrayList<Difference>(); | |
| 60 | |
| 61 /** | |
| 62 * The pending, uncommitted difference. | |
| 63 */ | |
| 64 private Difference pending; | |
| 65 | |
| 66 /** | |
| 67 * The comparator used, if any. | |
| 68 */ | |
| 69 private Comparator<Object> comparator; | |
| 70 | |
| 71 /** | |
| 72 * The thresholds. | |
| 73 */ | |
| 74 private TreeMap<Integer,Integer> thresh; | |
| 75 | |
| 76 /** | |
| 77 * Constructs the Diff object for the two arrays, using the given comparator. | |
| 78 */ | |
| 79 public Diff(Object[] a, Object[] b, Comparator<Object> comp) | |
| 80 { | |
| 81 this.a = a; | |
| 82 this.b = b; | |
| 83 this.comparator = comp; | |
| 84 this.thresh = null; // created in getLongestCommonSubsequences | |
| 85 } | |
| 86 | |
| 87 /** | |
| 88 * Constructs the Diff object for the two arrays, using the default | |
| 89 * comparison mechanism between the objects, such as <code>equals</code> and | |
| 90 * <code>compareTo</code>. | |
| 91 */ | |
| 92 public Diff(Object[] a, Object[] b) | |
| 93 { | |
| 94 this(a, b, null); | |
| 95 } | |
| 96 | |
| 97 /** | |
| 98 * Constructs the Diff object for the two collections, using the given | |
| 99 * comparator. | |
| 100 */ | |
| 101 public Diff(Collection a, Collection b, Comparator<Object> comp) | |
| 102 { | |
| 103 this(a.toArray(), b.toArray(), comp); | |
| 104 } | |
| 105 | |
| 106 /** | |
| 107 * Constructs the Diff object for the two collections, using the default | |
| 108 * comparison mechanism between the objects, such as <code>equals</code> and | |
| 109 * <code>compareTo</code>. | |
| 110 */ | |
| 111 public Diff(Collection a, Collection b) | |
| 112 { | |
| 113 this(a, b, null); | |
| 114 } | |
| 115 | |
| 116 /** | |
| 117 * Runs diff and returns the results. | |
| 118 */ | |
| 119 public List<Difference> diff() | |
| 120 { | |
| 121 traverseSequences(); | |
| 122 | |
| 123 // add the last difference, if pending: | |
| 124 if (pending != null) { | |
| 125 diffs.add(pending); | |
| 126 } | |
| 127 | |
| 128 return diffs; | |
| 129 } | |
| 130 | |
| 131 /** | |
| 132 * Traverses the sequences, seeking the longest common subsequences, | |
| 133 * invoking the methods <code>finishedA</code>, <code>finishedB</code>, | |
| 134 * <code>onANotB</code>, and <code>onBNotA</code>. | |
| 135 */ | |
| 136 protected void traverseSequences() | |
| 137 { | |
| 138 Integer[] matches = getLongestCommonSubsequences(); | |
| 139 | |
| 140 int lastA = a.length - 1; | |
| 141 int lastB = b.length - 1; | |
| 142 int bi = 0; | |
| 143 int ai; | |
| 144 | |
| 145 int lastMatch = matches.length - 1; | |
| 146 | |
| 147 for (ai = 0; ai <= lastMatch; ++ai) { | |
| 148 Integer bLine = matches[ai]; | |
| 149 | |
| 150 if (bLine == null) { | |
| 151 onANotB(ai, bi); | |
| 152 } | |
| 153 else { | |
| 154 while (bi < bLine.intValue()) { | |
| 155 onBNotA(ai, bi++); | |
| 156 } | |
| 157 | |
| 158 onMatch(ai, bi++); | |
| 159 } | |
| 160 } | |
| 161 | |
| 162 boolean calledFinishA = false; | |
| 163 boolean calledFinishB = false; | |
| 164 | |
| 165 while (ai <= lastA || bi <= lastB) { | |
| 166 | |
| 167 // last A? | |
| 168 if (ai == lastA + 1 && bi <= lastB) { | |
| 169 if (!calledFinishA && callFinishedA()) { | |
| 170 finishedA(lastA); | |
| 171 calledFinishA = true; | |
| 172 } | |
| 173 else { | |
| 174 while (bi <= lastB) { | |
| 175 onBNotA(ai, bi++); | |
| 176 } | |
| 177 } | |
| 178 } | |
| 179 | |
| 180 // last B? | |
| 181 if (bi == lastB + 1 && ai <= lastA) { | |
| 182 if (!calledFinishB && callFinishedB()) { | |
| 183 finishedB(lastB); | |
| 184 calledFinishB = true; | |
| 185 } | |
| 186 else { | |
| 187 while (ai <= lastA) { | |
| 188 onANotB(ai++, bi); | |
| 189 } | |
| 190 } | |
| 191 } | |
| 192 | |
| 193 if (ai <= lastA) { | |
| 194 onANotB(ai++, bi); | |
| 195 } | |
| 196 | |
| 197 if (bi <= lastB) { | |
| 198 onBNotA(ai, bi++); | |
| 199 } | |
| 200 } | |
| 201 } | |
| 202 | |
| 203 /** | |
| 204 * Override and return true in order to have <code>finishedA</code> invoked | |
| 205 * at the last element in the <code>a</code> array. | |
| 206 */ | |
| 207 protected boolean callFinishedA() | |
| 208 { | |
| 209 return false; | |
| 210 } | |
| 211 | |
| 212 /** | |
| 213 * Override and return true in order to have <code>finishedB</code> invoked | |
| 214 * at the last element in the <code>b</code> array. | |
| 215 */ | |
| 216 protected boolean callFinishedB() | |
| 217 { | |
| 218 return false; | |
| 219 } | |
| 220 | |
| 221 /** | |
| 222 * Invoked at the last element in <code>a</code>, if | |
| 223 * <code>callFinishedA</code> returns true. | |
| 224 */ | |
| 225 protected void finishedA(int lastA) | |
| 226 { | |
| 227 } | |
| 228 | |
| 229 /** | |
| 230 * Invoked at the last element in <code>b</code>, if | |
| 231 * <code>callFinishedB</code> returns true. | |
| 232 */ | |
| 233 protected void finishedB(int lastB) | |
| 234 { | |
| 235 } | |
| 236 | |
| 237 /** | |
| 238 * Invoked for elements in <code>a</code> and not in <code>b</code>. | |
| 239 */ | |
| 240 protected void onANotB(int ai, int bi) | |
| 241 { | |
| 242 if (pending == null) { | |
| 243 pending = new Difference(ai, ai, bi, -1); | |
| 244 } | |
| 245 else { | |
| 246 pending.setDeleted(ai); | |
| 247 } | |
| 248 } | |
| 249 | |
| 250 /** | |
| 251 * Invoked for elements in <code>b</code> and not in <code>a</code>. | |
| 252 */ | |
| 253 protected void onBNotA(int ai, int bi) | |
| 254 { | |
| 255 if (pending == null) { | |
| 256 pending = new Difference(ai, -1, bi, bi); | |
| 257 } | |
| 258 else { | |
| 259 pending.setAdded(bi); | |
| 260 } | |
| 261 } | |
| 262 | |
| 263 /** | |
| 264 * Invoked for elements matching in <code>a</code> and <code>b</code>. | |
| 265 */ | |
| 266 protected void onMatch(int ai, int bi) | |
| 267 { | |
| 268 if (pending == null) { | |
| 269 // no current pending | |
| 270 } | |
| 271 else { | |
| 272 diffs.add(pending); | |
| 273 pending = null; | |
| 274 } | |
| 275 } | |
| 276 | |
| 277 /** | |
| 278 * Compares the two objects, using the comparator provided with the | |
| 279 * constructor, if any. | |
| 280 */ | |
| 281 protected boolean equals(Object x, Object y) | |
| 282 { | |
| 283 return comparator == null ? x.equals(y) : comparator.compare(x, y) == 0; | |
| 284 } | |
| 285 | |
| 286 /** | |
| 287 * Returns an array of the longest common subsequences. | |
| 288 */ | |
| 289 public Integer[] getLongestCommonSubsequences() | |
| 290 { | |
| 291 int aStart = 0; | |
| 292 int aEnd = a.length - 1; | |
| 293 | |
| 294 int bStart = 0; | |
| 295 int bEnd = b.length - 1; | |
| 296 | |
| 297 TreeMap<Integer,Integer> matches = new TreeMap<Integer,Integer>(); | |
| 298 | |
| 299 while (aStart <= aEnd && bStart <= bEnd && equals(a[aStart], b[bStart])) { | |
| 300 matches.put(new Integer(aStart++), new Integer(bStart++)); | |
| 301 } | |
| 302 | |
| 303 while (aStart <= aEnd && bStart <= bEnd && equals(a[aEnd], b[bEnd])) { | |
| 304 matches.put(new Integer(aEnd--), new Integer(bEnd--)); | |
| 305 } | |
| 306 | |
| 307 Map<Object,List<Integer>> bMatches = null; | |
| 308 if (comparator == null) { | |
| 309 if (a.length > 0 && a[0] instanceof Comparable) { | |
| 310 // this uses the Comparable interface | |
| 311 bMatches = new TreeMap<Object,List<Integer>>(); | |
| 312 } | |
| 313 else { | |
| 314 // this just uses hashCode() | |
| 315 bMatches = new HashMap<Object,List<Integer>>(); | |
| 316 } | |
| 317 } | |
| 318 else { | |
| 319 // we don't really want them sorted, but this is the only Map | |
| 320 // implementation (as of JDK 1.4) that takes a comparator. | |
| 321 bMatches = new TreeMap<Object,List<Integer>>(comparator); | |
| 322 } | |
| 323 | |
| 324 for (int bi = bStart; bi <= bEnd; ++bi) { | |
| 325 Object element = b[bi]; | |
| 326 Object key = element; | |
| 327 List<Integer> positions = bMatches.get(key); | |
| 328 if (positions == null) { | |
| 329 positions = new ArrayList<Integer>(); | |
| 330 bMatches.put(key, positions); | |
| 331 } | |
| 332 positions.add(new Integer(bi)); | |
| 333 } | |
| 334 | |
| 335 thresh = new TreeMap<Integer,Integer>(); | |
| 336 Map<Object,Object[]> links = new HashMap<Object,Object[]>(); | |
| 337 | |
| 338 for (int i = aStart; i <= aEnd; ++i) { | |
| 339 Object aElement = a[i]; // keygen here. | |
| 340 List<Integer> positions = bMatches.get(aElement); | |
| 341 | |
| 342 if (positions != null) { | |
| 343 Integer k = new Integer(0); | |
| 344 ListIterator<Integer> pit = positions.listIterator(positions.size()); | |
| 345 while (pit.hasPrevious()) { | |
| 346 Integer j = pit.previous(); | |
| 347 | |
| 348 k = insert(j, k); | |
| 349 | |
| 350 if (k == null) { | |
| 351 // nothing | |
| 352 } | |
| 353 else { | |
| 354 Object value = k.intValue() > 0 ? links.get(new Integer(k.intValue() - 1)) : null; | |
| 355 links.put(k, new Object[] { value, new Integer(i), j }); | |
| 356 } | |
| 357 } | |
| 358 } | |
| 359 } | |
| 360 | |
| 361 if (thresh.size() > 0) { | |
| 362 Integer ti = thresh.lastKey(); | |
| 363 Object[] link = links.get(ti); | |
| 364 while (link != null) { | |
| 365 Integer x = (Integer)link[1]; | |
| 366 Integer y = (Integer)link[2]; | |
| 367 matches.put(x, y); | |
| 368 link = (Object[])link[0]; | |
| 369 } | |
| 370 } | |
| 371 | |
| 372 return toArray(matches); | |
| 373 } | |
| 374 | |
| 375 /** | |
| 376 * Converts the map (indexed by java.lang.Integers) into an array. | |
| 377 */ | |
| 378 protected static Integer[] toArray(TreeMap map) | |
| 379 { | |
| 380 int size = map.size() == 0 ? 0 : 1 + ((Integer)map.lastKey()).intValue(); | |
| 381 Integer[] ary = new Integer[size]; | |
| 382 Iterator it = map.keySet().iterator(); | |
| 383 | |
| 384 while (it.hasNext()) { | |
| 385 Integer idx = (Integer)it.next(); | |
| 386 Integer val = (Integer)map.get(idx); | |
| 387 ary[idx.intValue()] = val; | |
| 388 } | |
| 389 return ary; | |
| 390 } | |
| 391 | |
| 392 /** | |
| 393 * Returns whether the integer is not zero (including if it is not null). | |
| 394 */ | |
| 395 protected static boolean isNonzero(Integer i) | |
| 396 { | |
| 397 return i != null && i.intValue() != 0; | |
| 398 } | |
| 399 | |
| 400 /** | |
| 401 * Returns whether the value in the map for the given index is greater than | |
| 402 * the given value. | |
| 403 */ | |
| 404 protected boolean isGreaterThan(Integer index, Integer val) | |
| 405 { | |
| 406 Integer lhs = thresh.get(index); | |
| 407 return lhs != null && val != null && lhs.compareTo(val) > 0; | |
| 408 } | |
| 409 | |
| 410 /** | |
| 411 * Returns whether the value in the map for the given index is less than | |
| 412 * the given value. | |
| 413 */ | |
| 414 protected boolean isLessThan(Integer index, Integer val) | |
| 415 { | |
| 416 Integer lhs = thresh.get(index); | |
| 417 return lhs != null && (val == null || lhs.compareTo(val) < 0); | |
| 418 } | |
| 419 | |
| 420 /** | |
| 421 * Returns the value for the greatest key in the map. | |
| 422 */ | |
| 423 protected Integer getLastValue() | |
| 424 { | |
| 425 return thresh.get(thresh.lastKey()); | |
| 426 } | |
| 427 | |
| 428 /** | |
| 429 * Adds the given value to the "end" of the threshold map, that is, with the | |
| 430 * greatest index/key. | |
| 431 */ | |
| 432 protected void append(Integer value) | |
| 433 { | |
| 434 Integer addIdx = null; | |
| 435 if (thresh.size() == 0) { | |
| 436 addIdx = new Integer(0); | |
| 437 } | |
| 438 else { | |
| 439 Integer lastKey = thresh.lastKey(); | |
| 440 addIdx = new Integer(lastKey.intValue() + 1); | |
| 441 } | |
| 442 thresh.put(addIdx, value); | |
| 443 } | |
| 444 | |
| 445 /** | |
| 446 * Inserts the given values into the threshold map. | |
| 447 */ | |
| 448 protected Integer insert(Integer j, Integer k) | |
| 449 { | |
| 450 if (isNonzero(k) && isGreaterThan(k, j) && isLessThan(new Integer(k.intValue() - 1), j)) { | |
| 451 thresh.put(k, j); | |
| 452 } | |
| 453 else { | |
| 454 int hi = -1; | |
| 455 | |
| 456 if (isNonzero(k)) { | |
| 457 hi = k.intValue(); | |
| 458 } | |
| 459 else if (thresh.size() > 0) { | |
| 460 hi = thresh.lastKey().intValue(); | |
| 461 } | |
| 462 | |
| 463 // off the end? | |
| 464 if (hi == -1 || j.compareTo(getLastValue()) > 0) { | |
| 465 append(j); | |
| 466 k = new Integer(hi + 1); | |
| 467 } | |
| 468 else { | |
| 469 // binary search for insertion point: | |
| 470 int lo = 0; | |
| 471 | |
| 472 while (lo <= hi) { | |
| 473 int index = (hi + lo) / 2; | |
| 474 Integer val = thresh.get(new Integer(index)); | |
| 475 int cmp = j.compareTo(val); | |
| 476 | |
| 477 if (cmp == 0) { | |
| 478 return null; | |
| 479 } | |
| 480 else if (cmp > 0) { | |
| 481 lo = index + 1; | |
| 482 } | |
| 483 else { | |
| 484 hi = index - 1; | |
| 485 } | |
| 486 } | |
| 487 | |
| 488 thresh.put(new Integer(lo), j); | |
| 489 k = new Integer(lo); | |
| 490 } | |
| 491 } | |
| 492 | |
| 493 return k; | |
| 494 } | |
| 495 | |
| 496 } |
