1   /**
2    * Copyright (c) 2000-2008 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.util.diff;
24  
25  import com.liferay.portal.kernel.util.StringMaker;
26  import com.liferay.portal.kernel.util.StringPool;
27  import com.liferay.util.FileUtil;
28  
29  import java.io.Reader;
30  
31  import java.util.ArrayList;
32  import java.util.Iterator;
33  import java.util.List;
34  
35  import org.incava.util.diff.Diff;
36  import org.incava.util.diff.Difference;
37  
38  /**
39   * <a href="DiffUtil.java.html"><b><i>View Source</i></b></a>
40   *
41   * <p>
42   * This class can compare two different versions of a text. Source refers to
43   * the earliest version of the text and target refers to a modified version of
44   * source. Changes are considered either as a removal from the source or as an
45   * addition to the target. This class detects changes to an entire line and also
46   * detects changes within lines, such as, removal or addition of characters.
47   * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
48   * </p>
49   *
50   * @author Bruno Farache
51   *
52   */
53  public class DiffUtil {
54  
55      public static final String OPEN_INS = "<ins>";
56  
57      public static final String CLOSE_INS = "</ins>";
58  
59      public static final String OPEN_DEL = "<del>";
60  
61      public static final String CLOSE_DEL = "</del>";
62  
63      public static final String CONTEXT_LINE = "#context#line#";
64  
65      /**
66       * This is a diff method with default values.
67       *
68       * @param       source the <code>Reader</code> of the source text
69       * @param       target the <code>Reader</code> of the target text
70       * @return      an array containing two lists of <code>DiffResults</code>,
71       *              the first element contains DiffResults related to changes
72       *              in source and the second element to changes in target
73       */
74      public static List<DiffResult>[] diff(Reader source, Reader target) {
75          int margin = 2;
76  
77          return diff(
78              source, target, OPEN_INS, CLOSE_INS, OPEN_DEL, CLOSE_DEL, margin);
79      }
80  
81      /**
82       * The main entrance of this class. This method will compare the two texts,
83       * highlight the changes by enclosing them with markers and return a list
84       * of <code>DiffResults</code>.
85       *
86       * @param       source the <code>Reader</code> of the source text, this can
87       *              be for example, an instance of FileReader or StringReader
88       * @param       target the <code>Reader</code> of the target text
89       * @param       addedMarkerStart the initial marker for highlighting
90       *              additions
91       * @param       addedMarkerEnd the end marker for highlighting additions
92       * @param       deletedMarkerStart the initial marker for highlighting
93       *              removals
94       * @param       deletedMarkerEnd the end marker for highlighting removals
95       * @param       margin the number of lines that will be added before the
96       *              first changed line
97       * @return      an array containing two lists of <code>DiffResults</code>,
98       *              the first element contains DiffResults related to changes
99       *              in source and the second element to changes in target
100      */
101     public static List<DiffResult>[] diff(
102         Reader source, Reader target, String addedMarkerStart,
103         String addedMarkerEnd, String deletedMarkerStart,
104         String deletedMarkerEnd, int margin) {
105 
106         List<DiffResult> sourceResults = new ArrayList<DiffResult>();
107         List<DiffResult> targetResults = new ArrayList<DiffResult>();
108 
109         List<DiffResult>[] results = new List[] {sourceResults, targetResults};
110 
111         // Convert the texts to Lists where each element are lines of the texts.
112 
113         List<String> sourceStringList = FileUtil.toList(source);
114         List<String> targetStringList = FileUtil.toList(target);
115 
116         // Make a a Diff of these lines and iterate over their Differences.
117 
118         Diff diff = new Diff(sourceStringList, targetStringList);
119 
120         List<Difference> differences = diff.diff();
121 
122         Iterator<Difference> itr = differences.iterator();
123 
124         while (itr.hasNext()) {
125             Difference difference = itr.next();
126 
127             if (difference.getAddedEnd() == Difference.NONE) {
128 
129                 // Lines were deleted from source only.
130 
131                 _highlightLines(
132                     sourceStringList, deletedMarkerStart,
133                     deletedMarkerEnd, difference.getDeletedStart(),
134                     difference.getDeletedEnd());
135 
136                 margin = _calculateMargin(
137                     sourceResults, targetResults, difference.getDeletedStart(),
138                     difference.getAddedStart(), margin);
139 
140                 List<String> changedLines = _addMargins(
141                     sourceResults, sourceStringList,
142                     difference.getDeletedStart(), margin);
143 
144                 _addResults(
145                     sourceResults, sourceStringList, changedLines,
146                     difference.getDeletedStart(), difference.getDeletedEnd());
147 
148                 changedLines = _addMargins(
149                     targetResults, targetStringList, difference.getAddedStart(),
150                     margin);
151 
152                 int deletedLines =
153                     difference.getDeletedEnd() + 1 -
154                         difference.getDeletedStart();
155 
156                 for (int i = 0; i < deletedLines; i++) {
157                     changedLines.add(CONTEXT_LINE);
158                 }
159 
160                 DiffResult diffResult = new DiffResult(
161                     difference.getDeletedStart(), changedLines);
162 
163                 targetResults.add(diffResult);
164             }
165             else if (difference.getDeletedEnd() == Difference.NONE) {
166 
167                 // Lines were added to target only.
168 
169                 _highlightLines(
170                     targetStringList, addedMarkerStart, addedMarkerEnd,
171                     difference.getAddedStart(), difference.getAddedEnd());
172 
173                 margin = _calculateMargin(
174                     sourceResults, targetResults, difference.getDeletedStart(),
175                     difference.getAddedStart(), margin);
176 
177                 List<String> changedLines = _addMargins(
178                     sourceResults, sourceStringList,
179                     difference.getDeletedStart(), margin);
180 
181                 int addedLines =
182                     difference.getAddedEnd() + 1 - difference.getAddedStart();
183 
184                 for (int i = 0; i < addedLines; i++) {
185                     changedLines.add(CONTEXT_LINE);
186                 }
187 
188                 DiffResult diffResult = new DiffResult(
189                     difference.getAddedStart(), changedLines);
190 
191                 sourceResults.add(diffResult);
192 
193                 changedLines = _addMargins(
194                     targetResults, targetStringList, difference.getAddedStart(),
195                     margin);
196 
197                 _addResults(
198                     targetResults, targetStringList, changedLines,
199                     difference.getAddedStart(), difference.getAddedEnd());
200             }
201             else {
202 
203                 // Lines were deleted from source and added to target at the
204                 // same position. It needs to check for characters differences.
205 
206                 _checkCharDiffs(
207                     sourceResults, targetResults, sourceStringList,
208                     targetStringList, addedMarkerStart, addedMarkerEnd,
209                     deletedMarkerStart, deletedMarkerEnd, difference, margin);
210             }
211         }
212 
213         return results;
214     }
215 
216     private static List<String> _addMargins(
217         List<DiffResult> results, List<String> stringList, int beginPos,
218         int margin) {
219 
220         List<String> changedLines = new ArrayList<String>();
221 
222         if (margin == 0 || beginPos == 0) {
223             return changedLines;
224         }
225 
226         int i = beginPos - margin;
227 
228         for (; i < 0; i++) {
229             changedLines.add(CONTEXT_LINE);
230         }
231 
232         for (; i < beginPos; i++) {
233             if (i < stringList.size()) {
234                 changedLines.add(stringList.get(i));
235             }
236         }
237 
238         return changedLines;
239     }
240 
241     private static void _addResults(
242         List<DiffResult> results, List<String> stringList,
243         List<String> changedLines, int start, int end) {
244 
245         changedLines.addAll(stringList.subList(start, end + 1));
246 
247         DiffResult diffResult = new DiffResult(
248             start, changedLines);
249 
250         results.add(diffResult);
251     }
252 
253     private static int _calculateMargin(
254         List<DiffResult> sourceResults, List<DiffResult> targetResults,
255         int sourceBeginPos, int targetBeginPos, int margin) {
256 
257         int sourceMargin = _checkOverlapping(
258             sourceResults, sourceBeginPos, margin);
259         int targetMargin = _checkOverlapping(
260             targetResults, targetBeginPos, margin);
261 
262         if (sourceMargin < targetMargin) {
263             return sourceMargin;
264         }
265 
266         return targetMargin;
267     }
268 
269     private static void _checkCharDiffs(
270         List<DiffResult> sourceResults, List<DiffResult> targetResults,
271         List<String> sourceStringList, List<String> targetStringList,
272         String addedMarkerStart, String addedMarkerEnd,
273         String deletedMarkerStart, String deletedMarkerEnd,
274         Difference difference, int margin) {
275 
276         boolean aligned = false;
277 
278         int i = difference.getDeletedStart();
279         int j = difference.getAddedStart();
280 
281         // A line with changed characters may have its position shifted some
282         // lines above or below. These for loops will try to align these lines.
283         // While these lines are not aligned, highlight them as either additions
284         // or deletions.
285 
286         for (; i <= difference.getDeletedEnd(); i++) {
287             for (; j <= difference.getAddedEnd(); j++) {
288                 if (_lineDiff(
289                         sourceResults, targetResults, sourceStringList,
290                         targetStringList, addedMarkerStart, addedMarkerEnd,
291                         deletedMarkerStart, deletedMarkerEnd, i, j, false)) {
292 
293                     aligned = true;
294 
295                     break;
296                 }
297                 else {
298                     _highlightLines(
299                         targetStringList, addedMarkerStart, addedMarkerEnd, j,
300                         j);
301 
302                     DiffResult targetResult = new DiffResult(
303                         j, targetStringList.subList(j, j + 1));
304 
305                     targetResults.add(targetResult);
306 
307                     sourceResults.add(new DiffResult(j, CONTEXT_LINE));
308                 }
309             }
310 
311             if (aligned) {
312                  break;
313             }
314             else {
315                 _highlightLines(
316                     sourceStringList, deletedMarkerStart, deletedMarkerEnd, i,
317                     i);
318 
319                 DiffResult sourceResult = new DiffResult(
320                     i, sourceStringList.subList(i, i + 1));
321 
322                 sourceResults.add(sourceResult);
323 
324                 targetResults.add(new DiffResult(i, CONTEXT_LINE));
325             }
326         }
327 
328         i = i + 1;
329         j = j + 1;
330 
331         // Lines are aligned, check for differences of the following lines.
332 
333         for (; i <= difference.getDeletedEnd() && j <= difference.getAddedEnd();
334                 i++, j++) {
335 
336             _lineDiff(
337                 sourceResults, targetResults, sourceStringList,
338                 targetStringList, addedMarkerStart, addedMarkerEnd,
339                 deletedMarkerStart, deletedMarkerEnd, i, j, true);
340         }
341 
342         // After the for loop above, some lines might remained unchecked.
343         // They are considered as deletions or additions.
344 
345         for (; i <= difference.getDeletedEnd();i++) {
346             _highlightLines(
347                 sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
348 
349             DiffResult sourceResult = new DiffResult(
350                 i, sourceStringList.subList(i, i + 1));
351 
352             sourceResults.add(sourceResult);
353 
354             targetResults.add(new DiffResult(i, CONTEXT_LINE));
355         }
356 
357         for (; j <= difference.getAddedEnd(); j++) {
358             _highlightLines(
359                 targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
360 
361             DiffResult targetResult = new DiffResult(
362                 j, targetStringList.subList(j, j + 1));
363 
364             targetResults.add(targetResult);
365 
366             sourceResults.add(new DiffResult(j, CONTEXT_LINE));
367         }
368     }
369 
370     private static int _checkOverlapping(
371         List<DiffResult> results, int beginPos, int margin) {
372 
373         if (results.size() == 0 || (beginPos - margin) < 0) {
374             return margin;
375         }
376 
377         DiffResult lastDiff = results.get(results.size() - 1);
378 
379         if (lastDiff.getChangedLines().size() == 0) {
380             return margin;
381         }
382 
383         int lastChangedLine = (lastDiff.getLineNumber() - 1) +
384             lastDiff.getChangedLines().size();
385 
386         int currentChangedLine = beginPos - margin;
387 
388         if ((lastDiff.getChangedLines().size() == 1) &&
389             (lastDiff.getChangedLines().get(0).equals(CONTEXT_LINE))) {
390 
391             currentChangedLine = currentChangedLine + 1;
392         }
393 
394         if (currentChangedLine < lastChangedLine) {
395             return margin + currentChangedLine - lastChangedLine;
396         }
397 
398         return margin;
399     }
400 
401     private static void _highlightChars(
402         List<String> stringList, String markerStart, String markerEnd,
403         int beginPos, int endPos) {
404 
405         String start = markerStart + stringList.get(beginPos);
406 
407         stringList.set(beginPos, start);
408 
409         String end = stringList.get(endPos) + markerEnd;
410 
411         stringList.set(endPos, end);
412     }
413 
414     private static void _highlightLines(
415         List<String> stringList, String markerStart, String markerEnd,
416         int beginPos, int endPos) {
417 
418         for (int i = beginPos; i <= endPos; i++) {
419             stringList.set(i, markerStart + stringList.get(i) + markerEnd);
420         }
421     }
422 
423     private static boolean _lineDiff(
424         List<DiffResult> sourceResults, List<DiffResult> targetResults,
425         List<String> sourceStringList, List<String> targetStringList,
426         String addedMarkerStart, String addedMarkerEnd,
427         String deletedMarkerStart, String deletedMarkerEnd,
428         int sourceChangedLine, int targetChangedLine, boolean aligned) {
429 
430         String source = sourceStringList.get(sourceChangedLine);
431         String target = targetStringList.get(targetChangedLine);
432 
433         // Convert the lines to lists where each element are chars of the lines.
434 
435         List<String> sourceList = _toList(source);
436         List<String> targetList = _toList(target);
437 
438         Diff diff = new Diff(sourceList, targetList);
439 
440         List<Difference> differences = diff.diff();
441 
442         Iterator<Difference> itr = differences.iterator();
443 
444         int deletedChars = 0;
445         int addedChars = 0;
446 
447         // The following while loop will calculate how many characters of
448         // the source line need to be changed to be equals to the target line.
449 
450         while (itr.hasNext() && !aligned) {
451             Difference difference = itr.next();
452 
453             if (difference.getDeletedEnd() != Difference.NONE) {
454                 deletedChars =
455                     deletedChars +
456                     (difference.getDeletedEnd() -
457                         difference.getDeletedStart() + 1);
458             }
459 
460             if (difference.getAddedEnd() != Difference.NONE) {
461                 addedChars =
462                     addedChars +
463                     (difference.getAddedEnd() - difference.getAddedStart() + 1);
464             }
465         }
466 
467         // If a lot of changes were needed (more than half of the source line
468         // length), consider this as not aligned yet.
469 
470         if ((deletedChars > (sourceList.size() / 2)) ||
471             (addedChars > sourceList.size() / 2)) {
472 
473             return false;
474         }
475 
476         itr = differences.iterator();
477 
478         boolean sourceChanged = false;
479         boolean targetChanged = false;
480 
481         // Iterate over Differences between chars of these lines.
482 
483         while (itr.hasNext()) {
484             Difference difference = itr.next();
485 
486             if (difference.getAddedEnd() == Difference.NONE) {
487 
488                 // Chars were deleted from source only.
489 
490                 _highlightChars(
491                     sourceList, deletedMarkerStart,
492                     deletedMarkerEnd, difference.getDeletedStart(),
493                     difference.getDeletedEnd());
494 
495                 sourceChanged = true;
496             }
497             else if (difference.getDeletedEnd() == Difference.NONE) {
498 
499                 // Chars were added to target only.
500 
501                 _highlightChars(
502                     targetList, addedMarkerStart, addedMarkerEnd,
503                     difference.getAddedStart(), difference.getAddedEnd());
504 
505                 targetChanged = true;
506             }
507             else {
508 
509                 // Chars were both deleted and added.
510 
511                 _highlightChars(
512                     sourceList, deletedMarkerStart,
513                     deletedMarkerEnd, difference.getDeletedStart(),
514                     difference.getDeletedEnd());
515 
516                 sourceChanged = true;
517 
518                 _highlightChars(
519                     targetList, addedMarkerStart, addedMarkerEnd,
520                     difference.getAddedStart(), difference.getAddedEnd());
521 
522                 targetChanged = true;
523             }
524         }
525 
526         if (sourceChanged) {
527             DiffResult sourceResult = new DiffResult(
528                 sourceChangedLine, _toString(sourceList));
529 
530             sourceResults.add(sourceResult);
531 
532             if (!targetChanged) {
533                 DiffResult targetResult = new DiffResult(
534                     targetChangedLine, target);
535 
536                 targetResults.add(targetResult);
537             }
538         }
539 
540         if (targetChanged) {
541             if (!sourceChanged) {
542                 DiffResult sourceResult = new DiffResult(
543                     sourceChangedLine, source);
544 
545                 sourceResults.add(sourceResult);
546             }
547 
548             DiffResult targetResult = new DiffResult(
549                 targetChangedLine, _toString(targetList));
550 
551             targetResults.add(targetResult);
552         }
553 
554         return true;
555     }
556 
557     private static List<String> _toList(String line) {
558         String[] stringArray = line.split(StringPool.BLANK);
559 
560         List<String> result = new ArrayList<String>();
561 
562         for (int i = 1; i < stringArray.length; i++) {
563             result.add(stringArray[i]);
564         }
565 
566         return result;
567     }
568 
569     private static String _toString(List<String> line) {
570         StringMaker sm = new StringMaker();
571 
572         Iterator<String> itr = line.iterator();
573 
574         while (itr.hasNext()) {
575             sm.append(itr.next());
576         }
577 
578         return sm.toString();
579     }
580 
581 }