1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.util.diff;
24  
25  import com.liferay.portal.kernel.util.FileUtil;
26  import com.liferay.portal.kernel.util.StringPool;
27  
28  import java.io.Reader;
29  
30  import java.util.ArrayList;
31  import java.util.Iterator;
32  import java.util.List;
33  
34  import org.incava.util.diff.Diff;
35  import org.incava.util.diff.Difference;
36  
37  /**
38   * <a href="DiffUtil.java.html"><b><i>View Source</i></b></a>
39   *
40   * <p>
41   * This class can compare two different versions of a text. Source refers to
42   * the earliest version of the text and target refers to a modified version of
43   * source. Changes are considered either as a removal from the source or as an
44   * addition to the target. This class detects changes to an entire line and also
45   * detects changes within lines, such as, removal or addition of characters.
46   * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
47   * </p>
48   *
49   * @author Bruno Farache
50   *
51   * @deprecated This class has been repackaged at
52   * <code>com.liferay.portal.kernel.util</code>.
53   *
54   */
55  public class DiffUtil {
56  
57      public static final String OPEN_INS = "<ins>";
58  
59      public static final String CLOSE_INS = "</ins>";
60  
61      public static final String OPEN_DEL = "<del>";
62  
63      public static final String CLOSE_DEL = "</del>";
64  
65      public static final String CONTEXT_LINE = "#context#line#";
66  
67      /**
68       * This is a diff method with default values.
69       *
70       * @param       source the <code>Reader</code> of the source text
71       * @param       target the <code>Reader</code> of the target text
72       * @return      an array containing two lists of <code>DiffResults</code>,
73       *              the first element contains DiffResults related to changes
74       *              in source and the second element to changes in target
75       */
76      public static List<DiffResult>[] diff(Reader source, Reader target) {
77          int margin = 2;
78  
79          return diff(
80              source, target, OPEN_INS, CLOSE_INS, OPEN_DEL, CLOSE_DEL, margin);
81      }
82  
83      /**
84       * The main entrance of this class. This method will compare the two texts,
85       * highlight the changes by enclosing them with markers and return a list
86       * of <code>DiffResults</code>.
87       *
88       * @param       source the <code>Reader</code> of the source text, this can
89       *              be for example, an instance of FileReader or StringReader
90       * @param       target the <code>Reader</code> of the target text
91       * @param       addedMarkerStart the initial marker for highlighting
92       *              additions
93       * @param       addedMarkerEnd the end marker for highlighting additions
94       * @param       deletedMarkerStart the initial marker for highlighting
95       *              removals
96       * @param       deletedMarkerEnd the end marker for highlighting removals
97       * @param       margin the number of lines that will be added before the
98       *              first changed line
99       * @return      an array containing two lists of <code>DiffResults</code>,
100      *              the first element contains DiffResults related to changes
101      *              in source and the second element to changes in target
102      */
103     public static List<DiffResult>[] diff(
104         Reader source, Reader target, String addedMarkerStart,
105         String addedMarkerEnd, String deletedMarkerStart,
106         String deletedMarkerEnd, int margin) {
107 
108         List<DiffResult> sourceResults = new ArrayList<DiffResult>();
109         List<DiffResult> targetResults = new ArrayList<DiffResult>();
110 
111         List<DiffResult>[] results = new List[] {sourceResults, targetResults};
112 
113         // Convert the texts to Lists where each element are lines of the texts.
114 
115         List<String> sourceStringList = FileUtil.toList(source);
116         List<String> targetStringList = FileUtil.toList(target);
117 
118         // Make a a Diff of these lines and iterate over their Differences.
119 
120         Diff diff = new Diff(sourceStringList, targetStringList);
121 
122         List<Difference> differences = diff.diff();
123 
124         Iterator<Difference> itr = differences.iterator();
125 
126         while (itr.hasNext()) {
127             Difference difference = itr.next();
128 
129             if (difference.getAddedEnd() == Difference.NONE) {
130 
131                 // Lines were deleted from source only.
132 
133                 _highlightLines(
134                     sourceStringList, deletedMarkerStart,
135                     deletedMarkerEnd, difference.getDeletedStart(),
136                     difference.getDeletedEnd());
137 
138                 margin = _calculateMargin(
139                     sourceResults, targetResults, difference.getDeletedStart(),
140                     difference.getAddedStart(), margin);
141 
142                 List<String> changedLines = _addMargins(
143                     sourceResults, sourceStringList,
144                     difference.getDeletedStart(), margin);
145 
146                 _addResults(
147                     sourceResults, sourceStringList, changedLines,
148                     difference.getDeletedStart(), difference.getDeletedEnd());
149 
150                 changedLines = _addMargins(
151                     targetResults, targetStringList, difference.getAddedStart(),
152                     margin);
153 
154                 int deletedLines =
155                     difference.getDeletedEnd() + 1 -
156                         difference.getDeletedStart();
157 
158                 for (int i = 0; i < deletedLines; i++) {
159                     changedLines.add(CONTEXT_LINE);
160                 }
161 
162                 DiffResult diffResult = new DiffResult(
163                     difference.getDeletedStart(), changedLines);
164 
165                 targetResults.add(diffResult);
166             }
167             else if (difference.getDeletedEnd() == Difference.NONE) {
168 
169                 // Lines were added to target only.
170 
171                 _highlightLines(
172                     targetStringList, addedMarkerStart, addedMarkerEnd,
173                     difference.getAddedStart(), difference.getAddedEnd());
174 
175                 margin = _calculateMargin(
176                     sourceResults, targetResults, difference.getDeletedStart(),
177                     difference.getAddedStart(), margin);
178 
179                 List<String> changedLines = _addMargins(
180                     sourceResults, sourceStringList,
181                     difference.getDeletedStart(), margin);
182 
183                 int addedLines =
184                     difference.getAddedEnd() + 1 - difference.getAddedStart();
185 
186                 for (int i = 0; i < addedLines; i++) {
187                     changedLines.add(CONTEXT_LINE);
188                 }
189 
190                 DiffResult diffResult = new DiffResult(
191                     difference.getAddedStart(), changedLines);
192 
193                 sourceResults.add(diffResult);
194 
195                 changedLines = _addMargins(
196                     targetResults, targetStringList, difference.getAddedStart(),
197                     margin);
198 
199                 _addResults(
200                     targetResults, targetStringList, changedLines,
201                     difference.getAddedStart(), difference.getAddedEnd());
202             }
203             else {
204 
205                 // Lines were deleted from source and added to target at the
206                 // same position. It needs to check for characters differences.
207 
208                 _checkCharDiffs(
209                     sourceResults, targetResults, sourceStringList,
210                     targetStringList, addedMarkerStart, addedMarkerEnd,
211                     deletedMarkerStart, deletedMarkerEnd, difference, margin);
212             }
213         }
214 
215         return results;
216     }
217 
218     private static List<String> _addMargins(
219         List<DiffResult> results, List<String> stringList, int startPos,
220         int margin) {
221 
222         List<String> changedLines = new ArrayList<String>();
223 
224         if (margin == 0 || startPos == 0) {
225             return changedLines;
226         }
227 
228         int i = startPos - margin;
229 
230         for (; i < 0; i++) {
231             changedLines.add(CONTEXT_LINE);
232         }
233 
234         for (; i < startPos; i++) {
235             if (i < stringList.size()) {
236                 changedLines.add(stringList.get(i));
237             }
238         }
239 
240         return changedLines;
241     }
242 
243     private static void _addResults(
244         List<DiffResult> results, List<String> stringList,
245         List<String> changedLines, int start, int end) {
246 
247         changedLines.addAll(stringList.subList(start, end + 1));
248 
249         DiffResult diffResult = new DiffResult(
250             start, changedLines);
251 
252         results.add(diffResult);
253     }
254 
255     private static int _calculateMargin(
256         List<DiffResult> sourceResults, List<DiffResult> targetResults,
257         int sourceBeginPos, int targetBeginPos, int margin) {
258 
259         int sourceMargin = _checkOverlapping(
260             sourceResults, sourceBeginPos, margin);
261         int targetMargin = _checkOverlapping(
262             targetResults, targetBeginPos, margin);
263 
264         if (sourceMargin < targetMargin) {
265             return sourceMargin;
266         }
267 
268         return targetMargin;
269     }
270 
271     private static void _checkCharDiffs(
272         List<DiffResult> sourceResults, List<DiffResult> targetResults,
273         List<String> sourceStringList, List<String> targetStringList,
274         String addedMarkerStart, String addedMarkerEnd,
275         String deletedMarkerStart, String deletedMarkerEnd,
276         Difference difference, int margin) {
277 
278         boolean aligned = false;
279 
280         int i = difference.getDeletedStart();
281         int j = difference.getAddedStart();
282 
283         // A line with changed characters may have its position shifted some
284         // lines above or below. These for loops will try to align these lines.
285         // While these lines are not aligned, highlight them as either additions
286         // or deletions.
287 
288         for (; i <= difference.getDeletedEnd(); i++) {
289             for (; j <= difference.getAddedEnd(); j++) {
290                 if (_lineDiff(
291                         sourceResults, targetResults, sourceStringList,
292                         targetStringList, addedMarkerStart, addedMarkerEnd,
293                         deletedMarkerStart, deletedMarkerEnd, i, j, false)) {
294 
295                     aligned = true;
296 
297                     break;
298                 }
299                 else {
300                     _highlightLines(
301                         targetStringList, addedMarkerStart, addedMarkerEnd, j,
302                         j);
303 
304                     DiffResult targetResult = new DiffResult(
305                         j, targetStringList.subList(j, j + 1));
306 
307                     targetResults.add(targetResult);
308 
309                     sourceResults.add(new DiffResult(j, CONTEXT_LINE));
310                 }
311             }
312 
313             if (aligned) {
314                  break;
315             }
316             else {
317                 _highlightLines(
318                     sourceStringList, deletedMarkerStart, deletedMarkerEnd, i,
319                     i);
320 
321                 DiffResult sourceResult = new DiffResult(
322                     i, sourceStringList.subList(i, i + 1));
323 
324                 sourceResults.add(sourceResult);
325 
326                 targetResults.add(new DiffResult(i, CONTEXT_LINE));
327             }
328         }
329 
330         i = i + 1;
331         j = j + 1;
332 
333         // Lines are aligned, check for differences of the following lines.
334 
335         for (; i <= difference.getDeletedEnd() && j <= difference.getAddedEnd();
336                 i++, j++) {
337 
338             _lineDiff(
339                 sourceResults, targetResults, sourceStringList,
340                 targetStringList, addedMarkerStart, addedMarkerEnd,
341                 deletedMarkerStart, deletedMarkerEnd, i, j, true);
342         }
343 
344         // After the for loop above, some lines might remained unchecked.
345         // They are considered as deletions or additions.
346 
347         for (; i <= difference.getDeletedEnd();i++) {
348             _highlightLines(
349                 sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
350 
351             DiffResult sourceResult = new DiffResult(
352                 i, sourceStringList.subList(i, i + 1));
353 
354             sourceResults.add(sourceResult);
355 
356             targetResults.add(new DiffResult(i, CONTEXT_LINE));
357         }
358 
359         for (; j <= difference.getAddedEnd(); j++) {
360             _highlightLines(
361                 targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
362 
363             DiffResult targetResult = new DiffResult(
364                 j, targetStringList.subList(j, j + 1));
365 
366             targetResults.add(targetResult);
367 
368             sourceResults.add(new DiffResult(j, CONTEXT_LINE));
369         }
370     }
371 
372     private static int _checkOverlapping(
373         List<DiffResult> results, int startPos, int margin) {
374 
375         if (results.size() == 0 || (startPos - margin) < 0) {
376             return margin;
377         }
378 
379         DiffResult lastDiff = results.get(results.size() - 1);
380 
381         if (lastDiff.getChangedLines().size() == 0) {
382             return margin;
383         }
384 
385         int lastChangedLine = (lastDiff.getLineNumber() - 1) +
386             lastDiff.getChangedLines().size();
387 
388         int currentChangedLine = startPos - margin;
389 
390         if ((lastDiff.getChangedLines().size() == 1) &&
391             (lastDiff.getChangedLines().get(0).equals(CONTEXT_LINE))) {
392 
393             currentChangedLine = currentChangedLine + 1;
394         }
395 
396         if (currentChangedLine < lastChangedLine) {
397             return margin + currentChangedLine - lastChangedLine;
398         }
399 
400         return margin;
401     }
402 
403     private static void _highlightChars(
404         List<String> stringList, String markerStart, String markerEnd,
405         int startPos, int endPos) {
406 
407         String start = markerStart + stringList.get(startPos);
408 
409         stringList.set(startPos, start);
410 
411         String end = stringList.get(endPos) + markerEnd;
412 
413         stringList.set(endPos, end);
414     }
415 
416     private static void _highlightLines(
417         List<String> stringList, String markerStart, String markerEnd,
418         int startPos, int endPos) {
419 
420         for (int i = startPos; i <= endPos; i++) {
421             stringList.set(i, markerStart + stringList.get(i) + markerEnd);
422         }
423     }
424 
425     private static boolean _lineDiff(
426         List<DiffResult> sourceResults, List<DiffResult> targetResults,
427         List<String> sourceStringList, List<String> targetStringList,
428         String addedMarkerStart, String addedMarkerEnd,
429         String deletedMarkerStart, String deletedMarkerEnd,
430         int sourceChangedLine, int targetChangedLine, boolean aligned) {
431 
432         String source = sourceStringList.get(sourceChangedLine);
433         String target = targetStringList.get(targetChangedLine);
434 
435         // Convert the lines to lists where each element are chars of the lines.
436 
437         List<String> sourceList = _toList(source);
438         List<String> targetList = _toList(target);
439 
440         Diff diff = new Diff(sourceList, targetList);
441 
442         List<Difference> differences = diff.diff();
443 
444         Iterator<Difference> itr = differences.iterator();
445 
446         int deletedChars = 0;
447         int addedChars = 0;
448 
449         // The following while loop will calculate how many characters of
450         // the source line need to be changed to be equals to the target line.
451 
452         while (itr.hasNext() && !aligned) {
453             Difference difference = itr.next();
454 
455             if (difference.getDeletedEnd() != Difference.NONE) {
456                 deletedChars =
457                     deletedChars +
458                     (difference.getDeletedEnd() -
459                         difference.getDeletedStart() + 1);
460             }
461 
462             if (difference.getAddedEnd() != Difference.NONE) {
463                 addedChars =
464                     addedChars +
465                     (difference.getAddedEnd() - difference.getAddedStart() + 1);
466             }
467         }
468 
469         // If a lot of changes were needed (more than half of the source line
470         // length), consider this as not aligned yet.
471 
472         if ((deletedChars > (sourceList.size() / 2)) ||
473             (addedChars > sourceList.size() / 2)) {
474 
475             return false;
476         }
477 
478         itr = differences.iterator();
479 
480         boolean sourceChanged = false;
481         boolean targetChanged = false;
482 
483         // Iterate over Differences between chars of these lines.
484 
485         while (itr.hasNext()) {
486             Difference difference = itr.next();
487 
488             if (difference.getAddedEnd() == Difference.NONE) {
489 
490                 // Chars were deleted from source only.
491 
492                 _highlightChars(
493                     sourceList, deletedMarkerStart,
494                     deletedMarkerEnd, difference.getDeletedStart(),
495                     difference.getDeletedEnd());
496 
497                 sourceChanged = true;
498             }
499             else if (difference.getDeletedEnd() == Difference.NONE) {
500 
501                 // Chars were added to target only.
502 
503                 _highlightChars(
504                     targetList, addedMarkerStart, addedMarkerEnd,
505                     difference.getAddedStart(), difference.getAddedEnd());
506 
507                 targetChanged = true;
508             }
509             else {
510 
511                 // Chars were both deleted and added.
512 
513                 _highlightChars(
514                     sourceList, deletedMarkerStart,
515                     deletedMarkerEnd, difference.getDeletedStart(),
516                     difference.getDeletedEnd());
517 
518                 sourceChanged = true;
519 
520                 _highlightChars(
521                     targetList, addedMarkerStart, addedMarkerEnd,
522                     difference.getAddedStart(), difference.getAddedEnd());
523 
524                 targetChanged = true;
525             }
526         }
527 
528         if (sourceChanged) {
529             DiffResult sourceResult = new DiffResult(
530                 sourceChangedLine, _toString(sourceList));
531 
532             sourceResults.add(sourceResult);
533 
534             if (!targetChanged) {
535                 DiffResult targetResult = new DiffResult(
536                     targetChangedLine, target);
537 
538                 targetResults.add(targetResult);
539             }
540         }
541 
542         if (targetChanged) {
543             if (!sourceChanged) {
544                 DiffResult sourceResult = new DiffResult(
545                     sourceChangedLine, source);
546 
547                 sourceResults.add(sourceResult);
548             }
549 
550             DiffResult targetResult = new DiffResult(
551                 targetChangedLine, _toString(targetList));
552 
553             targetResults.add(targetResult);
554         }
555 
556         return true;
557     }
558 
559     private static List<String> _toList(String line) {
560         String[] stringArray = line.split(StringPool.BLANK);
561 
562         List<String> result = new ArrayList<String>();
563 
564         for (int i = 1; i < stringArray.length; i++) {
565             result.add(stringArray[i]);
566         }
567 
568         return result;
569     }
570 
571     private static String _toString(List<String> line) {
572         StringBuilder sb = new StringBuilder();
573 
574         Iterator<String> itr = line.iterator();
575 
576         while (itr.hasNext()) {
577             sb.append(itr.next());
578         }
579 
580         return sb.toString();
581     }
582 
583 }