1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portal.util;
24  
25  import com.liferay.portal.kernel.util.DiffResult;
26  import com.liferay.portal.kernel.util.FileUtil;
27  import com.liferay.portal.kernel.util.StringPool;
28  
29  import java.io.Reader;
30  
31  import java.util.ArrayList;
32  import java.util.Iterator;
33  import java.util.List;
34  
35  import org.incava.util.diff.Diff;
36  import org.incava.util.diff.Difference;
37  
38  /**
39   * <a href="DiffImpl.java.html"><b><i>View Source</i></b></a>
40   *
41   * <p>
42   * This class can compare two different versions of a text. Source refers to
43   * the earliest version of the text and target refers to a modified version of
44   * source. Changes are considered either as a removal from the source or as an
45   * addition to the target. This class detects changes to an entire line and also
46   * detects changes within lines, such as, removal or addition of characters.
47   * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
48   * </p>
49   *
50   * @author Bruno Farache
51   *
52   */
53  public class DiffImpl implements com.liferay.portal.kernel.util.Diff {
54  
55      /**
56       * This is a diff method with default values.
57       *
58       * @param       source the <code>Reader</code> of the source text
59       * @param       target the <code>Reader</code> of the target text
60       * @return      an array containing two lists of <code>DiffResults</code>,
61       *              the first element contains DiffResults related to changes
62       *              in source and the second element to changes in target
63       */
64      public List<DiffResult>[] diff(Reader source, Reader target) {
65          int margin = 2;
66  
67          return diff(
68              source, target, OPEN_INS, CLOSE_INS, OPEN_DEL, CLOSE_DEL, margin);
69      }
70  
71      /**
72       * The main entrance of this class. This method will compare the two texts,
73       * highlight the changes by enclosing them with markers and return a list
74       * of <code>DiffResults</code>.
75       *
76       * @param       source the <code>Reader</code> of the source text, this can
77       *              be for example, an instance of FileReader or StringReader
78       * @param       target the <code>Reader</code> of the target text
79       * @param       addedMarkerStart the initial marker for highlighting
80       *              additions
81       * @param       addedMarkerEnd the end marker for highlighting additions
82       * @param       deletedMarkerStart the initial marker for highlighting
83       *              removals
84       * @param       deletedMarkerEnd the end marker for highlighting removals
85       * @param       margin the number of lines that will be added before the
86       *              first changed line
87       * @return      an array containing two lists of <code>DiffResults</code>,
88       *              the first element contains DiffResults related to changes
89       *              in source and the second element to changes in target
90       */
91      public List<DiffResult>[] diff(
92          Reader source, Reader target, String addedMarkerStart,
93          String addedMarkerEnd, String deletedMarkerStart,
94          String deletedMarkerEnd, int margin) {
95  
96          List<DiffResult> sourceResults = new ArrayList<DiffResult>();
97          List<DiffResult> targetResults = new ArrayList<DiffResult>();
98  
99          List<DiffResult>[] results = new List[] {sourceResults, targetResults};
100 
101         // Convert the texts to Lists where each element are lines of the texts.
102 
103         List<String> sourceStringList = FileUtil.toList(source);
104         List<String> targetStringList = FileUtil.toList(target);
105 
106         // Make a a Diff of these lines and iterate over their Differences.
107 
108         Diff diff = new Diff(sourceStringList, targetStringList);
109 
110         List<Difference> differences = diff.diff();
111 
112         Iterator<Difference> itr = differences.iterator();
113 
114         while (itr.hasNext()) {
115             Difference difference = itr.next();
116 
117             if (difference.getAddedEnd() == Difference.NONE) {
118 
119                 // Lines were deleted from source only.
120 
121                 _highlightLines(
122                     sourceStringList, deletedMarkerStart,
123                     deletedMarkerEnd, difference.getDeletedStart(),
124                     difference.getDeletedEnd());
125 
126                 margin = _calculateMargin(
127                     sourceResults, targetResults, difference.getDeletedStart(),
128                     difference.getAddedStart(), margin);
129 
130                 List<String> changedLines = _addMargins(
131                     sourceResults, sourceStringList,
132                     difference.getDeletedStart(), margin);
133 
134                 _addResults(
135                     sourceResults, sourceStringList, changedLines,
136                     difference.getDeletedStart(), difference.getDeletedEnd());
137 
138                 changedLines = _addMargins(
139                     targetResults, targetStringList, difference.getAddedStart(),
140                     margin);
141 
142                 int deletedLines =
143                     difference.getDeletedEnd() + 1 -
144                         difference.getDeletedStart();
145 
146                 for (int i = 0; i < deletedLines; i++) {
147                     changedLines.add(CONTEXT_LINE);
148                 }
149 
150                 DiffResult diffResult = new DiffResult(
151                     difference.getDeletedStart(), changedLines);
152 
153                 targetResults.add(diffResult);
154             }
155             else if (difference.getDeletedEnd() == Difference.NONE) {
156 
157                 // Lines were added to target only.
158 
159                 _highlightLines(
160                     targetStringList, addedMarkerStart, addedMarkerEnd,
161                     difference.getAddedStart(), difference.getAddedEnd());
162 
163                 margin = _calculateMargin(
164                     sourceResults, targetResults, difference.getDeletedStart(),
165                     difference.getAddedStart(), margin);
166 
167                 List<String> changedLines = _addMargins(
168                     sourceResults, sourceStringList,
169                     difference.getDeletedStart(), margin);
170 
171                 int addedLines =
172                     difference.getAddedEnd() + 1 - difference.getAddedStart();
173 
174                 for (int i = 0; i < addedLines; i++) {
175                     changedLines.add(CONTEXT_LINE);
176                 }
177 
178                 DiffResult diffResult = new DiffResult(
179                     difference.getAddedStart(), changedLines);
180 
181                 sourceResults.add(diffResult);
182 
183                 changedLines = _addMargins(
184                     targetResults, targetStringList, difference.getAddedStart(),
185                     margin);
186 
187                 _addResults(
188                     targetResults, targetStringList, changedLines,
189                     difference.getAddedStart(), difference.getAddedEnd());
190             }
191             else {
192 
193                 // Lines were deleted from source and added to target at the
194                 // same position. It needs to check for characters differences.
195 
196                 _checkCharDiffs(
197                     sourceResults, targetResults, sourceStringList,
198                     targetStringList, addedMarkerStart, addedMarkerEnd,
199                     deletedMarkerStart, deletedMarkerEnd, difference, margin);
200             }
201         }
202 
203         return results;
204     }
205 
206     private static List<String> _addMargins(
207         List<DiffResult> results, List<String> stringList, int startPos,
208         int margin) {
209 
210         List<String> changedLines = new ArrayList<String>();
211 
212         if (margin == 0 || startPos == 0) {
213             return changedLines;
214         }
215 
216         int i = startPos - margin;
217 
218         for (; i < 0; i++) {
219             changedLines.add(CONTEXT_LINE);
220         }
221 
222         for (; i < startPos; i++) {
223             if (i < stringList.size()) {
224                 changedLines.add(stringList.get(i));
225             }
226         }
227 
228         return changedLines;
229     }
230 
231     private static void _addResults(
232         List<DiffResult> results, List<String> stringList,
233         List<String> changedLines, int start, int end) {
234 
235         changedLines.addAll(stringList.subList(start, end + 1));
236 
237         DiffResult diffResult = new DiffResult(
238             start, changedLines);
239 
240         results.add(diffResult);
241     }
242 
243     private static int _calculateMargin(
244         List<DiffResult> sourceResults, List<DiffResult> targetResults,
245         int sourceBeginPos, int targetBeginPos, int margin) {
246 
247         int sourceMargin = _checkOverlapping(
248             sourceResults, sourceBeginPos, margin);
249         int targetMargin = _checkOverlapping(
250             targetResults, targetBeginPos, margin);
251 
252         if (sourceMargin < targetMargin) {
253             return sourceMargin;
254         }
255 
256         return targetMargin;
257     }
258 
259     private static void _checkCharDiffs(
260         List<DiffResult> sourceResults, List<DiffResult> targetResults,
261         List<String> sourceStringList, List<String> targetStringList,
262         String addedMarkerStart, String addedMarkerEnd,
263         String deletedMarkerStart, String deletedMarkerEnd,
264         Difference difference, int margin) {
265 
266         boolean aligned = false;
267 
268         int i = difference.getDeletedStart();
269         int j = difference.getAddedStart();
270 
271         // A line with changed characters may have its position shifted some
272         // lines above or below. These for loops will try to align these lines.
273         // While these lines are not aligned, highlight them as either additions
274         // or deletions.
275 
276         for (; i <= difference.getDeletedEnd(); i++) {
277             for (; j <= difference.getAddedEnd(); j++) {
278                 if (_lineDiff(
279                         sourceResults, targetResults, sourceStringList,
280                         targetStringList, addedMarkerStart, addedMarkerEnd,
281                         deletedMarkerStart, deletedMarkerEnd, i, j, false)) {
282 
283                     aligned = true;
284 
285                     break;
286                 }
287                 else {
288                     _highlightLines(
289                         targetStringList, addedMarkerStart, addedMarkerEnd, j,
290                         j);
291 
292                     DiffResult targetResult = new DiffResult(
293                         j, targetStringList.subList(j, j + 1));
294 
295                     targetResults.add(targetResult);
296 
297                     sourceResults.add(new DiffResult(j, CONTEXT_LINE));
298                 }
299             }
300 
301             if (aligned) {
302                  break;
303             }
304             else {
305                 _highlightLines(
306                     sourceStringList, deletedMarkerStart, deletedMarkerEnd, i,
307                     i);
308 
309                 DiffResult sourceResult = new DiffResult(
310                     i, sourceStringList.subList(i, i + 1));
311 
312                 sourceResults.add(sourceResult);
313 
314                 targetResults.add(new DiffResult(i, CONTEXT_LINE));
315             }
316         }
317 
318         i = i + 1;
319         j = j + 1;
320 
321         // Lines are aligned, check for differences of the following lines.
322 
323         for (; i <= difference.getDeletedEnd() && j <= difference.getAddedEnd();
324                 i++, j++) {
325 
326             _lineDiff(
327                 sourceResults, targetResults, sourceStringList,
328                 targetStringList, addedMarkerStart, addedMarkerEnd,
329                 deletedMarkerStart, deletedMarkerEnd, i, j, true);
330         }
331 
332         // After the for loop above, some lines might remained unchecked.
333         // They are considered as deletions or additions.
334 
335         for (; i <= difference.getDeletedEnd();i++) {
336             _highlightLines(
337                 sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
338 
339             DiffResult sourceResult = new DiffResult(
340                 i, sourceStringList.subList(i, i + 1));
341 
342             sourceResults.add(sourceResult);
343 
344             targetResults.add(new DiffResult(i, CONTEXT_LINE));
345         }
346 
347         for (; j <= difference.getAddedEnd(); j++) {
348             _highlightLines(
349                 targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
350 
351             DiffResult targetResult = new DiffResult(
352                 j, targetStringList.subList(j, j + 1));
353 
354             targetResults.add(targetResult);
355 
356             sourceResults.add(new DiffResult(j, CONTEXT_LINE));
357         }
358     }
359 
360     private static int _checkOverlapping(
361         List<DiffResult> results, int startPos, int margin) {
362 
363         if (results.size() == 0 || (startPos - margin) < 0) {
364             return margin;
365         }
366 
367         DiffResult lastDiff = results.get(results.size() - 1);
368 
369         if (lastDiff.getChangedLines().size() == 0) {
370             return margin;
371         }
372 
373         int lastChangedLine = (lastDiff.getLineNumber() - 1) +
374             lastDiff.getChangedLines().size();
375 
376         int currentChangedLine = startPos - margin;
377 
378         if ((lastDiff.getChangedLines().size() == 1) &&
379             (lastDiff.getChangedLines().get(0).equals(CONTEXT_LINE))) {
380 
381             currentChangedLine = currentChangedLine + 1;
382         }
383 
384         if (currentChangedLine < lastChangedLine) {
385             return margin + currentChangedLine - lastChangedLine;
386         }
387 
388         return margin;
389     }
390 
391     private static void _highlightChars(
392         List<String> stringList, String markerStart, String markerEnd,
393         int startPos, int endPos) {
394 
395         String start = markerStart + stringList.get(startPos);
396 
397         stringList.set(startPos, start);
398 
399         String end = stringList.get(endPos) + markerEnd;
400 
401         stringList.set(endPos, end);
402     }
403 
404     private static void _highlightLines(
405         List<String> stringList, String markerStart, String markerEnd,
406         int startPos, int endPos) {
407 
408         for (int i = startPos; i <= endPos; i++) {
409             stringList.set(i, markerStart + stringList.get(i) + markerEnd);
410         }
411     }
412 
413     private static boolean _lineDiff(
414         List<DiffResult> sourceResults, List<DiffResult> targetResults,
415         List<String> sourceStringList, List<String> targetStringList,
416         String addedMarkerStart, String addedMarkerEnd,
417         String deletedMarkerStart, String deletedMarkerEnd,
418         int sourceChangedLine, int targetChangedLine, boolean aligned) {
419 
420         String source = sourceStringList.get(sourceChangedLine);
421         String target = targetStringList.get(targetChangedLine);
422 
423         // Convert the lines to lists where each element are chars of the lines.
424 
425         List<String> sourceList = _toList(source);
426         List<String> targetList = _toList(target);
427 
428         Diff diff = new Diff(sourceList, targetList);
429 
430         List<Difference> differences = diff.diff();
431 
432         Iterator<Difference> itr = differences.iterator();
433 
434         int deletedChars = 0;
435         int addedChars = 0;
436 
437         // The following while loop will calculate how many characters of
438         // the source line need to be changed to be equals to the target line.
439 
440         while (itr.hasNext() && !aligned) {
441             Difference difference = itr.next();
442 
443             if (difference.getDeletedEnd() != Difference.NONE) {
444                 deletedChars =
445                     deletedChars +
446                     (difference.getDeletedEnd() -
447                         difference.getDeletedStart() + 1);
448             }
449 
450             if (difference.getAddedEnd() != Difference.NONE) {
451                 addedChars =
452                     addedChars +
453                     (difference.getAddedEnd() - difference.getAddedStart() + 1);
454             }
455         }
456 
457         // If a lot of changes were needed (more than half of the source line
458         // length), consider this as not aligned yet.
459 
460         if ((deletedChars > (sourceList.size() / 2)) ||
461             (addedChars > sourceList.size() / 2)) {
462 
463             return false;
464         }
465 
466         itr = differences.iterator();
467 
468         boolean sourceChanged = false;
469         boolean targetChanged = false;
470 
471         // Iterate over Differences between chars of these lines.
472 
473         while (itr.hasNext()) {
474             Difference difference = itr.next();
475 
476             if (difference.getAddedEnd() == Difference.NONE) {
477 
478                 // Chars were deleted from source only.
479 
480                 _highlightChars(
481                     sourceList, deletedMarkerStart,
482                     deletedMarkerEnd, difference.getDeletedStart(),
483                     difference.getDeletedEnd());
484 
485                 sourceChanged = true;
486             }
487             else if (difference.getDeletedEnd() == Difference.NONE) {
488 
489                 // Chars were added to target only.
490 
491                 _highlightChars(
492                     targetList, addedMarkerStart, addedMarkerEnd,
493                     difference.getAddedStart(), difference.getAddedEnd());
494 
495                 targetChanged = true;
496             }
497             else {
498 
499                 // Chars were both deleted and added.
500 
501                 _highlightChars(
502                     sourceList, deletedMarkerStart,
503                     deletedMarkerEnd, difference.getDeletedStart(),
504                     difference.getDeletedEnd());
505 
506                 sourceChanged = true;
507 
508                 _highlightChars(
509                     targetList, addedMarkerStart, addedMarkerEnd,
510                     difference.getAddedStart(), difference.getAddedEnd());
511 
512                 targetChanged = true;
513             }
514         }
515 
516         if (sourceChanged) {
517             DiffResult sourceResult = new DiffResult(
518                 sourceChangedLine, _toString(sourceList));
519 
520             sourceResults.add(sourceResult);
521 
522             if (!targetChanged) {
523                 DiffResult targetResult = new DiffResult(
524                     targetChangedLine, target);
525 
526                 targetResults.add(targetResult);
527             }
528         }
529 
530         if (targetChanged) {
531             if (!sourceChanged) {
532                 DiffResult sourceResult = new DiffResult(
533                     sourceChangedLine, source);
534 
535                 sourceResults.add(sourceResult);
536             }
537 
538             DiffResult targetResult = new DiffResult(
539                 targetChangedLine, _toString(targetList));
540 
541             targetResults.add(targetResult);
542         }
543 
544         return true;
545     }
546 
547     private static List<String> _toList(String line) {
548         String[] stringArray = line.split(StringPool.BLANK);
549 
550         List<String> result = new ArrayList<String>();
551 
552         for (int i = 1; i < stringArray.length; i++) {
553             result.add(stringArray[i]);
554         }
555 
556         return result;
557     }
558 
559     private static String _toString(List<String> line) {
560         StringBuilder sb = new StringBuilder();
561 
562         Iterator<String> itr = line.iterator();
563 
564         while (itr.hasNext()) {
565             sb.append(itr.next());
566         }
567 
568         return sb.toString();
569     }
570 
571 }