1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17   * SOFTWARE.
18   */
19  
20  package com.liferay.portal.util;
21  
22  import com.liferay.portal.kernel.util.DiffResult;
23  import com.liferay.portal.kernel.util.FileUtil;
24  import com.liferay.portal.kernel.util.StringPool;
25  
26  import java.io.Reader;
27  
28  import java.util.ArrayList;
29  import java.util.Iterator;
30  import java.util.List;
31  
32  import org.incava.util.diff.Diff;
33  import org.incava.util.diff.Difference;
34  
35  /**
36   * <a href="DiffImpl.java.html"><b><i>View Source</i></b></a>
37   *
38   * <p>
39   * This class can compare two different versions of a text. Source refers to
40   * the earliest version of the text and target refers to a modified version of
41   * source. Changes are considered either as a removal from the source or as an
42   * addition to the target. This class detects changes to an entire line and also
43   * detects changes within lines, such as, removal or addition of characters.
44   * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
45   * </p>
46   *
47   * @author Bruno Farache
48   *
49   */
50  public class DiffImpl implements com.liferay.portal.kernel.util.Diff {
51  
52      /**
53       * This is a diff method with default values.
54       *
55       * @param       source the <code>Reader</code> of the source text
56       * @param       target the <code>Reader</code> of the target text
57       * @return      an array containing two lists of <code>DiffResults</code>,
58       *              the first element contains DiffResults related to changes
59       *              in source and the second element to changes in target
60       */
61      public List<DiffResult>[] diff(Reader source, Reader target) {
62          int margin = 2;
63  
64          return diff(
65              source, target, OPEN_INS, CLOSE_INS, OPEN_DEL, CLOSE_DEL, margin);
66      }
67  
68      /**
69       * The main entrance of this class. This method will compare the two texts,
70       * highlight the changes by enclosing them with markers and return a list
71       * of <code>DiffResults</code>.
72       *
73       * @param       source the <code>Reader</code> of the source text, this can
74       *              be for example, an instance of FileReader or StringReader
75       * @param       target the <code>Reader</code> of the target text
76       * @param       addedMarkerStart the initial marker for highlighting
77       *              additions
78       * @param       addedMarkerEnd the end marker for highlighting additions
79       * @param       deletedMarkerStart the initial marker for highlighting
80       *              removals
81       * @param       deletedMarkerEnd the end marker for highlighting removals
82       * @param       margin the number of lines that will be added before the
83       *              first changed line
84       * @return      an array containing two lists of <code>DiffResults</code>,
85       *              the first element contains DiffResults related to changes
86       *              in source and the second element to changes in target
87       */
88      public List<DiffResult>[] diff(
89          Reader source, Reader target, String addedMarkerStart,
90          String addedMarkerEnd, String deletedMarkerStart,
91          String deletedMarkerEnd, int margin) {
92  
93          List<DiffResult> sourceResults = new ArrayList<DiffResult>();
94          List<DiffResult> targetResults = new ArrayList<DiffResult>();
95  
96          List<DiffResult>[] results = new List[] {sourceResults, targetResults};
97  
98          // Convert the texts to Lists where each element are lines of the texts.
99  
100         List<String> sourceStringList = FileUtil.toList(source);
101         List<String> targetStringList = FileUtil.toList(target);
102 
103         // Make a a Diff of these lines and iterate over their Differences.
104 
105         Diff diff = new Diff(sourceStringList, targetStringList);
106 
107         List<Difference> differences = diff.diff();
108 
109         Iterator<Difference> itr = differences.iterator();
110 
111         while (itr.hasNext()) {
112             Difference difference = itr.next();
113 
114             if (difference.getAddedEnd() == Difference.NONE) {
115 
116                 // Lines were deleted from source only.
117 
118                 _highlightLines(
119                     sourceStringList, deletedMarkerStart,
120                     deletedMarkerEnd, difference.getDeletedStart(),
121                     difference.getDeletedEnd());
122 
123                 margin = _calculateMargin(
124                     sourceResults, targetResults, difference.getDeletedStart(),
125                     difference.getAddedStart(), margin);
126 
127                 List<String> changedLines = _addMargins(
128                     sourceResults, sourceStringList,
129                     difference.getDeletedStart(), margin);
130 
131                 _addResults(
132                     sourceResults, sourceStringList, changedLines,
133                     difference.getDeletedStart(), difference.getDeletedEnd());
134 
135                 changedLines = _addMargins(
136                     targetResults, targetStringList, difference.getAddedStart(),
137                     margin);
138 
139                 int deletedLines =
140                     difference.getDeletedEnd() + 1 -
141                         difference.getDeletedStart();
142 
143                 for (int i = 0; i < deletedLines; i++) {
144                     changedLines.add(CONTEXT_LINE);
145                 }
146 
147                 DiffResult diffResult = new DiffResult(
148                     difference.getDeletedStart(), changedLines);
149 
150                 targetResults.add(diffResult);
151             }
152             else if (difference.getDeletedEnd() == Difference.NONE) {
153 
154                 // Lines were added to target only.
155 
156                 _highlightLines(
157                     targetStringList, addedMarkerStart, addedMarkerEnd,
158                     difference.getAddedStart(), difference.getAddedEnd());
159 
160                 margin = _calculateMargin(
161                     sourceResults, targetResults, difference.getDeletedStart(),
162                     difference.getAddedStart(), margin);
163 
164                 List<String> changedLines = _addMargins(
165                     sourceResults, sourceStringList,
166                     difference.getDeletedStart(), margin);
167 
168                 int addedLines =
169                     difference.getAddedEnd() + 1 - difference.getAddedStart();
170 
171                 for (int i = 0; i < addedLines; i++) {
172                     changedLines.add(CONTEXT_LINE);
173                 }
174 
175                 DiffResult diffResult = new DiffResult(
176                     difference.getAddedStart(), changedLines);
177 
178                 sourceResults.add(diffResult);
179 
180                 changedLines = _addMargins(
181                     targetResults, targetStringList, difference.getAddedStart(),
182                     margin);
183 
184                 _addResults(
185                     targetResults, targetStringList, changedLines,
186                     difference.getAddedStart(), difference.getAddedEnd());
187             }
188             else {
189 
190                 // Lines were deleted from source and added to target at the
191                 // same position. It needs to check for characters differences.
192 
193                 _checkCharDiffs(
194                     sourceResults, targetResults, sourceStringList,
195                     targetStringList, addedMarkerStart, addedMarkerEnd,
196                     deletedMarkerStart, deletedMarkerEnd, difference, margin);
197             }
198         }
199 
200         return results;
201     }
202 
203     private static List<String> _addMargins(
204         List<DiffResult> results, List<String> stringList, int startPos,
205         int margin) {
206 
207         List<String> changedLines = new ArrayList<String>();
208 
209         if (margin == 0 || startPos == 0) {
210             return changedLines;
211         }
212 
213         int i = startPos - margin;
214 
215         for (; i < 0; i++) {
216             changedLines.add(CONTEXT_LINE);
217         }
218 
219         for (; i < startPos; i++) {
220             if (i < stringList.size()) {
221                 changedLines.add(stringList.get(i));
222             }
223         }
224 
225         return changedLines;
226     }
227 
228     private static void _addResults(
229         List<DiffResult> results, List<String> stringList,
230         List<String> changedLines, int start, int end) {
231 
232         changedLines.addAll(stringList.subList(start, end + 1));
233 
234         DiffResult diffResult = new DiffResult(
235             start, changedLines);
236 
237         results.add(diffResult);
238     }
239 
240     private static int _calculateMargin(
241         List<DiffResult> sourceResults, List<DiffResult> targetResults,
242         int sourceBeginPos, int targetBeginPos, int margin) {
243 
244         int sourceMargin = _checkOverlapping(
245             sourceResults, sourceBeginPos, margin);
246         int targetMargin = _checkOverlapping(
247             targetResults, targetBeginPos, margin);
248 
249         if (sourceMargin < targetMargin) {
250             return sourceMargin;
251         }
252 
253         return targetMargin;
254     }
255 
256     private static void _checkCharDiffs(
257         List<DiffResult> sourceResults, List<DiffResult> targetResults,
258         List<String> sourceStringList, List<String> targetStringList,
259         String addedMarkerStart, String addedMarkerEnd,
260         String deletedMarkerStart, String deletedMarkerEnd,
261         Difference difference, int margin) {
262 
263         boolean aligned = false;
264 
265         int i = difference.getDeletedStart();
266         int j = difference.getAddedStart();
267 
268         // A line with changed characters may have its position shifted some
269         // lines above or below. These for loops will try to align these lines.
270         // While these lines are not aligned, highlight them as either additions
271         // or deletions.
272 
273         for (; i <= difference.getDeletedEnd(); i++) {
274             for (; j <= difference.getAddedEnd(); j++) {
275                 if (_lineDiff(
276                         sourceResults, targetResults, sourceStringList,
277                         targetStringList, addedMarkerStart, addedMarkerEnd,
278                         deletedMarkerStart, deletedMarkerEnd, i, j, false)) {
279 
280                     aligned = true;
281 
282                     break;
283                 }
284                 else {
285                     _highlightLines(
286                         targetStringList, addedMarkerStart, addedMarkerEnd, j,
287                         j);
288 
289                     DiffResult targetResult = new DiffResult(
290                         j, targetStringList.subList(j, j + 1));
291 
292                     targetResults.add(targetResult);
293 
294                     sourceResults.add(new DiffResult(j, CONTEXT_LINE));
295                 }
296             }
297 
298             if (aligned) {
299                  break;
300             }
301             else {
302                 _highlightLines(
303                     sourceStringList, deletedMarkerStart, deletedMarkerEnd, i,
304                     i);
305 
306                 DiffResult sourceResult = new DiffResult(
307                     i, sourceStringList.subList(i, i + 1));
308 
309                 sourceResults.add(sourceResult);
310 
311                 targetResults.add(new DiffResult(i, CONTEXT_LINE));
312             }
313         }
314 
315         i = i + 1;
316         j = j + 1;
317 
318         // Lines are aligned, check for differences of the following lines.
319 
320         for (; i <= difference.getDeletedEnd() && j <= difference.getAddedEnd();
321                 i++, j++) {
322 
323             _lineDiff(
324                 sourceResults, targetResults, sourceStringList,
325                 targetStringList, addedMarkerStart, addedMarkerEnd,
326                 deletedMarkerStart, deletedMarkerEnd, i, j, true);
327         }
328 
329         // After the for loop above, some lines might remained unchecked.
330         // They are considered as deletions or additions.
331 
332         for (; i <= difference.getDeletedEnd();i++) {
333             _highlightLines(
334                 sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
335 
336             DiffResult sourceResult = new DiffResult(
337                 i, sourceStringList.subList(i, i + 1));
338 
339             sourceResults.add(sourceResult);
340 
341             targetResults.add(new DiffResult(i, CONTEXT_LINE));
342         }
343 
344         for (; j <= difference.getAddedEnd(); j++) {
345             _highlightLines(
346                 targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
347 
348             DiffResult targetResult = new DiffResult(
349                 j, targetStringList.subList(j, j + 1));
350 
351             targetResults.add(targetResult);
352 
353             sourceResults.add(new DiffResult(j, CONTEXT_LINE));
354         }
355     }
356 
357     private static int _checkOverlapping(
358         List<DiffResult> results, int startPos, int margin) {
359 
360         if (results.size() == 0 || (startPos - margin) < 0) {
361             return margin;
362         }
363 
364         DiffResult lastDiff = results.get(results.size() - 1);
365 
366         if (lastDiff.getChangedLines().size() == 0) {
367             return margin;
368         }
369 
370         int lastChangedLine = (lastDiff.getLineNumber() - 1) +
371             lastDiff.getChangedLines().size();
372 
373         int currentChangedLine = startPos - margin;
374 
375         if ((lastDiff.getChangedLines().size() == 1) &&
376             (lastDiff.getChangedLines().get(0).equals(CONTEXT_LINE))) {
377 
378             currentChangedLine = currentChangedLine + 1;
379         }
380 
381         if (currentChangedLine < lastChangedLine) {
382             return margin + currentChangedLine - lastChangedLine;
383         }
384 
385         return margin;
386     }
387 
388     private static void _highlightChars(
389         List<String> stringList, String markerStart, String markerEnd,
390         int startPos, int endPos) {
391 
392         String start = markerStart + stringList.get(startPos);
393 
394         stringList.set(startPos, start);
395 
396         String end = stringList.get(endPos) + markerEnd;
397 
398         stringList.set(endPos, end);
399     }
400 
401     private static void _highlightLines(
402         List<String> stringList, String markerStart, String markerEnd,
403         int startPos, int endPos) {
404 
405         for (int i = startPos; i <= endPos; i++) {
406             stringList.set(i, markerStart + stringList.get(i) + markerEnd);
407         }
408     }
409 
410     private static boolean _lineDiff(
411         List<DiffResult> sourceResults, List<DiffResult> targetResults,
412         List<String> sourceStringList, List<String> targetStringList,
413         String addedMarkerStart, String addedMarkerEnd,
414         String deletedMarkerStart, String deletedMarkerEnd,
415         int sourceChangedLine, int targetChangedLine, boolean aligned) {
416 
417         String source = sourceStringList.get(sourceChangedLine);
418         String target = targetStringList.get(targetChangedLine);
419 
420         // Convert the lines to lists where each element are chars of the lines.
421 
422         List<String> sourceList = _toList(source);
423         List<String> targetList = _toList(target);
424 
425         Diff diff = new Diff(sourceList, targetList);
426 
427         List<Difference> differences = diff.diff();
428 
429         Iterator<Difference> itr = differences.iterator();
430 
431         int deletedChars = 0;
432         int addedChars = 0;
433 
434         // The following while loop will calculate how many characters of
435         // the source line need to be changed to be equals to the target line.
436 
437         while (itr.hasNext() && !aligned) {
438             Difference difference = itr.next();
439 
440             if (difference.getDeletedEnd() != Difference.NONE) {
441                 deletedChars =
442                     deletedChars +
443                     (difference.getDeletedEnd() -
444                         difference.getDeletedStart() + 1);
445             }
446 
447             if (difference.getAddedEnd() != Difference.NONE) {
448                 addedChars =
449                     addedChars +
450                     (difference.getAddedEnd() - difference.getAddedStart() + 1);
451             }
452         }
453 
454         // If a lot of changes were needed (more than half of the source line
455         // length), consider this as not aligned yet.
456 
457         if ((deletedChars > (sourceList.size() / 2)) ||
458             (addedChars > sourceList.size() / 2)) {
459 
460             return false;
461         }
462 
463         itr = differences.iterator();
464 
465         boolean sourceChanged = false;
466         boolean targetChanged = false;
467 
468         // Iterate over Differences between chars of these lines.
469 
470         while (itr.hasNext()) {
471             Difference difference = itr.next();
472 
473             if (difference.getAddedEnd() == Difference.NONE) {
474 
475                 // Chars were deleted from source only.
476 
477                 _highlightChars(
478                     sourceList, deletedMarkerStart,
479                     deletedMarkerEnd, difference.getDeletedStart(),
480                     difference.getDeletedEnd());
481 
482                 sourceChanged = true;
483             }
484             else if (difference.getDeletedEnd() == Difference.NONE) {
485 
486                 // Chars were added to target only.
487 
488                 _highlightChars(
489                     targetList, addedMarkerStart, addedMarkerEnd,
490                     difference.getAddedStart(), difference.getAddedEnd());
491 
492                 targetChanged = true;
493             }
494             else {
495 
496                 // Chars were both deleted and added.
497 
498                 _highlightChars(
499                     sourceList, deletedMarkerStart,
500                     deletedMarkerEnd, difference.getDeletedStart(),
501                     difference.getDeletedEnd());
502 
503                 sourceChanged = true;
504 
505                 _highlightChars(
506                     targetList, addedMarkerStart, addedMarkerEnd,
507                     difference.getAddedStart(), difference.getAddedEnd());
508 
509                 targetChanged = true;
510             }
511         }
512 
513         if (sourceChanged) {
514             DiffResult sourceResult = new DiffResult(
515                 sourceChangedLine, _toString(sourceList));
516 
517             sourceResults.add(sourceResult);
518 
519             if (!targetChanged) {
520                 DiffResult targetResult = new DiffResult(
521                     targetChangedLine, target);
522 
523                 targetResults.add(targetResult);
524             }
525         }
526 
527         if (targetChanged) {
528             if (!sourceChanged) {
529                 DiffResult sourceResult = new DiffResult(
530                     sourceChangedLine, source);
531 
532                 sourceResults.add(sourceResult);
533             }
534 
535             DiffResult targetResult = new DiffResult(
536                 targetChangedLine, _toString(targetList));
537 
538             targetResults.add(targetResult);
539         }
540 
541         return true;
542     }
543 
544     private static List<String> _toList(String line) {
545         String[] stringArray = line.split(StringPool.BLANK);
546 
547         List<String> result = new ArrayList<String>();
548 
549         for (int i = 1; i < stringArray.length; i++) {
550             result.add(stringArray[i]);
551         }
552 
553         return result;
554     }
555 
556     private static String _toString(List<String> line) {
557         StringBuilder sb = new StringBuilder();
558 
559         Iterator<String> itr = line.iterator();
560 
561         while (itr.hasNext()) {
562             sb.append(itr.next());
563         }
564 
565         return sb.toString();
566     }
567 
568 }