1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17   * SOFTWARE.
18   */
19  
20  package com.liferay.util.diff;
21  
22  import com.liferay.portal.kernel.util.FileUtil;
23  import com.liferay.portal.kernel.util.StringPool;
24  
25  import java.io.Reader;
26  
27  import java.util.ArrayList;
28  import java.util.Iterator;
29  import java.util.List;
30  
31  import org.incava.util.diff.Diff;
32  import org.incava.util.diff.Difference;
33  
34  /**
35   * <a href="DiffUtil.java.html"><b><i>View Source</i></b></a>
36   *
37   * <p>
38   * This class can compare two different versions of a text. Source refers to
39   * the earliest version of the text and target refers to a modified version of
40   * source. Changes are considered either as a removal from the source or as an
41   * addition to the target. This class detects changes to an entire line and also
42   * detects changes within lines, such as, removal or addition of characters.
43   * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
44   * </p>
45   *
46   * @author Bruno Farache
47   *
48   * @deprecated This class has been repackaged at
49   * <code>com.liferay.portal.kernel.util</code>.
50   *
51   */
52  public class DiffUtil {
53  
54      public static final String OPEN_INS = "<ins>";
55  
56      public static final String CLOSE_INS = "</ins>";
57  
58      public static final String OPEN_DEL = "<del>";
59  
60      public static final String CLOSE_DEL = "</del>";
61  
62      public static final String CONTEXT_LINE = "#context#line#";
63  
64      /**
65       * This is a diff method with default values.
66       *
67       * @param       source the <code>Reader</code> of the source text
68       * @param       target the <code>Reader</code> of the target text
69       * @return      an array containing two lists of <code>DiffResults</code>,
70       *              the first element contains DiffResults related to changes
71       *              in source and the second element to changes in target
72       */
73      public static List<DiffResult>[] diff(Reader source, Reader target) {
74          int margin = 2;
75  
76          return diff(
77              source, target, OPEN_INS, CLOSE_INS, OPEN_DEL, CLOSE_DEL, margin);
78      }
79  
80      /**
81       * The main entrance of this class. This method will compare the two texts,
82       * highlight the changes by enclosing them with markers and return a list
83       * of <code>DiffResults</code>.
84       *
85       * @param       source the <code>Reader</code> of the source text, this can
86       *              be for example, an instance of FileReader or StringReader
87       * @param       target the <code>Reader</code> of the target text
88       * @param       addedMarkerStart the initial marker for highlighting
89       *              additions
90       * @param       addedMarkerEnd the end marker for highlighting additions
91       * @param       deletedMarkerStart the initial marker for highlighting
92       *              removals
93       * @param       deletedMarkerEnd the end marker for highlighting removals
94       * @param       margin the number of lines that will be added before the
95       *              first changed line
96       * @return      an array containing two lists of <code>DiffResults</code>,
97       *              the first element contains DiffResults related to changes
98       *              in source and the second element to changes in target
99       */
100     public static List<DiffResult>[] diff(
101         Reader source, Reader target, String addedMarkerStart,
102         String addedMarkerEnd, String deletedMarkerStart,
103         String deletedMarkerEnd, int margin) {
104 
105         List<DiffResult> sourceResults = new ArrayList<DiffResult>();
106         List<DiffResult> targetResults = new ArrayList<DiffResult>();
107 
108         List<DiffResult>[] results = new List[] {sourceResults, targetResults};
109 
110         // Convert the texts to Lists where each element are lines of the texts.
111 
112         List<String> sourceStringList = FileUtil.toList(source);
113         List<String> targetStringList = FileUtil.toList(target);
114 
115         // Make a a Diff of these lines and iterate over their Differences.
116 
117         Diff diff = new Diff(sourceStringList, targetStringList);
118 
119         List<Difference> differences = diff.diff();
120 
121         Iterator<Difference> itr = differences.iterator();
122 
123         while (itr.hasNext()) {
124             Difference difference = itr.next();
125 
126             if (difference.getAddedEnd() == Difference.NONE) {
127 
128                 // Lines were deleted from source only.
129 
130                 _highlightLines(
131                     sourceStringList, deletedMarkerStart,
132                     deletedMarkerEnd, difference.getDeletedStart(),
133                     difference.getDeletedEnd());
134 
135                 margin = _calculateMargin(
136                     sourceResults, targetResults, difference.getDeletedStart(),
137                     difference.getAddedStart(), margin);
138 
139                 List<String> changedLines = _addMargins(
140                     sourceResults, sourceStringList,
141                     difference.getDeletedStart(), margin);
142 
143                 _addResults(
144                     sourceResults, sourceStringList, changedLines,
145                     difference.getDeletedStart(), difference.getDeletedEnd());
146 
147                 changedLines = _addMargins(
148                     targetResults, targetStringList, difference.getAddedStart(),
149                     margin);
150 
151                 int deletedLines =
152                     difference.getDeletedEnd() + 1 -
153                         difference.getDeletedStart();
154 
155                 for (int i = 0; i < deletedLines; i++) {
156                     changedLines.add(CONTEXT_LINE);
157                 }
158 
159                 DiffResult diffResult = new DiffResult(
160                     difference.getDeletedStart(), changedLines);
161 
162                 targetResults.add(diffResult);
163             }
164             else if (difference.getDeletedEnd() == Difference.NONE) {
165 
166                 // Lines were added to target only.
167 
168                 _highlightLines(
169                     targetStringList, addedMarkerStart, addedMarkerEnd,
170                     difference.getAddedStart(), difference.getAddedEnd());
171 
172                 margin = _calculateMargin(
173                     sourceResults, targetResults, difference.getDeletedStart(),
174                     difference.getAddedStart(), margin);
175 
176                 List<String> changedLines = _addMargins(
177                     sourceResults, sourceStringList,
178                     difference.getDeletedStart(), margin);
179 
180                 int addedLines =
181                     difference.getAddedEnd() + 1 - difference.getAddedStart();
182 
183                 for (int i = 0; i < addedLines; i++) {
184                     changedLines.add(CONTEXT_LINE);
185                 }
186 
187                 DiffResult diffResult = new DiffResult(
188                     difference.getAddedStart(), changedLines);
189 
190                 sourceResults.add(diffResult);
191 
192                 changedLines = _addMargins(
193                     targetResults, targetStringList, difference.getAddedStart(),
194                     margin);
195 
196                 _addResults(
197                     targetResults, targetStringList, changedLines,
198                     difference.getAddedStart(), difference.getAddedEnd());
199             }
200             else {
201 
202                 // Lines were deleted from source and added to target at the
203                 // same position. It needs to check for characters differences.
204 
205                 _checkCharDiffs(
206                     sourceResults, targetResults, sourceStringList,
207                     targetStringList, addedMarkerStart, addedMarkerEnd,
208                     deletedMarkerStart, deletedMarkerEnd, difference, margin);
209             }
210         }
211 
212         return results;
213     }
214 
215     private static List<String> _addMargins(
216         List<DiffResult> results, List<String> stringList, int startPos,
217         int margin) {
218 
219         List<String> changedLines = new ArrayList<String>();
220 
221         if (margin == 0 || startPos == 0) {
222             return changedLines;
223         }
224 
225         int i = startPos - margin;
226 
227         for (; i < 0; i++) {
228             changedLines.add(CONTEXT_LINE);
229         }
230 
231         for (; i < startPos; i++) {
232             if (i < stringList.size()) {
233                 changedLines.add(stringList.get(i));
234             }
235         }
236 
237         return changedLines;
238     }
239 
240     private static void _addResults(
241         List<DiffResult> results, List<String> stringList,
242         List<String> changedLines, int start, int end) {
243 
244         changedLines.addAll(stringList.subList(start, end + 1));
245 
246         DiffResult diffResult = new DiffResult(
247             start, changedLines);
248 
249         results.add(diffResult);
250     }
251 
252     private static int _calculateMargin(
253         List<DiffResult> sourceResults, List<DiffResult> targetResults,
254         int sourceBeginPos, int targetBeginPos, int margin) {
255 
256         int sourceMargin = _checkOverlapping(
257             sourceResults, sourceBeginPos, margin);
258         int targetMargin = _checkOverlapping(
259             targetResults, targetBeginPos, margin);
260 
261         if (sourceMargin < targetMargin) {
262             return sourceMargin;
263         }
264 
265         return targetMargin;
266     }
267 
268     private static void _checkCharDiffs(
269         List<DiffResult> sourceResults, List<DiffResult> targetResults,
270         List<String> sourceStringList, List<String> targetStringList,
271         String addedMarkerStart, String addedMarkerEnd,
272         String deletedMarkerStart, String deletedMarkerEnd,
273         Difference difference, int margin) {
274 
275         boolean aligned = false;
276 
277         int i = difference.getDeletedStart();
278         int j = difference.getAddedStart();
279 
280         // A line with changed characters may have its position shifted some
281         // lines above or below. These for loops will try to align these lines.
282         // While these lines are not aligned, highlight them as either additions
283         // or deletions.
284 
285         for (; i <= difference.getDeletedEnd(); i++) {
286             for (; j <= difference.getAddedEnd(); j++) {
287                 if (_lineDiff(
288                         sourceResults, targetResults, sourceStringList,
289                         targetStringList, addedMarkerStart, addedMarkerEnd,
290                         deletedMarkerStart, deletedMarkerEnd, i, j, false)) {
291 
292                     aligned = true;
293 
294                     break;
295                 }
296                 else {
297                     _highlightLines(
298                         targetStringList, addedMarkerStart, addedMarkerEnd, j,
299                         j);
300 
301                     DiffResult targetResult = new DiffResult(
302                         j, targetStringList.subList(j, j + 1));
303 
304                     targetResults.add(targetResult);
305 
306                     sourceResults.add(new DiffResult(j, CONTEXT_LINE));
307                 }
308             }
309 
310             if (aligned) {
311                  break;
312             }
313             else {
314                 _highlightLines(
315                     sourceStringList, deletedMarkerStart, deletedMarkerEnd, i,
316                     i);
317 
318                 DiffResult sourceResult = new DiffResult(
319                     i, sourceStringList.subList(i, i + 1));
320 
321                 sourceResults.add(sourceResult);
322 
323                 targetResults.add(new DiffResult(i, CONTEXT_LINE));
324             }
325         }
326 
327         i = i + 1;
328         j = j + 1;
329 
330         // Lines are aligned, check for differences of the following lines.
331 
332         for (; i <= difference.getDeletedEnd() && j <= difference.getAddedEnd();
333                 i++, j++) {
334 
335             _lineDiff(
336                 sourceResults, targetResults, sourceStringList,
337                 targetStringList, addedMarkerStart, addedMarkerEnd,
338                 deletedMarkerStart, deletedMarkerEnd, i, j, true);
339         }
340 
341         // After the for loop above, some lines might remained unchecked.
342         // They are considered as deletions or additions.
343 
344         for (; i <= difference.getDeletedEnd();i++) {
345             _highlightLines(
346                 sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
347 
348             DiffResult sourceResult = new DiffResult(
349                 i, sourceStringList.subList(i, i + 1));
350 
351             sourceResults.add(sourceResult);
352 
353             targetResults.add(new DiffResult(i, CONTEXT_LINE));
354         }
355 
356         for (; j <= difference.getAddedEnd(); j++) {
357             _highlightLines(
358                 targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
359 
360             DiffResult targetResult = new DiffResult(
361                 j, targetStringList.subList(j, j + 1));
362 
363             targetResults.add(targetResult);
364 
365             sourceResults.add(new DiffResult(j, CONTEXT_LINE));
366         }
367     }
368 
369     private static int _checkOverlapping(
370         List<DiffResult> results, int startPos, int margin) {
371 
372         if (results.size() == 0 || (startPos - margin) < 0) {
373             return margin;
374         }
375 
376         DiffResult lastDiff = results.get(results.size() - 1);
377 
378         if (lastDiff.getChangedLines().size() == 0) {
379             return margin;
380         }
381 
382         int lastChangedLine = (lastDiff.getLineNumber() - 1) +
383             lastDiff.getChangedLines().size();
384 
385         int currentChangedLine = startPos - margin;
386 
387         if ((lastDiff.getChangedLines().size() == 1) &&
388             (lastDiff.getChangedLines().get(0).equals(CONTEXT_LINE))) {
389 
390             currentChangedLine = currentChangedLine + 1;
391         }
392 
393         if (currentChangedLine < lastChangedLine) {
394             return margin + currentChangedLine - lastChangedLine;
395         }
396 
397         return margin;
398     }
399 
400     private static void _highlightChars(
401         List<String> stringList, String markerStart, String markerEnd,
402         int startPos, int endPos) {
403 
404         String start = markerStart + stringList.get(startPos);
405 
406         stringList.set(startPos, start);
407 
408         String end = stringList.get(endPos) + markerEnd;
409 
410         stringList.set(endPos, end);
411     }
412 
413     private static void _highlightLines(
414         List<String> stringList, String markerStart, String markerEnd,
415         int startPos, int endPos) {
416 
417         for (int i = startPos; i <= endPos; i++) {
418             stringList.set(i, markerStart + stringList.get(i) + markerEnd);
419         }
420     }
421 
422     private static boolean _lineDiff(
423         List<DiffResult> sourceResults, List<DiffResult> targetResults,
424         List<String> sourceStringList, List<String> targetStringList,
425         String addedMarkerStart, String addedMarkerEnd,
426         String deletedMarkerStart, String deletedMarkerEnd,
427         int sourceChangedLine, int targetChangedLine, boolean aligned) {
428 
429         String source = sourceStringList.get(sourceChangedLine);
430         String target = targetStringList.get(targetChangedLine);
431 
432         // Convert the lines to lists where each element are chars of the lines.
433 
434         List<String> sourceList = _toList(source);
435         List<String> targetList = _toList(target);
436 
437         Diff diff = new Diff(sourceList, targetList);
438 
439         List<Difference> differences = diff.diff();
440 
441         Iterator<Difference> itr = differences.iterator();
442 
443         int deletedChars = 0;
444         int addedChars = 0;
445 
446         // The following while loop will calculate how many characters of
447         // the source line need to be changed to be equals to the target line.
448 
449         while (itr.hasNext() && !aligned) {
450             Difference difference = itr.next();
451 
452             if (difference.getDeletedEnd() != Difference.NONE) {
453                 deletedChars =
454                     deletedChars +
455                     (difference.getDeletedEnd() -
456                         difference.getDeletedStart() + 1);
457             }
458 
459             if (difference.getAddedEnd() != Difference.NONE) {
460                 addedChars =
461                     addedChars +
462                     (difference.getAddedEnd() - difference.getAddedStart() + 1);
463             }
464         }
465 
466         // If a lot of changes were needed (more than half of the source line
467         // length), consider this as not aligned yet.
468 
469         if ((deletedChars > (sourceList.size() / 2)) ||
470             (addedChars > sourceList.size() / 2)) {
471 
472             return false;
473         }
474 
475         itr = differences.iterator();
476 
477         boolean sourceChanged = false;
478         boolean targetChanged = false;
479 
480         // Iterate over Differences between chars of these lines.
481 
482         while (itr.hasNext()) {
483             Difference difference = itr.next();
484 
485             if (difference.getAddedEnd() == Difference.NONE) {
486 
487                 // Chars were deleted from source only.
488 
489                 _highlightChars(
490                     sourceList, deletedMarkerStart,
491                     deletedMarkerEnd, difference.getDeletedStart(),
492                     difference.getDeletedEnd());
493 
494                 sourceChanged = true;
495             }
496             else if (difference.getDeletedEnd() == Difference.NONE) {
497 
498                 // Chars were added to target only.
499 
500                 _highlightChars(
501                     targetList, addedMarkerStart, addedMarkerEnd,
502                     difference.getAddedStart(), difference.getAddedEnd());
503 
504                 targetChanged = true;
505             }
506             else {
507 
508                 // Chars were both deleted and added.
509 
510                 _highlightChars(
511                     sourceList, deletedMarkerStart,
512                     deletedMarkerEnd, difference.getDeletedStart(),
513                     difference.getDeletedEnd());
514 
515                 sourceChanged = true;
516 
517                 _highlightChars(
518                     targetList, addedMarkerStart, addedMarkerEnd,
519                     difference.getAddedStart(), difference.getAddedEnd());
520 
521                 targetChanged = true;
522             }
523         }
524 
525         if (sourceChanged) {
526             DiffResult sourceResult = new DiffResult(
527                 sourceChangedLine, _toString(sourceList));
528 
529             sourceResults.add(sourceResult);
530 
531             if (!targetChanged) {
532                 DiffResult targetResult = new DiffResult(
533                     targetChangedLine, target);
534 
535                 targetResults.add(targetResult);
536             }
537         }
538 
539         if (targetChanged) {
540             if (!sourceChanged) {
541                 DiffResult sourceResult = new DiffResult(
542                     sourceChangedLine, source);
543 
544                 sourceResults.add(sourceResult);
545             }
546 
547             DiffResult targetResult = new DiffResult(
548                 targetChangedLine, _toString(targetList));
549 
550             targetResults.add(targetResult);
551         }
552 
553         return true;
554     }
555 
556     private static List<String> _toList(String line) {
557         String[] stringArray = line.split(StringPool.BLANK);
558 
559         List<String> result = new ArrayList<String>();
560 
561         for (int i = 1; i < stringArray.length; i++) {
562             result.add(stringArray[i]);
563         }
564 
565         return result;
566     }
567 
568     private static String _toString(List<String> line) {
569         StringBuilder sb = new StringBuilder();
570 
571         Iterator<String> itr = line.iterator();
572 
573         while (itr.hasNext()) {
574             sb.append(itr.next());
575         }
576 
577         return sb.toString();
578     }
579 
580 }