ddmp.diff source code

1 /*
2  * Copyright 2008 Google Inc. All Rights Reserved.
3  * Copyright 2013-2014 Jan Krüger. All Rights Reserved.
4  * Author: fraser@google.com (Neil Fraser)
5  * Author: anteru@developer.shelter13.net (Matthaeus G. Chajdas)
6  * Author: jan@jandoe.de (Jan Krüger)
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *   http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  * Diff Match and Patch
21  * http://code.google.com/p/google-diff-match-patch/
22  */
23 module ddmp.diff;
24 
25 import ddmp.util;
26 
27 import std.array;
28 import std.conv;
29 import std.datetime : SysTime, Clock, UTC;
30 import std.exception : enforce;
31 import std..string : indexOf, endsWith, startsWith;
32 import std.uni;
33 import std.regex;
34 import std.algorithm : min, max;
35 import std.digest.sha;
36 import core.time;
37 
38 
39 Duration diffTimeout = 1.seconds;
40 int DIFF_EDIT_COST = 4;
41 
42 
43 /**
44 * Compute and return the source text (all equalities and deletions).
45 * @param diffs List of Diff objects.
46 * @return Source text.
47 */
48 string diff_text1(Diff[] diffs) {
49     auto text = appender!string();
50     foreach ( d; diffs ) {
51         if (d.operation != Operation.INSERT) {
52             text.put(d.text);
53         }
54     }
55     return text.data();
56 }
57 
58 /**
59 * Compute and return the destination text (all equalities and insertions).
60 * @param diffs List of Diff objects.
61 * @return Destination text.
62 */
63 string diff_text2(Diff[] diffs) {
64     auto text = appender!string();
65     foreach ( d; diffs ) {
66         if (d.operation != Operation.DELETE) {
67             text.put(d.text);
68         }
69     }
70     return text.data();
71 }
72 
73 
74 /**
75  * Compute the Levenshtein distance; the number of inserted, deleted or
76  * substituted characters.
77  * @param diffs List of Diff objects.
78  * @return Number of changes.
79  */
80 int levenshtein(Diff[] diffs) {
81     int levenshtein = 0;
82     int insertions = 0;
83     int deletions = 0;
84     foreach ( d ; diffs ) {
85         final switch (d.operation) {
86           case Operation.INSERT:
87             insertions += d.text.length;
88             break;
89           case Operation.DELETE:
90             deletions += d.text.length;
91             break;
92           case Operation.EQUAL:
93             // A deletion and an insertion is one substitution.
94             levenshtein += max(insertions, deletions);
95             insertions = 0;
96             deletions = 0;
97             break;
98         }
99     }
100     levenshtein += max(insertions, deletions);
101     return levenshtein;
102 }
103 
104 
105 /**
106  * Crush the diff into an encoded string which describes the operations
107  * required to transform text1 into text2.
108  * E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
109  * Operations are tab-separated.  Inserted text is escaped using %xx
110  * notation.
111  * @param diffs Array of Diff objects.
112  * @return Delta text.
113  */
114 string toDelta(in Diff[] diffs)
115 {
116     import std.range : walkLength;
117     import std.format : formattedWrite;
118     import std.uri : encode;
119     auto text = appender!string;
120     foreach (aDiff; diffs) {
121         final switch (aDiff.operation) {
122             case Operation.INSERT:
123                 text.formattedWrite("+%s\t", encode(aDiff.text).replace("%20", " "));
124                 break;
125             case Operation.DELETE:
126                 text.formattedWrite("-%s\t", aDiff.text.walkLength);
127                 break;
128             case Operation.EQUAL:
129                 text.formattedWrite("=%s\t", aDiff.text.walkLength);
130                 break;
131         }
132     }
133     string delta = text.data;
134     if (delta.length != 0) {
135         // Strip off trailing tab character.
136         delta = delta[0 .. $-1];
137         delta = unescapeForEncodeUriCompatibility(delta);
138     }
139     return delta;
140 }
141 
142 /**
143  * Given the original text1, and an encoded string which describes the
144  * operations required to transform text1 into text2, comAdde the full diff.
145  * @param text1 Source string for the diff.
146  * @param delta Delta text.
147  * @return Array of Diff objects or null if invalid.
148  * @throws ArgumentException If invalid input.
149  */
150 Diff[] fromDelta(string text1, string delta)
151 {
152     import std.algorithm;
153     import std.range;
154     import std..string : format;
155     import std.uri : decodeComponent;
156 
157     auto diffs = appender!(Diff[]);
158     foreach (token; delta.splitter("\t")) {
159         if (token.length == 0) {
160             // Blank tokens are ok (from a trailing \t).
161             continue;
162         }
163         // Each token begins with a one character parameter which specifies the
164         // operation of this token (delete, insert, equality).
165         string param = token[1 .. $];
166         switch (token[0]) {
167             case '+':
168                 // decode would change all "+" to " "
169                 param = param.replace("+", "%2b");
170                 param = decodeComponent(param);
171                 //} catch (UnsupportedEncodingException e) {
172                 //  // Not likely on modern system.
173                 //  throw new Error("This system does not support UTF-8.", e);
174                 //} catch (IllegalArgumentException e) {
175                 //  // Malformed URI sequence.
176                 //  throw new IllegalArgumentException(
177                 //      "Illegal escape in diff_fromDelta: " + param, e);
178                 //}
179                 diffs ~= Diff(Operation.INSERT, param);
180                 break;
181             case '-': // Fall through.
182             case '=':
183                 int n;
184                 try {
185                     n = param.to!int;
186                 } catch (ConvException e) {
187                     throw new Exception("Invalid number in diff_fromDelta: " ~ param);
188                 }
189                 enforce (n >= 0, "Negative number in diff_fromDelta: " ~ param);
190 
191                 string text;
192                 enforce (n <= text1.walkLength, "Delta length larger than source text length.");
193                 text = text1.takeExactly(n).array.to!string;
194                 text1.popFrontN(n);
195                 if (token[0] == '=') {
196                     diffs ~= Diff(Operation.EQUAL, text);
197                 } else {
198                     diffs ~= Diff(Operation.DELETE, text);
199                 }
200                 break;
201             default:
202                 // Anything else is an error.
203                 throw new Exception(
204                 "Invalid diff operation in diff_fromDelta: " ~ token[0]);
205         }
206     }
207     if (text1.length > 0)
208         throw new Exception("Delta length smaller than source text length.");
209     return diffs.data;
210 }
211 
212 struct LinesToCharsResult {
213     string text1;
214     string text2;
215     string[] uniqueStrings;
216     bool opEquals()(auto ref const LinesToCharsResult other) const {
217         return text1 == other.text1 && 
218                text2 == other.text2 &&
219                uniqueStrings == other.uniqueStrings;
220     }
221 }
222 
223 LinesToCharsResult linesToChars(string text1, string text2) 
224 {
225     size_t[string] lineHash;
226     LinesToCharsResult res;
227     res.uniqueStrings = [""];
228     res.text1 = linesToCharsMunge(text1, res.uniqueStrings, lineHash);
229     res.text2 = linesToCharsMunge(text2, res.uniqueStrings, lineHash);
230     return res;
231 }
232 
233 string linesToCharsMunge(string text, ref string[] lines, ref size_t[string] linehash)
234 {
235     sizediff_t lineStart = 0;
236     sizediff_t lineEnd = -1;
237     string line;
238     auto chars = appender!string();
239     while( lineEnd+1 < text.length ){
240         lineEnd = text.indexOfAlt("\n", lineStart);
241         if( lineEnd == -1 ) lineEnd = text.length - 1;
242         line = text[lineStart..lineEnd + 1];
243         lineStart = lineEnd + 1;
244 
245         if (auto pv = line in linehash) {
246             chars ~= cast(dchar)*pv;
247         } else {
248             lines ~= line;
249             linehash[line] = lines.length - 1;
250             chars ~= cast(dchar)(lines.length -1);
251         }
252     }
253     return chars.data();
254 }
255 
256 void charsToLines(Diff[] diffs, string[] lineArray)
257 {
258     foreach (ref d; diffs) {
259         auto str = appender!string();
260         foreach (dchar ch; d.text)
261             str.put(lineArray[ch]);
262         d.text = str.data();
263     }
264 }
265 
266 size_t commonPrefix(string text1, string text2)
267 {
268     auto n = min(text1.length, text2.length);
269     foreach (i; 0 .. n)
270         if (text1[i] != text2[i])
271             return i;
272     return n;
273 }
274 
275 size_t commonSuffix(string text1, string text2)
276 {
277     auto n = min(text1.length, text2.length);
278     foreach (i; 1 .. n+1)
279         if (text1[$-i] != text2[$-i])
280             return i-1;
281     return n;
282 }
283 
284 /**
285 * Determine if the suffix of one string is the prefix of another.
286 * @param text1 First string.
287 * @param text2 Second string.
288 * @return The number of characters common to the end of the first
289 *     string and the start of the second string.
290 */
291 size_t commonOverlap(string text1, string text2) {
292     // Cache the text lengths to prevent multiple calls.
293     auto text1_length = text1.length;
294     auto text2_length = text2.length;
295     // Eliminate the null case.
296     if (text1_length == 0 || text2_length == 0) return 0;
297 
298     // Truncate the longer string.
299     if (text1_length > text2_length) {
300         text1 = text1[$ - text2_length .. $];
301     } else if (text1_length < text2_length) {
302         text2 = text2[0 .. text1_length];
303     }
304     auto text_length = min(text1_length, text2_length);
305     // Quick check for the worst case.
306     if (text1 == text2) {
307         return text_length;
308     }
309 
310     // Start by looking for a single character match
311     // and increase length until no match is found.
312     // Performance analysis: http://neil.fraser.name/news/2010/11/04/
313     int best = 0;
314     int length = 1;
315     while (true) {
316         string pattern = text1[text_length - length .. $];
317         auto found = text2.indexOf(pattern);
318         if (found == -1) {
319             return best;
320         }
321         length += found;
322         if (found == 0 || text1[text_length - length .. $] == text2[0 .. length]) {
323             best = length;
324             length++;
325         }
326     }
327 }
328 
329 /**-
330 * The data structure representing a diff is a List of Diff objects:
331 * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"),
332 *  Diff(Operation.EQUAL, " world.")}
333 * which means: delete "Hello", add "Goodbye" and keep " world."
334 */
335 enum Operation { 
336     DELETE,
337     INSERT,
338     EQUAL
339 }
340 
341 
342 /**
343 * Struct representing one diff operation.
344 */
345 struct Diff {
346     Operation operation;
347     string text;
348 
349     this(Operation operation, string text)
350     {
351         this.operation = operation;
352         this.text = text;
353     }
354 
355     string toString()
356     {
357         //string prettyText = text.replace('\n', '\u00b6');
358         string op;
359         final switch(operation)  {
360             case Operation.DELETE:
361                 op = "DELETE"; break;
362             case Operation.INSERT:
363                 op = "INSERT"; break;
364             case Operation.EQUAL:
365                 op = "EQUAL"; break;
366         }
367         return "Diff(" ~ op ~ ",\"" ~ text ~ "\")";
368     }
369 
370     bool opEquals(const Diff other) const
371     {
372         return operation == other.operation && text == other.text;
373     }
374 }
375 
376 
377 /**
378  * Find the differences between two texts.
379  * Run a faster, slightly less optimal diff.
380  * This method allows the 'checklines' of diff_main() to be optional.
381  * Most of the time checklines is wanted, so default to true.
382  * @param text1 Old string to be diffed.
383  * @param text2 New string to be diffed.
384  * @return List of Diff objects.
385  */
386 Diff[] diff_main(string text1, string text2)
387 {
388     return diff_main(text1, text2, true);
389 }
390 
391 /**
392  * Find the differences between two texts.
393  * @param text1 Old string to be diffed.
394  * @param text2 New string to be diffed.
395  * @param checklines Speedup flag.  If false, then don't run a
396  *     line-level diff first to identify the changed areas.
397  *     If true, then run a faster slightly less optimal diff.
398  * @return List of Diff objects.
399  */
400 Diff[] diff_main(string text1, string text2, bool checklines)
401 {
402     // Set a deadline by which time the diff must be complete.
403     SysTime deadline;
404     if (diffTimeout <= 0.seconds) {
405         deadline = SysTime.max;
406     } else {
407         deadline = Clock.currTime(UTC()) + diffTimeout;
408     }
409     return diff_main(text1, text2, checklines, deadline);
410 }
411 
412 /**
413  * Find the differences between two texts.  Simplifies the problem by
414  * stripping any common prefix or suffix off the texts before diffing.
415  * @param text1 Old string to be diffed.
416  * @param text2 New string to be diffed.
417  * @param checklines Speedup flag.  If false, then don't run a
418  *     line-level diff first to identify the changed areas.
419  *     If true, then run a faster slightly less optimal diff.
420  * @param deadline Time when the diff should be complete by.  Used
421  *     internally for recursive calls.  Users should set DiffTimeout
422  *     instead.
423  * @return List of Diff objects.
424  */
425 Diff[] diff_main(string text1, string text2, bool checklines, SysTime deadline)
426 {
427     Diff[] diffs;
428     if( text1 == text2 ){
429         if( text1.length != 0 ) diffs ~= Diff(Operation.EQUAL, text1);
430         return diffs;
431     }
432 
433     auto pos = commonPrefix(text1, text2);
434     auto prefix = text1[0 .. pos];
435     text1 = text1[pos .. $];
436     text2 = text2[pos .. $];
437 
438     pos = commonSuffix(text1, text2);
439     auto suffix = text1[$ - pos .. $];
440     text1 = text1[0 .. $ - pos];
441     text2 = text2[0 .. $ - pos];
442 
443     // Compute the diff on the middle block.
444     diffs = computeDiffs(text1, text2, checklines, deadline);
445 
446       // Restore the prefix and suffix.
447     if( prefix.length != 0 ) {
448         diffs.insert(0, [Diff(Operation.EQUAL, prefix)]);
449     }
450     if( suffix.length != 0 ) {
451         diffs ~= Diff(Operation.EQUAL, suffix);
452     }
453 
454     cleanupMerge(diffs);
455     return diffs;
456 }
457 
458 
459 
460 struct HalfMatch {
461     string prefix1;
462     string suffix1;
463     string suffix2;
464     string prefix2;
465     string commonMiddle;
466 
467     bool opEquals()(auto ref const HalfMatch other) const {
468         return prefix1 == other.prefix1 &&
469                suffix1 == other.suffix1 &&
470                prefix2 == other.prefix2 &&
471                suffix2 == other.suffix2;
472     }
473 }
474 /*
475  * Do the two texts share a Substring which is at least half the length of
476  * the longer text?
477  * This speedup can produce non-minimal diffs.
478  * @param text1 First string.
479  * @param text2 Second string.
480  * @return Five element String array, containing the prefix of text1, the
481  *     suffix of text1, the prefix of text2, the suffix of text2 and the
482  *     common middle.  Or null if there was no match.
483  */
484 bool halfMatch(string text1, string text2, out HalfMatch halfmatch){
485     if (diffTimeout <= 0.seconds) {
486         // Don't risk returning a non-optimal diff if we have unlimited time.
487         return false;
488     }
489     string longtext = text1.length > text2.length ? text1 : text2;
490     string shorttext = text1.length > text2.length ? text2 : text1;
491     if( longtext.length < 4 || shorttext.length * 2 < longtext.length ) return false; //pointless
492     HalfMatch hm1;
493     HalfMatch hm2;
494     auto is_hm1 = halfMatchI(longtext, shorttext, (longtext.length + 3) / 4, hm1);
495     auto is_hm2 = halfMatchI(longtext, shorttext, (longtext.length + 1) / 2, hm2);
496     HalfMatch hm;
497     if( !is_hm1 && !is_hm2 ){ 
498         return false;
499     } else if( !is_hm2  ){
500         hm = hm1;
501     } else if( !is_hm1 ){
502         hm = hm2;
503     } else {
504         hm = hm1.commonMiddle.length > hm2.commonMiddle.length ? hm1 : hm2;
505     }
506 
507     if( text1.length > text2.length ) { 
508         halfmatch = hm;
509         return true;
510     }
511     halfmatch.prefix1 = hm.prefix2;
512     halfmatch.suffix1 = hm.suffix2;
513     halfmatch.prefix2 = hm.prefix1;
514     halfmatch.suffix2 = hm.suffix1;
515     halfmatch.commonMiddle = hm.commonMiddle;
516     return true;
517 }
518 
519 
520 bool halfMatchI(string longtext, string shorttext, sizediff_t i, out HalfMatch hm){
521     auto seed = longtext.substr(i, longtext.length / 4);
522     sizediff_t j = -1;
523     string best_common;
524     string best_longtext_a;
525     string best_longtext_b;
526     string best_shorttext_a;
527     string best_shorttext_b;
528     while( j < cast(sizediff_t)shorttext.length && ( j = shorttext.indexOfAlt(seed, j + 1)) != -1 ){
529         auto prefixLen = commonPrefix(longtext[i .. $], shorttext[j .. $]);
530         auto suffixLen = commonSuffix(longtext[0 .. i], shorttext[0 .. j]);
531         if( best_common.length < suffixLen + prefixLen ) {
532             best_common = shorttext.substr(j - suffixLen, suffixLen) ~ shorttext.substr(j, prefixLen);
533             best_longtext_a = longtext[0 .. i - suffixLen];
534             best_longtext_b = longtext[i + prefixLen .. $];
535             best_shorttext_a = shorttext[0 .. j - suffixLen];
536             best_shorttext_b = shorttext[j + prefixLen .. $];
537         }
538     }
539     if( best_common.length * 2 >= longtext.length ) {
540         hm.prefix1 = best_longtext_a;
541         hm.suffix1 = best_longtext_b;
542         hm.prefix2 = best_shorttext_a;
543         hm.suffix2 = best_shorttext_b;
544         hm.commonMiddle = best_common;
545         return true;
546     } else {
547         return false;
548     }
549 }
550 
551 
552 /**
553      * Find the differences between two texts.  Assumes that the texts do not
554      * have any common prefix or suffix.
555      * @param text1 Old string to be diffed.
556      * @param text2 New string to be diffed.
557      * @param checklines Speedup flag.  If false, then don't run a
558      *     line-level diff first to identify the changed areas.
559      *     If true, then run a faster slightly less optimal diff.
560      * @param deadline Time when the diff should be complete by.
561      * @return List of Diff objects.
562      */
563 Diff[] computeDiffs(string text1, string text2, bool checklines, SysTime deadline)
564 {
565     Diff[] diffs;
566 
567     if( text1.length == 0 ){
568         diffs ~= Diff(Operation.INSERT, text2);
569         return diffs;
570     }
571     if( text2.length == 0 ){
572         diffs ~= Diff(Operation.DELETE, text1);
573         return diffs;
574     }
575 
576     auto longtext = text1.length > text2.length ? text1 : text2;
577     auto shorttext = text1.length > text2.length ? text2 : text1;
578     auto i = longtext.indexOf(shorttext);
579     if( i != -1 ){
580         Operation op = (text1.length > text2.length) ? Operation.DELETE : Operation.INSERT;
581         diffs ~= Diff(op, longtext[0 .. i]);
582         diffs ~= Diff(Operation.EQUAL, shorttext);
583         diffs ~= Diff(op, longtext[i + shorttext.length .. $]);
584         return diffs;
585     }
586 
587     if( shorttext.length == 1 ){
588         diffs ~= Diff(Operation.DELETE, text1);
589         diffs ~= Diff(Operation.INSERT, text2);
590         return diffs;
591     }
592     HalfMatch hm;
593     auto is_hm = halfMatch(text1, text2, hm);
594     if( is_hm ){
595         auto diffs_a = diff_main(hm.prefix1, hm.prefix2, checklines, deadline);
596         auto diffs_b = diff_main(hm.suffix1, hm.suffix2, checklines, deadline);
597 
598         diffs = diffs_a;
599         diffs ~= Diff(Operation.EQUAL, hm.commonMiddle);
600         diffs ~= diffs_b;
601         return diffs;
602     }
603 
604     if( checklines && text1.length > 100 && text2.length > 100 ){
605         return diff_lineMode(text1, text2, deadline);
606     }
607 
608     return bisect(text1, text2, deadline);
609 }
610 
611 Diff[] diff_lineMode(string text1, string text2, SysTime deadline)
612 {
613     auto b = linesToChars(text1, text2);
614 
615     auto diffs = diff_main(b.text1, b.text2, false, deadline);
616 
617     charsToLines(diffs, b.uniqueStrings);
618     cleanupSemantic(diffs);
619 
620     diffs ~= Diff(Operation.EQUAL, "");
621     auto pointer = 0;
622     auto count_delete = 0;
623     auto count_insert = 0;
624     string text_delete;
625     string text_insert;
626     while( pointer < diffs.length ){
627         final switch( diffs[pointer].operation ) {
628             case Operation.INSERT:
629                 count_insert++;
630                 text_insert ~= diffs[pointer].text;
631                 break;
632             case Operation.DELETE:
633                 count_delete++;
634                 text_delete ~= diffs[pointer].text;
635                 break;
636             case Operation.EQUAL:
637                 if( count_delete >= 1 && count_insert >= 1 ){
638 
639                     diffs.remove(pointer - count_delete - count_insert,
640                                  count_delete + count_insert);
641 
642                     pointer = pointer - count_delete - count_insert;
643 
644                     auto a = diff_main(text_delete, text_insert, false, deadline);
645                     diffs.insert(pointer, a);
646                     pointer += a.length;
647                 }
648                 count_insert = 0;
649                 count_delete = 0;
650                 text_delete = "";
651                 text_insert = "";
652                 break;
653         }
654         pointer++;
655     }
656     diffs.remove(diffs.length - 1);
657     return diffs;
658 }
659 
660 Diff[] bisect(string text1, string text2, SysTime deadline)
661 {
662     auto text1_len = text1.length;
663     auto text2_len = text2.length;
664     auto max_d = (text1_len + text2_len + 1) / 2;
665     auto v_offset = max_d;
666     auto v_len = 2 * max_d;
667     sizediff_t[] v1;
668     sizediff_t[] v2;
669     for( auto x = 0; x < v_len; x++ ){
670         v1 ~= -1;
671         v2 ~= -1;
672     }
673     v1[v_offset + 1] = 0;
674     v2[v_offset + 1] = 0;
675     auto delta = text1_len - text2_len;
676     bool front = (delta % 2 != 0);
677     auto k1start = 0;
678     auto k1end = 0;
679     auto k2start = 0;
680     auto k2end = 0;
681     for( auto d = 0; d < max_d; d++ ){
682         // Bail out if deadline is reached.
683         if (Clock.currTime(UTC()) > deadline) {
684             break;
685         }
686 
687         for( auto k1 = -d + k1start; k1 <= d - k1end; k1 += 2 ){
688             auto k1_offset = v_offset + k1;
689             sizediff_t x1;
690             if( k1 == -d || k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1] ) {
691                 x1 = v1[k1_offset + 1];
692             } else {
693                 x1 = v1[k1_offset - 1] + 1;
694             }
695             auto y1 = x1 - k1;
696             while( x1 < text1_len && y1 < text2_len && text1[x1] == text2[y1] ){
697                 x1++;
698                 y1++;
699             }
700             v1[k1_offset] = x1;
701             if( x1 > text1_len) {
702                 k1end += 2;
703             } else if( y1 > text2_len ){
704                 k1start += 2;
705             } else if( front ){
706                 auto k2_offset = v_offset + delta - k1;
707                 if( k2_offset >= 0 && k2_offset < v_len && v2[k2_offset] != -1) {
708                     auto x2 = text1_len - v2[k2_offset];
709                     if( x1 >= x2 ) return bisectSplit(text1, text2, x1, y1, deadline);
710                 }
711             } 
712         }
713         for( auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
714             auto k2_offset = v_offset + k2;
715             sizediff_t x2;
716             if (k2 == -d || k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) {
717                 x2 = v2[k2_offset + 1];
718             } else {
719                 x2 = v2[k2_offset - 1] + 1;
720             }
721             auto y2 = x2 - k2;
722             while( x2 < text1_len && y2 < text2_len
723                     && text1[text1_len - x2 - 1]
724                     == text2[text2_len - y2 - 1] ){
725                 x2++;
726                 y2++;
727             }
728             v2[k2_offset] = x2;
729             if (x2 > text1_len) {
730                 // Ran off the left of the graph.
731                 k2end += 2;
732             } else if (y2 > text2_len) {
733                 // Ran off the top of the graph.
734                 k2start += 2;
735             } else if (!front) {
736                 auto k1_offset = v_offset + delta - k2;
737                 if (k1_offset >= 0 && k1_offset < v_len && v1[k1_offset] != -1) {
738                     auto x1 = v1[k1_offset];
739                     auto y1 = v_offset + x1 - k1_offset;
740                     // Mirror x2 onto top-left coordinate system.
741                     x2 = text1_len - v2[k2_offset];
742                     if (x1 >= x2) {
743                         // Overlap detected.
744                         return bisectSplit(text1, text2, x1, y1, deadline);
745                     }
746                 }
747             }
748         }
749     }
750     Diff[] diffs;
751     diffs ~= Diff(Operation.DELETE, text1);
752     diffs ~= Diff(Operation.INSERT, text2);
753     return diffs;
754 }
755 
756 
757 Diff[] bisectSplit(string text1, string text2, sizediff_t x, sizediff_t y, SysTime deadline)
758 {
759     auto text1a = text1[0 .. x];
760     auto text2a = text2[0 .. y];
761     auto text1b = text1[x .. $];
762     auto text2b = text2[y .. $];
763 
764     Diff[] diffs = diff_main(text1a, text2a, false, deadline);
765     Diff[] diffsb = diff_main(text1b, text2b, false, deadline);
766     diffs ~= diffsb;
767     return diffs;
768 }
769 
770 void cleanupSemantic(ref Diff[] diffs) 
771 {
772     bool changes = false;
773     size_t[] equalities;
774 
775     string last_equality = null;
776     size_t pointer = 0;
777     size_t length_insertions1 = 0;
778     size_t length_deletions1 = 0;
779     size_t length_insertions2 = 0;
780     size_t length_deletions2 = 0;
781 
782     while( pointer < diffs.length) {
783         if( diffs[pointer].operation == Operation.EQUAL ){
784             equalities ~= pointer;
785             length_insertions1 = length_insertions2;
786             length_deletions1 = length_deletions2;
787             length_insertions2 = 0;
788             length_deletions2 = 0;
789             last_equality = diffs[pointer].text;
790         } else {
791             if( diffs[pointer].operation == Operation.INSERT ){
792                 length_insertions2 += diffs[pointer].text.length;
793             } else {
794                 length_deletions2 += diffs[pointer].text.length;
795             }
796 
797             if( last_equality !is null && 
798                 (last_equality.length <= max(length_insertions1, length_deletions1))
799                 && (last_equality.length <= max(length_insertions2, length_deletions2)))
800             {
801                 // Duplicate record.
802                 diffs.insert(equalities[$-1], [Diff(Operation.DELETE, last_equality)]);
803                 diffs[equalities[$-1]+1] = Diff(Operation.INSERT, diffs[equalities[$-1]+1].text);
804 
805                 // Throw away the equality we just deleted.
806                 equalities.length--;
807                 if (equalities.length > 0) {
808                     // Throw away the previous equality (it needs to be reevaluated).
809                     equalities.length--;
810                 }
811                 equalities.assumeSafeAppend();
812 
813                 pointer = equalities.length > 0 ? equalities[$-1] : -1;
814                 length_insertions1 = 0;
815                 length_deletions1 = 0;
816                 length_insertions2 = 0;
817                 length_deletions2 = 0;
818                 last_equality = null;
819                 changes = true;
820             }
821         }
822         pointer++;
823     }
824 
825     if( changes ) {
826         cleanupMerge(diffs);
827     }
828     cleanupSemanticLossless(diffs);
829 
830     // Find any overlaps between deletions and insertions.
831     // e.g: <del>abcxxx</del><ins>xxxdef</ins>
832     //   -> <del>abc</del>xxx<ins>def</ins>
833     // e.g: <del>xxxabc</del><ins>defxxx</ins>
834     //   -> <ins>def</ins>xxx<del>abc</del>
835     // Only extract an overlap if it is as big as the edit ahead or behind it.
836 
837     pointer = 1;
838     while( pointer < diffs.length ){
839         if( diffs[pointer - 1].operation == Operation.DELETE &&
840             diffs[pointer].operation == Operation.INSERT) {
841             auto deletion = diffs[pointer - 1].text;
842             auto insertion = diffs[pointer].text;
843             auto overlap_len1 = commonOverlap(deletion, insertion);
844             auto overlap_len2 = commonOverlap(insertion, deletion);
845             if( overlap_len1 >= overlap_len2 ){
846                 if( overlap_len1 * 2 >= deletion.length || 
847                     overlap_len1 * 2 >= insertion.length) {
848                     //Overlap found.
849                     //Insert an equality and trim the surrounding edits.
850                     diffs.insert(pointer, [Diff(Operation.EQUAL, insertion[0 .. overlap_len1])]);
851                     diffs[pointer - 1].text = deletion[0 .. $ - overlap_len1];
852                     diffs[pointer + 1].text = insertion[overlap_len1 .. $];
853                     pointer++;
854                 }
855             } else {
856                 if( overlap_len2 * 2 >= deletion.length ||
857                     overlap_len2 * 2 >= insertion.length) {
858                     diffs.insert(pointer, [Diff(Operation.EQUAL, deletion[0 .. overlap_len2])]);
859 
860                     diffs[pointer - 1].operation = Operation.INSERT;
861                     diffs[pointer - 1].text = insertion[0 .. $ - overlap_len2];
862                     diffs[pointer + 1].operation = Operation.DELETE;
863                     diffs[pointer + 1].text = deletion[overlap_len2 .. $];
864                     pointer++;
865                 }
866             }
867             pointer++;
868         }
869         pointer++;
870     }
871 }
872 
873 /**
874  * Look for single edits surrounded on both sides by equalities
875  * which can be shifted sideways to align the edit to a word boundary.
876  * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
877  * @param diffs List of Diff objects.
878  */
879 void cleanupSemanticLossless(ref Diff[] diffs)
880 {
881     auto pointer = 1;
882     // Intentionally ignore the first and last element (don't need checking).
883     while( pointer < cast(sizediff_t)(diffs.length) - 1 ){
884         if( diffs[pointer-1].operation == Operation.EQUAL &&
885             diffs[pointer+1].operation == Operation.EQUAL) {
886             // This is a single edit surrounded by equalities.
887             auto equality1 = diffs[pointer-1].text;
888             auto edit = diffs[pointer].text;
889             auto equality2 = diffs[pointer+1].text;
890 
891             // First, shift the edit as far left as possible
892             auto commonOffset = commonSuffix(equality1, edit);
893             if( commonOffset > 0 ){
894                 auto commonString = edit[$ - commonOffset .. $];
895                 equality1 = equality1[0 .. $ - commonOffset];
896                 edit = commonString ~ edit[0 .. $ - commonOffset];
897                 equality2 = commonString ~ equality2;
898             }
899 
900             // Second, step character by character right,
901             // looking for the best fit.
902             auto best_equality1 = equality1;
903             auto best_edit = edit;
904             auto best_equality2 = equality2;
905             auto best_score = cleanupSemanticScore(equality1, edit) + cleanupSemanticScore(edit, equality2);
906             while( edit.length != 0 && equality2.length != 0 && edit[0] == equality2[0]){
907                 equality1 ~= edit[0];
908                 edit =  edit[1 .. $] ~ equality2[0];
909                 equality2 = equality2[1 .. $];
910                 auto score = cleanupSemanticScore(equality1, edit) + cleanupSemanticScore(edit, equality2);
911                 // The >= encourages trailing rather than leading whitespace on
912                 // edits.
913                 if (score >= best_score) {
914                     best_score = score;
915                     best_equality1 = equality1;
916                     best_edit = edit;
917                     best_equality2 = equality2;
918                 }
919             }
920 
921             if( diffs[pointer-1].text != best_equality1 ){
922                 // We have an improvement, save it back to the diff.
923                 if( best_equality1.length != 0) {
924                     diffs[pointer-1].text = best_equality1;
925                 } else {
926                     diffs.remove(pointer - 1);
927                     pointer--;
928                 }
929                 diffs[pointer].text = best_edit;
930                 if( best_equality2.length != 0 ){
931                     diffs[pointer+1].text = best_equality2;
932                 } else {
933                     diffs.remove(pointer + 1);
934                     pointer--;
935                 }
936             }
937         }
938         pointer++;
939     }
940 }
941 
942 
943 
944 
945 /**
946  * Reorder and merge like edit sections.  Merge equalities.
947  * Any edit section can move as sizediff_t as it doesn't cross an equality.
948  * @param diffs List of Diff objects.
949  */
950 void cleanupMerge(ref Diff[] diffs) {
951     diffs ~= Diff(Operation.EQUAL, "");
952     size_t pointer = 0;
953     size_t count_delete = 0;
954     size_t count_insert = 0;
955     string text_delete;
956     string text_insert;
957     while(pointer < diffs.length) {
958         final switch(diffs[pointer].operation){
959             case Operation.INSERT:
960                 count_insert++;
961                 text_insert ~= diffs[pointer].text;
962                 pointer++;
963                 break;
964             case Operation.DELETE:
965                 count_delete++;
966                 text_delete ~= diffs[pointer].text;
967                 pointer++;
968                 break;
969             case Operation.EQUAL:
970                 // Upon reaching an equality, check for prior redundancies.
971                 if (count_delete + count_insert > 1) {
972                     if (count_delete != 0 && count_insert != 0) {
973                         // Factor out any common prefixies.
974                         if (auto commonlength = commonPrefix(text_insert, text_delete)) {
975                             if (pointer > count_delete + count_insert &&
976                                 diffs[pointer - count_delete - count_insert - 1].operation
977                                     == Operation.EQUAL)
978                             {
979                                 diffs[pointer - count_delete - count_insert - 1].text
980                                     ~= text_insert[0 .. commonlength];
981                             } else {
982                                 diffs.insert(0, [Diff(Operation.EQUAL, text_insert[0 .. commonlength])]);
983                                 pointer++;
984                             }
985                             text_insert = text_insert[commonlength .. $];
986                             text_delete = text_delete[commonlength .. $];
987                         }
988                         // Factor out any common suffixies.
989                         if (auto commonlength = commonSuffix(text_insert, text_delete)) {
990                             diffs[pointer].text = text_insert[$ - commonlength .. $] ~ diffs[pointer].text;
991                             text_insert = text_insert[0 .. $ - commonlength];
992                             text_delete = text_delete[0 .. $ - commonlength];
993                         }
994                     }
995                     // Delete the offending records and add the merged ones.
996                     if (count_delete == 0) {
997                         diffs.splice(pointer - count_insert, count_delete + count_insert, [Diff(Operation.INSERT, text_insert)]);
998                     } else if (count_insert == 0) {
999 
1000                         diffs.splice(pointer - count_delete, count_delete + count_insert, [Diff(Operation.DELETE, text_delete)]);
1001                     } else {
1002                         diffs.splice(pointer - count_delete - count_insert, count_delete + count_insert, [Diff(Operation.DELETE, text_delete), Diff(Operation.INSERT, text_insert)]);
1003                     }
1004                     pointer = pointer - count_delete - count_insert +
1005                             (count_delete != 0 ? 1 : 0) + (count_insert != 0 ? 1 : 0) + 1;
1006                 } else if( pointer != 0 && diffs[pointer-1].operation == Operation.EQUAL ){
1007                     diffs[pointer - 1].text ~= diffs[pointer].text;
1008                     diffs.remove(pointer);
1009                 } else {
1010                     pointer++;
1011                 }
1012                 count_insert = 0;
1013                 count_delete = 0;
1014                 text_delete = "";
1015                 text_insert = "";
1016                 break;
1017         }
1018     }
1019     if( diffs[$-1].text.length == 0){
1020         diffs.length--;
1021     }
1022     
1023     bool changes = false;
1024     pointer = 1;
1025     while( pointer + 1 < diffs.length ) {
1026         if( diffs[pointer - 1].operation == Operation.EQUAL && 
1027             diffs[pointer + 1].operation == Operation.EQUAL)
1028         {
1029             if( diffs[pointer].text.endsWith(diffs[pointer - 1].text)) {
1030                 diffs[pointer].text = diffs[pointer - 1].text ~ diffs[pointer].text[0 .. $ - diffs[pointer - 1].text.length];
1031                 diffs[pointer + 1].text = diffs[pointer - 1].text ~ diffs[pointer + 1].text;
1032                 diffs.splice(pointer - 1, 1);
1033                 changes = true;
1034             } else if( diffs[pointer].text.startsWith(diffs[pointer + 1].text)) {
1035                 diffs[pointer - 1].text ~= diffs[pointer + 1].text;
1036                 diffs[pointer].text =
1037                     diffs[pointer].text[diffs[pointer + 1].text.length .. $]
1038                     ~ diffs[pointer + 1].text;
1039                 diffs.splice(pointer + 1, 1);
1040                 changes = true;
1041             }
1042         }
1043         pointer++;
1044     }
1045     if( changes ) cleanupMerge(diffs);
1046 
1047 }
1048 
1049 
1050 
1051 /**
1052  * Given two strings, comAdde a score representing whether the internal
1053  * boundary falls on logical boundaries.
1054  * Scores range from 6 (best) to 0 (worst).
1055  * @param one First string.
1056  * @param two Second string.
1057  * @return The score.
1058  */
1059 int cleanupSemanticScore(string one, string two) 
1060 {
1061     if( one.length == 0 || two.length == 0) return 6; //Edges are the best
1062     auto char1 = one[$-1];
1063     auto char2 = two[0];
1064 
1065     auto nonAlphaNumeric1 = !(isAlpha(char1) || isNumber(char1));    
1066     auto nonAlphaNumeric2 = !(isAlpha(char2) || isNumber(char2));
1067     auto whitespace1 = nonAlphaNumeric1 && isWhite(char1);
1068     auto whitespace2 = nonAlphaNumeric2 && isWhite(char2);
1069     auto lineBreak1 = whitespace1 && isControl(char1);
1070     auto lineBreak2 = whitespace2 && isControl(char2);
1071     auto blankLine1 = lineBreak1 &&  match(one, `\n\r?\n\Z`);
1072     auto blankLine2 = lineBreak2 &&  match(two, `\A\r?\n\r?\n`);
1073 
1074     if (blankLine1 || blankLine2) return 5;
1075     else if (lineBreak1 || lineBreak2) return 4;
1076     else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) return 3;
1077     else if (whitespace1 || whitespace2) return 2;
1078     else if (nonAlphaNumeric1 || nonAlphaNumeric2) return 1;
1079     
1080     return 0;
1081 }
1082 
1083 
1084 /**
1085  * Reduce the number of edits by eliminating operationally trivial
1086  * equalities.
1087  * @param diffs List of Diff objects.
1088  */
1089 void cleanupEfficiency(ref Diff[] diffs) {
1090     bool changes = false;
1091     size_t[] equalities;
1092     string lastequality;
1093     size_t pointer = 0;
1094     auto pre_ins = false;
1095     auto pre_del = false;
1096     auto post_ins = false;
1097     auto post_del = false;
1098     while( pointer < diffs.length ){
1099         if( diffs[pointer].operation == Operation.EQUAL ){
1100             if( diffs[pointer].text.length < DIFF_EDIT_COST && (post_ins || post_del)) {
1101                 equalities ~= pointer;
1102                 pre_ins = post_ins;
1103                 pre_del = post_del;
1104                 lastequality = diffs[pointer].text;
1105             } else {
1106                 equalities.length = 0;
1107                 equalities.assumeSafeAppend;
1108                 lastequality = "";
1109             }
1110             post_ins = false;
1111             post_del = false;
1112         } else {
1113             if( diffs[pointer].operation == Operation.DELETE ){
1114                 post_del = true;
1115             } else {
1116                 post_ins = true;
1117             }
1118 
1119             if( lastequality.length != 0
1120                 && (
1121                     (pre_ins && pre_del && post_ins && post_del)
1122                     || ((lastequality.length < DIFF_EDIT_COST / 2)
1123                         && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3)
1124                     )
1125                 )
1126             {
1127                 diffs.insert(equalities[$-1], [Diff(Operation.DELETE, lastequality)]);
1128                 diffs[equalities[$-1] + 1].operation = Operation.INSERT;
1129                 equalities.length--;
1130                 equalities.assumeSafeAppend;
1131                 lastequality = "";
1132                 if( pre_ins && pre_del ){
1133                     post_ins = true;
1134                     post_del = true;
1135                     equalities.length = 0;
1136                     equalities.assumeSafeAppend;
1137                 } else {
1138                     if( equalities.length > 0 ) {
1139                         equalities.length--;
1140                         equalities.assumeSafeAppend;
1141                     }
1142 
1143                     pointer = equalities.length > 0 ? equalities[$-1] : -1;
1144                     post_ins = false;
1145                     post_del = false;
1146                 }
1147                 changes = true;
1148             }
1149         }
1150         pointer++;
1151     }
1152 
1153     if( changes ){
1154         cleanupMerge(diffs);
1155     }
1156 }
1157 
1158 /**
1159  * loc is a location in text1, comAdde and return the equivalent location in
1160  * text2.
1161  * e.g. "The cat" vs "The big cat", 1->1, 5->8
1162  * @param diffs List of Diff objects.
1163  * @param loc Location within text1.
1164  * @return Location within text2.
1165  */
1166 sizediff_t xIndex(Diff[] diffs, sizediff_t loc){
1167     auto chars1 = 0;
1168     auto chars2 = 0;
1169     auto last_chars1 = 0;
1170     auto last_chars2 = 0;
1171     Diff lastDiff;
1172     foreach ( diff; diffs) {
1173         if (diff.operation != Operation.INSERT) {
1174             // Equality or deletion.
1175             chars1 += diff.text.length;
1176         }
1177         if (diff.operation != Operation.DELETE) {
1178             // Equality or insertion.
1179             chars2 += diff.text.length;
1180         }
1181         if (chars1 > loc) {
1182             // Overshot the location.
1183             lastDiff = diff;
1184             break;
1185         }
1186         last_chars1 = chars1;
1187         last_chars2 = chars2;
1188     }
1189     if (lastDiff.operation == Operation.DELETE) {
1190         // The location was deleted.
1191         return last_chars2;
1192     }
1193     // Add the remaining character length.
1194     return last_chars2 + (loc - last_chars1);
1195 }
1196 
1197 /**
1198  * Unescape selected chars for compatability with JavaScript's encodeURI.
1199  * In speed critical applications this could be dropped since the
1200  * receiving application will certainly decode these fine.
1201  * Note that this function is case-sensitive.  Thus "%3F" would not be
1202  * unescaped.  But this is ok because it is only called with the output of
1203  * HttpUtility.UrlEncode which returns lowercase hex.
1204  *
1205  * Example: "%3f" -> "?", "%24" -> "$", etc.
1206  *
1207  * @param str The string to escape.
1208  * @return The escaped string.
1209  */
1210 public static string unescapeForEncodeUriCompatibility(string str)
1211 {
1212     // FIXME: this is ridiculously inefficient
1213     return str.replace("%21", "!").replace("%7e", "~")
1214       .replace("%27", "'").replace("%28", "(").replace("%29", ")")
1215       .replace("%3b", ";").replace("%2f", "/").replace("%3f", "?")
1216       .replace("%3a", ":").replace("%40", "@").replace("%26", "&")
1217       .replace("%3d", "=").replace("%2b", "+").replace("%24", "$")
1218       .replace("%2c", ",").replace("%23", "#");
1219 }