ddmp.diff source code

1 /*
2  * Copyright 2008 Google Inc. All Rights Reserved.
3  * Copyright 2013-2014 Jan Krüger. All Rights Reserved.
4  * Author: fraser@google.com (Neil Fraser)
5  * Author: anteru@developer.shelter13.net (Matthaeus G. Chajdas)
6  * Author: jan@jandoe.de (Jan Krüger)
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *   http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  * Diff Match and Patch
21  * http://code.google.com/p/google-diff-match-patch/
22  */
23 module ddmp.diff;
24 
25 import ddmp.util;
26 
27 import std.array;
28 import std.conv;
29 import std.datetime : SysTime, Clock, UTC;
30 import std.exception : enforce;
31 import std..string : indexOf, endsWith, startsWith;
32 import std.uni;
33 import std.regex;
34 import std.algorithm : min, max;
35 import std.digest.sha;
36 import core.time;
37 
38 
39 Duration diffTimeout = 1.seconds;
40 int DIFF_EDIT_COST = 4;
41 
42 
43 /**
44 * Compute and return the source text (all equalities and deletions).
45 * @param diffs List of Diff objects.
46 * @return Source text.
47 */
48 string diff_text1(Diff[] diffs) {
49     auto text = appender!string();
50     foreach ( d; diffs ) {
51         if (d.operation != Operation.INSERT) {
52             text.put(d.text);
53         }
54     }
55     return text.data();
56 }
57 
58 /**
59 * Compute and return the destination text (all equalities and insertions).
60 * @param diffs List of Diff objects.
61 * @return Destination text.
62 */
63 string diff_text2(Diff[] diffs) {
64     auto text = appender!string();
65     foreach ( d; diffs ) {
66         if (d.operation != Operation.DELETE) {
67             text.put(d.text);
68         }
69     }
70     return text.data();
71 }
72 
73 
74 /**
75  * Compute the Levenshtein distance; the number of inserted, deleted or
76  * substituted characters.
77  * @param diffs List of Diff objects.
78  * @return Number of changes.
79  */
80 int levenshtein(Diff[] diffs) {
81     int levenshtein = 0;
82     int insertions = 0;
83     int deletions = 0;
84     foreach ( d ; diffs ) {
85         final switch (d.operation) {
86           case Operation.INSERT:
87             insertions += d.text.length;
88             break;
89           case Operation.DELETE:
90             deletions += d.text.length;
91             break;
92           case Operation.EQUAL:
93             // A deletion and an insertion is one substitution.
94             levenshtein += max(insertions, deletions);
95             insertions = 0;
96             deletions = 0;
97             break;
98         }
99     }
100     levenshtein += max(insertions, deletions);
101     return levenshtein;
102 }
103 
104 
105 /**
106  * Crush the diff into an encoded string which describes the operations
107  * required to transform text1 into text2.
108  * E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
109  * Operations are tab-separated.  Inserted text is escaped using %xx
110  * notation.
111  * @param diffs Array of Diff objects.
112  * @return Delta text.
113  */
114 string toDelta(in Diff[] diffs)
115 {
116     import std.format : formattedWrite;
117     import std.uri : encode;
118     auto text = appender!string;
119     foreach (aDiff; diffs) {
120         final switch (aDiff.operation) {
121             case Operation.INSERT:
122                 text.formattedWrite("+%s\t", encode(aDiff.text).replace("+", " "));
123                 break;
124             case Operation.DELETE:
125                 text.formattedWrite("-%s\t", aDiff.text.length);
126                 break;
127             case Operation.EQUAL:
128                 text.formattedWrite("=%s\t", aDiff.text.length);
129                 break;
130         }
131     }
132     string delta = text.data;
133     if (delta.length != 0) {
134         // Strip off trailing tab character.
135         delta = delta[0 .. $-1];
136         delta = unescapeForEncodeUriCompatability(delta);
137     }
138     return delta;
139 }
140 
141 /**
142  * Given the original text1, and an encoded string which describes the
143  * operations required to transform text1 into text2, comAdde the full diff.
144  * @param text1 Source string for the diff.
145  * @param delta Delta text.
146  * @return Array of Diff objects or null if invalid.
147  * @throws ArgumentException If invalid input.
148  */
149 Diff[] fromDelta(string text1, string delta)
150 {
151     import std..string : format;
152     import std.uri : decode;
153 
154     auto diffs = appender!(Diff[]);
155     int pointer = 0;  // Cursor in text1
156     foreach (token; delta.splitter("\t")) {
157         if (token.length == 0) {
158             // Blank tokens are ok (from a trailing \t).
159             continue;
160         }
161         // Each token begins with a one character parameter which specifies the
162         // operation of this token (delete, insert, equality).
163         string param = token[1 .. $];
164         switch (token[0]) {
165             case '+':
166                 // decode would change all "+" to " "
167                 param = param.replace("+", "%2b");
168                 param = decode(param);
169                 //} catch (UnsupportedEncodingException e) {
170                 //  // Not likely on modern system.
171                 //  throw new Error("This system does not support UTF-8.", e);
172                 //} catch (IllegalArgumentException e) {
173                 //  // Malformed URI sequence.
174                 //  throw new IllegalArgumentException(
175                 //      "Illegal escape in diff_fromDelta: " + param, e);
176                 //}
177                 diffs ~= Diff(Operation.INSERT, param);
178                 break;
179             case '-': // Fall through.
180             case '=':
181                 int n;
182                 try {
183                     n = param.to!int;
184                 } catch (ConvException e) {
185                     throw new Exception("Invalid number in diff_fromDelta: " ~ param);
186                 }
187                 enforce (n >= 0, "Negative number in diff_fromDelta: " ~ param);
188 
189                 string text;
190                 enforce (pointer + n <= text1.length, 
191                     format("Delta length (%s) larger than source text length (%s).",
192                     pointer, text1.length));
193                 text = text1[pointer .. pointer+n];
194                 pointer += n;
195                 if (token[0] == '=') {
196                     diffs ~= Diff(Operation.EQUAL, text);
197                 } else {
198                     diffs ~= Diff(Operation.DELETE, text);
199                 }
200                 break;
201             default:
202                 // Anything else is an error.
203                 throw new Exception(
204                 "Invalid diff operation in diff_fromDelta: " ~ token[0]);
205         }
206     }
207     if (pointer != text1.length)
208         throw new Exception(format("Delta length (´%s) smaller than source text length (%s).", pointer, text1.length));
209     return diffs.data;
210 }
211 
212 struct LinesToCharsResult {
213     string text1;
214     string text2;
215     string[] uniqueStrings;
216     bool opEquals()(auto ref const LinesToCharsResult other) const {
217         return text1 == other.text1 && 
218                text2 == other.text2 &&
219                uniqueStrings == other.uniqueStrings;
220     }
221 }
222 
223 LinesToCharsResult linesToChars(string text1, string text2) 
224 {
225     LinesToCharsResult res;
226     int[string] lineHash;
227     res.uniqueStrings ~= "";
228     res.text1 = linesToCharsMunge(text1, res.uniqueStrings, lineHash);
229     res.text2 = linesToCharsMunge(text2, res.uniqueStrings, lineHash);
230     return res;
231 }
232 
233 string linesToCharsMunge(string text, ref string[] lines, ref int[string] linehash)
234 {
235     sizediff_t lineStart = 0;
236     sizediff_t lineEnd = -1;
237     string line;
238     auto chars = appender!string();
239     while( lineEnd < cast(sizediff_t)text.length - 1 ){
240         lineEnd = text.indexOfAlt("\n", lineStart);
241         if( lineEnd == -1 ) lineEnd = text.length - 1;
242         line = text[lineStart..lineEnd + 1];
243         lineStart = lineEnd + 1;
244 
245         auto pv = line in linehash;
246         if( pv ) {
247             chars ~= cast(char)*pv;
248         } else {
249             lines ~= line;
250             linehash[line] = lines.length - 1;
251             chars ~= cast(char)(lines.length -1);
252         }
253     }
254     return chars.data();
255 }
256 
257 void charsToLines(ref Diff[] diffs, string[] lineArray)
258 {
259     foreach( d ; diffs){
260         auto str = appender!string();
261         for( auto y = 0; y < d.text.length; y++) {
262             str.put(lineArray[d.text[y]]);
263         }
264         d.text = str.data();
265     }
266 }
267 
268 int commonPrefix(string text1, string text2)
269 {
270     auto n = min(text1.length, text2.length);
271     for( auto i = 0; i < n; ++i ){
272         if( text1[i] != text2[i] ) return i;
273     }
274     return n;
275 }
276 
277 int commonSuffix(string text1, string text2)
278 {
279     auto len1 = text1.length;
280     auto len2 = text2.length;
281     auto n = min(len1, len2);
282     
283     for( auto i = 1; i <= n; i++ ){
284         if( text1[len1 - i] != text2[len2 - i] ) return i - 1;
285     }
286     return n;
287 }
288 
289 /**
290 * Determine if the suffix of one string is the prefix of another.
291 * @param text1 First string.
292 * @param text2 Second string.
293 * @return The number of characters common to the end of the first
294 *     string and the start of the second string.
295 */
296 int commonOverlap(string text1, string text2) {
297     // Cache the text lengths to prevent multiple calls.
298     int text1_length = text1.length; 
299     int text2_length = text2.length;
300     // Eliminate the null case.
301     if (text1_length == 0 || text2_length == 0) return 0;
302 
303     // Truncate the longer string.
304     if (text1_length > text2_length) {
305         text1 = text1.substr(text1_length - text2_length);
306     } else if (text1_length < text2_length) {
307         text2 = text2.substr(0, text1_length);
308     }
309     int text_length = min(text1_length, text2_length);
310     // Quick check for the worst case.
311     if (text1 == text2) {
312         return text_length;
313     }
314 
315     // Start by looking for a single character match
316     // and increase length until no match is found.
317     // Performance analysis: http://neil.fraser.name/news/2010/11/04/
318     int best = 0;
319     int length = 1;
320     while (true) {
321         string pattern = text1.substr(text_length - length);
322         int found = text2.indexOf(pattern);
323         if (found == -1) {
324             return best;
325         }
326         length += found;
327         if (found == 0 || text1.substr(text_length - length) == text2.substr(0, length)) {
328             best = length;
329             length++;
330         }
331     }
332 }
333 
334 /**-
335 * The data structure representing a diff is a List of Diff objects:
336 * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"),
337 *  Diff(Operation.EQUAL, " world.")}
338 * which means: delete "Hello", add "Goodbye" and keep " world."
339 */
340 enum Operation { 
341     DELETE,
342     INSERT,
343     EQUAL
344 }
345 
346 
347 /**
348 * Struct representing one diff operation.
349 */
350 struct Diff {
351     Operation operation;
352     string text;
353 
354     this(Operation operation, string text)
355     {
356         this.operation = operation;
357         this.text = text;
358     }
359 
360     string toString()
361     {
362         //string prettyText = text.replace('\n', '\u00b6');
363         string op;
364         final switch(operation)  {
365             case Operation.DELETE:
366                 op = "DELETE"; break;
367             case Operation.INSERT:
368                 op = "INSERT"; break;
369             case Operation.EQUAL:
370                 op = "EQUAL"; break;
371         }
372         return "Diff(" ~ op ~ ",\"" ~ text ~ "\")";
373     }
374 
375     bool opEquals(const Diff other) const
376     {
377         return operation == other.operation && text == other.text;
378     }
379 }
380 
381 
382 /**
383  * Find the differences between two texts.
384  * Run a faster, slightly less optimal diff.
385  * This method allows the 'checklines' of diff_main() to be optional.
386  * Most of the time checklines is wanted, so default to true.
387  * @param text1 Old string to be diffed.
388  * @param text2 New string to be diffed.
389  * @return List of Diff objects.
390  */
391 Diff[] diff_main(string text1, string text2)
392 {
393     return diff_main(text1, text2, true);
394 }
395 
396 /**
397  * Find the differences between two texts.
398  * @param text1 Old string to be diffed.
399  * @param text2 New string to be diffed.
400  * @param checklines Speedup flag.  If false, then don't run a
401  *     line-level diff first to identify the changed areas.
402  *     If true, then run a faster slightly less optimal diff.
403  * @return List of Diff objects.
404  */
405 Diff[] diff_main(string text1, string text2, bool checklines)
406 {
407     // Set a deadline by which time the diff must be complete.
408     SysTime deadline;
409     if (diffTimeout <= 0.seconds) {
410         deadline = SysTime.max;
411     } else {
412         deadline = Clock.currTime(UTC()) + diffTimeout;
413     }
414     return diff_main(text1, text2, checklines, deadline);
415 }
416 
417 /**
418  * Find the differences between two texts.  Simplifies the problem by
419  * stripping any common prefix or suffix off the texts before diffing.
420  * @param text1 Old string to be diffed.
421  * @param text2 New string to be diffed.
422  * @param checklines Speedup flag.  If false, then don't run a
423  *     line-level diff first to identify the changed areas.
424  *     If true, then run a faster slightly less optimal diff.
425  * @param deadline Time when the diff should be complete by.  Used
426  *     internally for recursive calls.  Users should set DiffTimeout
427  *     instead.
428  * @return List of Diff objects.
429  */
430 Diff[] diff_main(string text1, string text2, bool checklines, SysTime deadline)
431 {
432     Diff[] diffs;
433     if( text1 == text2 ){
434         if( text1.length != 0 ) diffs ~= Diff(Operation.EQUAL, text1);
435         return diffs;
436     }
437 
438     auto pos = commonPrefix(text1, text2);
439     auto prefix = text1.substr(0, pos);
440     text1 = text1.substr(pos);
441     text2 = text2.substr(pos);
442 
443     pos = commonSuffix(text1, text2);
444     auto suffix = text1.substr(text1.length - pos);
445     text1 = text1.substr(0, text1.length - pos);    
446     text2 = text2.substr(0, text2.length - pos);
447 
448     // Compute the diff on the middle block.
449     diffs = computeDiffs(text1, text2, checklines, deadline);
450 
451       // Restore the prefix and suffix.
452     if( prefix.length != 0 ) {
453         diffs.insert(0, [Diff(Operation.EQUAL, prefix)]);
454     }
455     if( suffix.length != 0 ) {
456         diffs ~= Diff(Operation.EQUAL, suffix);
457     }
458 
459     cleanupMerge(diffs);
460     return diffs;
461 }
462 
463 
464 
465 struct HalfMatch {
466     string prefix1;
467     string suffix1;
468     string suffix2;
469     string prefix2;
470     string commonMiddle;
471 
472     bool opEquals()(auto ref const HalfMatch other) const {
473         return prefix1 == other.prefix1 &&
474                suffix1 == other.suffix1 &&
475                prefix2 == other.prefix2 &&
476                suffix2 == other.suffix2;
477     }
478 }
479 /*
480  * Do the two texts share a Substring which is at least half the length of
481  * the longer text?
482  * This speedup can produce non-minimal diffs.
483  * @param text1 First string.
484  * @param text2 Second string.
485  * @return Five element String array, containing the prefix of text1, the
486  *     suffix of text1, the prefix of text2, the suffix of text2 and the
487  *     common middle.  Or null if there was no match.
488  */
489 bool halfMatch(string text1, string text2, out HalfMatch halfmatch){
490     if (diffTimeout <= 0.seconds) {
491         // Don't risk returning a non-optimal diff if we have unlimited time.
492         return false;
493     }
494     string longtext = text1.length > text2.length ? text1 : text2;
495     string shorttext = text1.length > text2.length ? text2 : text1;
496     if( longtext.length < 4 || shorttext.length * 2 < longtext.length ) return false; //pointless
497     HalfMatch hm1;
498     HalfMatch hm2;
499     auto is_hm1 = halfMatchI(longtext, shorttext, (longtext.length + 3) / 4, hm1);
500     auto is_hm2 = halfMatchI(longtext, shorttext, (longtext.length + 1) / 2, hm2);
501     HalfMatch hm;
502     if( !is_hm1 && !is_hm2 ){ 
503         return false;
504     } else if( !is_hm2  ){
505         hm = hm1;
506     } else if( !is_hm1 ){
507         hm = hm2;
508     } else {
509         hm = hm1.commonMiddle.length > hm2.commonMiddle.length ? hm1 : hm2;
510     }
511 
512     if( text1.length > text2.length ) { 
513         halfmatch = hm;
514         return true;
515     }
516     halfmatch.prefix1 = hm.prefix2;
517     halfmatch.suffix1 = hm.suffix2;
518     halfmatch.prefix2 = hm.prefix1;
519     halfmatch.suffix2 = hm.suffix1;
520     halfmatch.commonMiddle = hm.commonMiddle;
521     return true;
522 }
523 
524 
525 bool halfMatchI(string longtext, string shorttext, int i, out HalfMatch hm){
526     auto seed = longtext.substr(i, longtext.length / 4);
527     sizediff_t j = -1;
528     string best_common;
529     string best_longtext_a;
530     string best_longtext_b;
531     string best_shorttext_a;
532     string best_shorttext_b;
533     while( j < cast(sizediff_t)shorttext.length && ( j = shorttext.indexOfAlt(seed, j + 1)) != -1 ){
534         auto prefixLen = commonPrefix(longtext.substr(i), shorttext.substr(j));
535         auto suffixLen = commonSuffix(longtext.substr(0, i), shorttext.substr(0, j));
536         if( best_common.length < suffixLen + prefixLen ) {
537             best_common = shorttext.substr(j - suffixLen, suffixLen) ~ shorttext.substr(j, prefixLen);
538             best_longtext_a = longtext.substr(0, i - suffixLen);
539             best_longtext_b = longtext.substr(i + prefixLen);
540             best_shorttext_a = shorttext.substr(0, j - suffixLen);
541             best_shorttext_b = shorttext.substr(j + prefixLen);      
542         }
543     }
544     if( best_common.length * 2 >= longtext.length ) {
545         hm.prefix1 = best_longtext_a;
546         hm.suffix1 = best_longtext_b;
547         hm.prefix2 = best_shorttext_a;
548         hm.suffix2 = best_shorttext_b;
549         hm.commonMiddle = best_common;
550         return true;
551     } else {
552         return false;
553     }
554 }
555 
556 
557 /**
558      * Find the differences between two texts.  Assumes that the texts do not
559      * have any common prefix or suffix.
560      * @param text1 Old string to be diffed.
561      * @param text2 New string to be diffed.
562      * @param checklines Speedup flag.  If false, then don't run a
563      *     line-level diff first to identify the changed areas.
564      *     If true, then run a faster slightly less optimal diff.
565      * @param deadline Time when the diff should be complete by.
566      * @return List of Diff objects.
567      */
568 Diff[] computeDiffs(string text1, string text2, bool checklines, SysTime deadline)
569 {
570     Diff[] diffs;
571 
572     if( text1.length == 0 ){
573         diffs ~= Diff(Operation.INSERT, text2);
574         return diffs;
575     }
576     if( text2.length == 0 ){
577         diffs ~= Diff(Operation.DELETE, text1);
578         return diffs;
579     }
580 
581     auto longtext = text1.length > text2.length ? text1 : text2;
582     auto shorttext = text1.length > text2.length ? text2 : text1;
583     auto i = longtext.indexOf(shorttext);
584     if( i != -1 ){
585         Operation op = (text1.length > text2.length) ? Operation.DELETE : Operation.INSERT;
586         diffs ~= Diff(op, longtext.substr(0, i));
587         diffs ~= Diff(Operation.EQUAL, shorttext);
588         diffs ~= Diff(op, longtext.substr(i + shorttext.length));
589         return diffs;
590     }
591 
592     if( shorttext.length == 1 ){
593         diffs ~= Diff(Operation.DELETE, text1);
594         diffs ~= Diff(Operation.INSERT, text2);
595         return diffs;
596     }
597     HalfMatch hm;
598     auto is_hm = halfMatch(text1, text2, hm);
599     if( is_hm ){
600         auto diffs_a = diff_main(hm.prefix1, hm.prefix2, checklines, deadline);
601         auto diffs_b = diff_main(hm.suffix1, hm.suffix2, checklines, deadline);
602 
603         diffs = diffs_a;
604         diffs ~= Diff(Operation.EQUAL, hm.commonMiddle);
605         diffs ~= diffs_b;
606         return diffs;
607     }
608 
609     if( checklines && text1.length > 100 && text2.length > 100 ){
610         return diff_lineMode(text1, text2, deadline);
611     }
612 
613     return bisect(text1, text2, deadline);
614 }
615 
616 Diff[] diff_lineMode(string text1, string text2, SysTime deadline)
617 {
618     auto b = linesToChars(text1, text2);
619 
620     auto diffs = diff_main(b.text1, b.text2, false, deadline);
621 
622     charsToLines(diffs, b.uniqueStrings);
623     cleanupSemantic(diffs);
624 
625     diffs ~= Diff(Operation.EQUAL, "");
626     auto pointer = 0;
627     auto count_delete = 0;
628     auto count_insert = 0;
629     string text_delete;
630     string text_insert;
631     while( pointer < diffs.length ){
632         final switch( diffs[pointer].operation ) {
633             case Operation.INSERT:
634                 count_insert++;
635                 text_insert ~= diffs[pointer].text;
636                 break;
637             case Operation.DELETE:
638                 count_delete++;
639                 text_delete ~= diffs[pointer].text;
640                 break;
641             case Operation.EQUAL:
642                 if( count_delete >= 1 && count_insert >= 1 ){
643 
644                     diffs.remove(pointer - count_delete - count_insert,
645                                  count_delete + count_insert);
646 
647                     pointer = pointer - count_delete - count_insert;
648 
649                     auto a = diff_main(text_delete, text_insert, false, deadline);
650                     diffs.insert(pointer, a);
651                     pointer += a.length;
652                 }
653                 count_insert = 0;
654                 count_delete = 0;
655                 text_delete = "";
656                 text_insert = "";
657                 break;
658         }
659         pointer++;
660     }
661     diffs.remove(diffs.length - 1);
662     return diffs;
663 }
664 
665 Diff[] bisect(string text1, string text2, SysTime deadline)
666 {
667     auto text1_len = text1.length;
668     auto text2_len = text2.length;
669     int max_d = (text1_len + text2_len + 1) / 2;
670     int v_offset = max_d;
671     int v_len = 2 * max_d;
672     int[] v1;
673     int[] v2;
674     for( auto x = 0; x < v_len; x++ ){
675         v1 ~= -1;
676         v2 ~= -1;
677     }
678     v1[v_offset + 1] = 0;
679     v2[v_offset + 1] = 0;
680     int delta = text1_len - text2_len;
681     bool front = (delta % 2 != 0);
682     auto k1start = 0;
683     auto k1end = 0;
684     auto k2start = 0;
685     auto k2end = 0;
686     for( auto d = 0; d < max_d; d++ ){
687         // Bail out if deadline is reached.
688         if (Clock.currTime(UTC()) > deadline) {
689             break;
690         }
691 
692         for( int k1 = -d + k1start; k1 <= d - k1end; k1 += 2 ){
693             auto k1_offset = v_offset + k1;
694             int x1;
695             if( k1 == -d || k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1] ) {
696                 x1 = v1[k1_offset + 1];
697             } else {
698                 x1 = v1[k1_offset - 1] + 1;
699             }
700             auto y1 = x1 - k1;
701             while( x1 < text1_len && y1 < text2_len && text1[x1] == text2[y1] ){
702                 x1++;
703                 y1++;
704             }
705             v1[k1_offset] = x1;
706             if( x1 > text1_len) {
707                 k1end += 2;
708             } else if( y1 > text2_len ){
709                 k1start += 2;
710             } else if( front ){
711                 auto k2_offset = v_offset + delta - k1;
712                 if( k2_offset >= 0 && k2_offset < v_len && v2[k2_offset] != -1) {
713                     auto x2 = text1_len - v2[k2_offset];
714                     if( x1 >= x2 ) return bisectSplit(text1, text2, x1, y1, deadline);
715                 }
716             } 
717         }
718         for( auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
719             auto k2_offset = v_offset + k2;
720             int x2;
721             if (k2 == -d || k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) {
722                 x2 = v2[k2_offset + 1];
723             } else {
724                 x2 = v2[k2_offset - 1] + 1;
725             }
726             auto y2 = x2 - k2;
727             while( x2 < text1_len && y2 < text2_len
728                     && text1[text1_len - x2 - 1]
729                     == text2[text2_len - y2 - 1] ){
730                 x2++;
731                 y2++;
732             }
733             v2[k2_offset] = x2;
734             if (x2 > text1_len) {
735                 // Ran off the left of the graph.
736                 k2end += 2;
737             } else if (y2 > text2_len) {
738                 // Ran off the top of the graph.
739                 k2start += 2;
740             } else if (!front) {
741                 auto k1_offset = v_offset + delta - k2;
742                 if (k1_offset >= 0 && k1_offset < v_len && v1[k1_offset] != -1) {
743                     auto x1 = v1[k1_offset];
744                     auto y1 = v_offset + x1 - k1_offset;
745                     // Mirror x2 onto top-left coordinate system.
746                     x2 = text1_len - v2[k2_offset];
747                     if (x1 >= x2) {
748                         // Overlap detected.
749                         return bisectSplit(text1, text2, x1, y1, deadline);
750                     }
751                 }
752             }
753         }
754     }
755     Diff[] diffs;
756     diffs ~= Diff(Operation.DELETE, text1);
757     diffs ~= Diff(Operation.INSERT, text2);
758     return diffs;
759 }
760 
761 
762 Diff[] bisectSplit(string text1, string text2, int x, int y, SysTime deadline)
763 {
764     auto text1a = text1.substr(0, x);
765     auto text2a = text2.substr(0, y);
766     auto text1b = text1.substr(x);
767     auto text2b = text2.substr(y);
768 
769     Diff[] diffs = diff_main(text1a, text2a, false, deadline);
770     Diff[] diffsb = diff_main(text1b, text2b, false, deadline);
771     diffs ~= diffsb;
772     return diffs;
773 }
774 
775 void cleanupSemantic(ref Diff[] diffs) 
776 {
777     auto changes = false;
778     int[] equalities;
779 
780     string last_equality = null;
781     auto pointer = 0;
782     auto length_insertions1 = 0;
783     auto length_deletions1 = 0;
784     auto length_insertions2 = 0;
785     auto length_deletions2 = 0;
786 
787     while( pointer < diffs.length) {
788         if( diffs[pointer].operation == Operation.EQUAL ){
789             equalities ~= pointer;
790             length_insertions1 = length_insertions2;
791             length_deletions1 = length_deletions2;
792             length_insertions2 = 0;
793             length_deletions2 = 0;
794             last_equality = diffs[pointer].text;
795         } else {
796             if( diffs[pointer].operation == Operation.INSERT ){
797                 length_insertions2 += diffs[pointer].text.length;
798             } else {
799                 length_deletions2 += diffs[pointer].text.length;
800             }
801 
802             if( last_equality != null && 
803                 (last_equality.length <= max(length_insertions1, length_deletions1))
804                 && (last_equality.length <= max(length_insertions2, length_deletions2))) {
805                 // Duplicate record.
806                 diffs.insert(equalities[$-1], [Diff(Operation.DELETE, last_equality)]);
807                 diffs[equalities[$-1]+1] = Diff(Operation.INSERT, diffs[equalities[$-1]+1].text);
808 
809                 //Correct to pop twice ??????
810                 equalities = equalities[0..$-1]; 
811                 if( equalities.length > 0 ){
812                     equalities = equalities[0..$-1]; 
813                 }
814                 pointer = equalities.length > 0 ? equalities[$-1] : -1;
815                 length_insertions1 = 0;
816                 length_deletions1 = 0;
817                 length_insertions2 = 0;
818                 length_deletions2 = 0;
819                 last_equality = null;
820                 changes = true;
821             }
822         }
823         pointer++;
824     }
825 
826     if( changes ) {
827         cleanupMerge(diffs);
828     }
829     cleanupSemanticLossless(diffs);
830     pointer = 1;
831     while( pointer < diffs.length ){
832         if( diffs[pointer - 1].operation == Operation.DELETE &&
833             diffs[pointer].operation == Operation.INSERT) {
834             auto deletion = diffs[pointer - 1].text;
835             auto insertion = diffs[pointer].text;
836             auto overlap_len1 = commonOverlap(deletion, insertion);
837             auto overlap_len2 = commonOverlap(insertion, deletion);
838             if( overlap_len1 >= overlap_len2 ){
839                 if( overlap_len1 >= deletion.length / 2.0 || 
840                     overlap_len1 >= insertion.length / 2.0) {
841                     //Overlap found.
842                     //Insert an equality and trim the surrounding edits.
843                     diffs.insert(pointer, [Diff(Operation.EQUAL, insertion.substr(0, overlap_len1))]);
844                     diffs[pointer - 1].text = deletion.substr(0, deletion.length - overlap_len1);
845                     diffs[pointer + 1].text = insertion.substr(0, deletion.length - overlap_len1);
846                     pointer++;
847                 }
848             } else {
849                 if( overlap_len2 > deletion.length / 2.0 ||
850                     overlap_len2 > insertion.length / 2.0) {
851                     diffs.insert(pointer, [Diff(Operation.EQUAL, deletion.substr(0, overlap_len2))]);
852 
853                     diffs[pointer - 1].operation = Operation.INSERT;
854                     diffs[pointer - 1].text = insertion.substr(0, insertion.length - overlap_len2);
855                     diffs[pointer + 1].operation = Operation.DELETE;
856                     diffs[pointer + 1].text = deletion.substr(overlap_len2);
857                     pointer++;
858                 }
859             }
860             pointer++;
861         }
862         pointer++;
863     }
864 }
865 
866 /**
867  * Look for single edits surrounded on both sides by equalities
868  * which can be shifted sideways to align the edit to a word boundary.
869  * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
870  * @param diffs List of Diff objects.
871  */
872 void cleanupSemanticLossless(ref Diff[] diffs)
873 {
874     auto pointer = 1;
875     // Intentionally ignore the first and last element (don't need checking).
876     while( pointer < cast(sizediff_t)(diffs.length) - 1 ){
877         if( diffs[pointer-1].operation == Operation.EQUAL &&
878             diffs[pointer+1].operation == Operation.EQUAL) {
879             // This is a single edit surrounded by equalities.
880             auto equality1 = diffs[pointer-1].text;
881             auto edit = diffs[pointer].text;
882             auto equality2 = diffs[pointer+1].text;
883 
884             // First, shift the edit as far left as possible
885             auto commonOffset = commonSuffix(equality1, edit);
886             if( commonOffset > 0 ){
887                 auto commonString = edit.substr(edit.length - commonOffset);
888                 equality1 = equality1.substr(0, equality1.length - commonOffset);
889                 edit = commonString ~ edit.substr(0, edit.length - commonOffset);
890                 equality2 = commonString ~ equality2;
891             }
892 
893             // Second, step character by character right,
894             // looking for the best fit.
895             auto best_equality1 = equality1;
896             auto best_edit = edit;
897             auto best_equality2 = equality2;
898             auto best_score = cleanupSemanticScore(equality1, edit) + cleanupSemanticScore(edit, equality2);
899             while( edit.length != 0 && equality2.length != 0 && edit[0] == equality2[0]){
900                 equality1 ~= edit[0];
901                 edit =  edit.substr(1) ~ equality2[0];
902                 equality2 = equality2.substr(1);
903                 auto score = cleanupSemanticScore(equality1, edit) + cleanupSemanticScore(edit, equality2);
904                 // The >= encourages trailing rather than leading whitespace on
905                 // edits.
906                 if (score >= best_score) {
907                     best_score = score;
908                     best_equality1 = equality1;
909                     best_edit = edit;
910                     best_equality2 = equality2;
911                 }
912             }
913 
914             if( diffs[pointer-1].text != best_equality1 ){
915                 // We have an improvement, save it back to the diff.
916                 if( best_equality1.length != 0) {
917                     diffs[pointer-1].text = best_equality1;
918                 } else {
919                     diffs.remove(pointer - 1);
920                     pointer--;
921                 }
922                 diffs[pointer].text = best_edit;
923                 if( best_equality2.length != 0 ){
924                     diffs[pointer+1].text = best_equality2;
925                 } else {
926                     diffs.remove(pointer + 1);
927                     pointer--;
928                 }
929             }
930         }
931         pointer++;
932     }
933 }
934 
935 
936 
937 
938 /**
939  * Reorder and merge like edit sections.  Merge equalities.
940  * Any edit section can move as long as it doesn't cross an equality.
941  * @param diffs List of Diff objects.
942  */
943 void cleanupMerge(ref Diff[] diffs) {
944     diffs ~= Diff(Operation.EQUAL, "");
945     auto pointer = 0;
946     auto count_delete = 0;
947     auto count_insert = 0;
948     string text_delete;
949     string text_insert;
950     auto commonlength = 0;
951     while(pointer < diffs.length) {
952         final switch(diffs[pointer].operation){
953             case Operation.INSERT:
954                 count_insert++;
955                 text_insert ~= diffs[pointer].text;
956                 pointer++;
957                 break;
958             case Operation.DELETE:
959                 count_delete++;
960                 text_delete ~= diffs[pointer].text;
961                 pointer++;
962                 break;
963             case Operation.EQUAL:
964                 // Upon reaching an equality, check for prior redundancies.
965                 if (count_delete + count_insert > 1) {
966                     if (count_delete != 0 && count_insert != 0) {
967                         // Factor out any common prefixies.
968                         commonlength = commonPrefix(text_insert, text_delete);
969                         if (commonlength != 0) {
970                             if ((pointer - count_delete - count_insert) > 0 &&
971                                 diffs[pointer - count_delete - count_insert - 1].operation
972                                     == Operation.EQUAL) {
973                                 diffs[pointer - count_delete - count_insert - 1].text
974                                     ~= text_insert.substr(0, commonlength);
975                             } else {
976                                 diffs.insert(0, [Diff(Operation.EQUAL, text_insert.substr(0, commonlength))]);
977                                 pointer++;
978                             }
979                             text_insert = text_insert.substr(commonlength);
980                             text_delete = text_delete.substr(commonlength);
981                         }
982                         // Factor out any common suffixies.
983                         commonlength = commonSuffix(text_insert, text_delete);
984                         if (commonlength != 0) {                        
985                             diffs[pointer].text = text_insert.substr(text_insert.length - commonlength) ~ diffs[pointer].text;
986                             text_insert = text_insert.substr(0, text_insert.length - commonlength);
987                             text_delete = text_delete.substr(0, text_delete.length - commonlength);
988                         }
989                     }
990                     // Delete the offending records and add the merged ones.
991                     if (count_delete == 0) {
992                         diffs.splice(pointer - count_insert, count_delete + count_insert, [Diff(Operation.INSERT, text_insert)]);
993                     } else if (count_insert == 0) {
994 
995                         diffs.splice(pointer - count_delete, count_delete + count_insert, [Diff(Operation.DELETE, text_delete)]);
996                     } else {
997                         diffs.splice(pointer - count_delete - count_insert, count_delete + count_insert, [Diff(Operation.DELETE, text_delete), Diff(Operation.INSERT, text_insert)]);
998                     }
999                     pointer = pointer - count_delete - count_insert +
1000                             (count_delete != 0 ? 1 : 0) + (count_insert != 0 ? 1 : 0) + 1;
1001                 } else if( pointer != 0 && diffs[pointer-1].operation == Operation.EQUAL ){
1002                     diffs[pointer - 1].text ~= diffs[pointer].text;
1003                     diffs.remove(pointer);
1004                 } else {
1005                     pointer++;
1006                 }
1007                 count_insert = 0;
1008                 count_delete = 0;
1009                 text_delete = "";
1010                 text_insert = "";
1011                 break;
1012         }
1013     }
1014     if( diffs[$-1].text.length == 0){
1015         diffs.remove(diffs.length - 1);
1016     }
1017     
1018     bool changes = false;
1019     pointer = 1;
1020     while( pointer < (cast(sizediff_t)diffs.length - 1) ) {
1021         if( diffs[pointer - 1].operation == Operation.EQUAL && 
1022             diffs[pointer + 1].operation == Operation.EQUAL) {
1023             if( diffs[pointer].text.endsWith(diffs[pointer - 1].text)) {
1024                 diffs[pointer].text = diffs[pointer - 1].text ~ diffs[pointer].text.substr(0, diffs[pointer].text.length - diffs[pointer - 1].text.length);
1025                 diffs[pointer + 1].text = diffs[pointer - 1].text ~ diffs[pointer + 1].text;
1026                 diffs.splice(pointer - 1, 1);
1027                 changes = true;
1028             } else if( diffs[pointer].text.startsWith(diffs[pointer + 1].text)) {
1029                 diffs[pointer - 1].text ~= diffs[pointer + 1].text;
1030                 diffs[pointer].text = 
1031                     diffs[pointer].text.substr(diffs[pointer + 1].text.length)
1032                     ~ diffs[pointer + 1].text;
1033                 diffs.splice(pointer + 1, 1);
1034                 changes = true;
1035             }
1036         }
1037         pointer++;
1038     }
1039     if( changes ) cleanupMerge(diffs);
1040 
1041 }
1042 
1043 
1044 
1045 /**
1046  * Given two strings, comAdde a score representing whether the internal
1047  * boundary falls on logical boundaries.
1048  * Scores range from 6 (best) to 0 (worst).
1049  * @param one First string.
1050  * @param two Second string.
1051  * @return The score.
1052  */
1053 int cleanupSemanticScore(string one, string two) 
1054 {
1055     if( one.length == 0 || two.length == 0) return 6; //Edges are the best
1056     auto char1 = one[$-1];
1057     auto char2 = two[0];
1058 
1059     auto nonAlphaNumeric1 = !(isAlpha(char1) || isNumber(char1));    
1060     auto nonAlphaNumeric2 = !(isAlpha(char2) || isNumber(char2));
1061     auto whitespace1 = nonAlphaNumeric1 && isWhite(char1);
1062     auto whitespace2 = nonAlphaNumeric2 && isWhite(char2);
1063     auto lineBreak1 = whitespace1 && isControl(char1);
1064     auto lineBreak2 = whitespace2 && isControl(char2);
1065     auto blankLine1 = lineBreak1 &&  match(one, `\n\r?\n\Z`);
1066     auto blankLine2 = lineBreak2 &&  match(two, `\A\r?\n\r?\n`);
1067 
1068     if (blankLine1 || blankLine2) return 5;
1069     else if (lineBreak1 || lineBreak2) return 4;
1070     else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) return 3;
1071     else if (whitespace1 || whitespace2) return 2;
1072     else if (nonAlphaNumeric1 || nonAlphaNumeric2) return 1;
1073     
1074     return 0;
1075 }
1076 
1077 
1078 /**
1079  * Reduce the number of edits by eliminating operationally trivial
1080  * equalities.
1081  * @param diffs List of Diff objects.
1082  */
1083 void cleanupEfficiency(Diff[] diffs) {
1084     bool changes = false;
1085     int[] equalities;
1086     string lastequality;
1087     int pointer = 0;
1088     auto pre_ins = false;
1089     auto pre_del = false;
1090     auto post_ins = false;
1091     auto post_del = false;
1092     while( pointer < diffs.length ){
1093         if( diffs[pointer].operation == Operation.EQUAL ){
1094             if( diffs[pointer].text.length < DIFF_EDIT_COST && (post_ins || post_del)) {
1095                 equalities ~= pointer;
1096                 pre_ins = post_ins;
1097                 pre_del = post_del;
1098                 lastequality = diffs[pointer].text;
1099             } else {
1100                 equalities = [];
1101                 lastequality = "";
1102             }
1103             post_ins = false;
1104             post_del = false;
1105         } else {
1106             if( diffs[pointer].operation == Operation.DELETE ){
1107                 post_del = true;
1108             } else {
1109                 post_ins = true;
1110             }
1111 
1112             if( (lastequality.length != 0) 
1113                 && ((pre_ins && pre_del && post_ins && post_del)
1114                 || ((lastequality.length < DIFF_EDIT_COST / 2)
1115                 && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0)
1116                     + (post_del ? 1 : 0)) == 3))) {
1117 
1118                 diffs.insert(equalities[$], [Diff(Operation.DELETE, lastequality)]);
1119                 diffs[equalities[$] + 1].operation = Operation.INSERT;
1120                 equalities = equalities[0..$-1];
1121                 lastequality = "";
1122                 if( pre_ins && pre_del ){
1123                     post_ins = false;
1124                     post_ins = false;
1125                     equalities = [];
1126                 } else {
1127                     if( equalities.length > 0 ) equalities = equalities[0..$-1];
1128                     pointer = equalities.length > 0 ? equalities[$] : -1;
1129                     post_ins = false;
1130                     post_del = false;
1131                 }
1132                 changes = true;
1133             }
1134         }
1135         pointer++;
1136     }
1137 
1138     if( changes ){
1139         cleanupMerge(diffs);
1140     }
1141 }
1142 
1143 /**
1144  * loc is a location in text1, comAdde and return the equivalent location in
1145  * text2.
1146  * e.g. "The cat" vs "The big cat", 1->1, 5->8
1147  * @param diffs List of Diff objects.
1148  * @param loc Location within text1.
1149  * @return Location within text2.
1150  */
1151 int xIndex(Diff[] diffs, int loc){
1152     auto chars1 = 0;
1153     auto chars2 = 0;
1154     auto last_chars1 = 0;
1155     auto last_chars2 = 0;
1156     Diff lastDiff;
1157     foreach ( diff; diffs) {
1158         if (diff.operation != Operation.INSERT) {
1159             // Equality or deletion.
1160             chars1 += diff.text.length;
1161         }
1162         if (diff.operation != Operation.DELETE) {
1163             // Equality or insertion.
1164             chars2 += diff.text.length;
1165         }
1166         if (chars1 > loc) {
1167             // Overshot the location.
1168             lastDiff = diff;
1169             break;
1170         }
1171         last_chars1 = chars1;
1172         last_chars2 = chars2;
1173     }
1174     if (lastDiff.operation == Operation.DELETE) {
1175         // The location was deleted.
1176         return last_chars2;
1177     }
1178     // Add the remaining character length.
1179     return last_chars2 + (loc - last_chars1);
1180 }
1181 
1182 /**
1183  * Unescape selected chars for compatability with JavaScript's encodeURI.
1184  * In speed critical applications this could be dropped since the
1185  * receiving application will certainly decode these fine.
1186  * Note that this function is case-sensitive.  Thus "%3F" would not be
1187  * unescaped.  But this is ok because it is only called with the output of
1188  * HttpUtility.UrlEncode which returns lowercase hex.
1189  *
1190  * Example: "%3f" -> "?", "%24" -> "$", etc.
1191  *
1192  * @param str The string to escape.
1193  * @return The escaped string.
1194  */
1195 public static string unescapeForEncodeUriCompatability(string str)
1196 {
1197     // FIXME: this is ridiculously inefficient
1198     return str.replace("%21", "!").replace("%7e", "~")
1199       .replace("%27", "'").replace("%28", "(").replace("%29", ")")
1200       .replace("%3b", ";").replace("%2f", "/").replace("%3f", "?")
1201       .replace("%3a", ":").replace("%40", "@").replace("%26", "&")
1202       .replace("%3d", "=").replace("%2b", "+").replace("%24", "$")
1203       .replace("%2c", ",").replace("%23", "#");
1204 }