1 /*
2  * Copyright 2008 Google Inc. All Rights Reserved.
3  * Copyright 2013-2014 Jan Krüger. All Rights Reserved.
4  * Author: fraser@google.com (Neil Fraser)
5  * Author: anteru@developer.shelter13.net (Matthaeus G. Chajdas)
6  * Author: jan@jandoe.de (Jan Krüger)
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *   http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  * Diff Match and Patch
21  * http://code.google.com/p/google-diff-match-patch/
22  */
23 module ddmp.patch;
24 import std.array;
25 import std.conv;
26 import std..string:lastIndexOf;
27 
28 import ddmp.diff;
29 import ddmp.match;
30 import ddmp.util;
31 
32 int MATCH_MAXBITS = 32;
33 int PATCH_MARGIN = 4;
34 float PATCH_DELETE_THRESHOLD = 0.5f;
35 
36 struct Patch {
37     Diff[] diffs;
38     int start1;
39     int start2;
40     int length1;
41     int length2;
42 
43     string toString()
44     const {
45         auto app = appender!string();
46         app.put("@@ -");
47         if( length1 == 0 ){
48             app.put(to!string(start1));
49             app.put(",0");
50         } else if( length1 == 1 ){
51             app.put(to!string(start1 + 1));
52         } else {
53             app.put(to!string(start1 + 1));
54             app.put(",");
55             app.put(to!string(length1));
56         }
57         app.put(" +");
58         if( length2 == 0 ){
59             app.put(to!string(start2));
60             app.put(",0");
61         } else if( length2 == 1 ){
62             app.put(to!string(start2 + 1));
63         } else {
64             app.put(to!string(start2 + 1));
65             app.put(",");
66             app.put(to!string(length2));
67         }
68         app.put(" @@\n");
69         foreach( d ; diffs){
70             final switch( d.operation ){
71                 case Operation.INSERT:
72                     app.put("+");
73                     break;
74                 case Operation.DELETE:
75                     app.put("-");
76                     break;
77                 case Operation.EQUAL:
78                     app.put(" ");
79                     break;
80             }
81             //TODO: urlEncode
82             app.put(d.text);
83             app.put("\n");
84         }
85 
86         //TODO: unescapeForEncodeUriCompatability
87         return app.data();
88     }
89 }
90 
91 /**
92  * Increase the context until it is unique,
93  * but don't let the pattern expand beyond Match_MaxBits.
94  * @param patch The patch to grow.
95  * @param text Source text.
96  */
97 void addContext(Patch patch, string text)
98 {
99 	if( text.length == 0 ) return;
100 
101 	auto pattern = text.substr(patch.start2, patch.length1);
102 	int padding = 0;
103 
104 	// Look for the first and last matches of pattern in text.  If two
105 	// different matches are found, increase the pattern length.
106 	while( text.indexOfAlt(pattern) != text.lastIndexOf(pattern)
107 		  && pattern.length < MATCH_MAXBITS - PATCH_MARGIN - PATCH_MARGIN ){
108 		padding += PATCH_MARGIN;
109 		pattern = text[max(0, patch.start2 - padding)..min(text.length, patch .start2 + patch.length1 + padding)];		
110 	}
111 	// Add one chunk for good luck.
112 	padding += PATCH_MARGIN;
113 
114 	// Add the prefix.
115 	auto prefix = text[max(0, patch.start2 - padding)..patch.start2];
116 	if( prefix.length != 0 ){
117 		patch.diffs.insert(0, [Diff(Operation.EQUAL, prefix)]);
118 	}
119 
120 	// Add the suffix.
121 	auto suffix = text[patch.start2 + patch.length1..min(text.length, patch.start2 + patch.length1 + padding)];
122 	if( suffix.length != 0 ){
123 		patch.diffs ~= Diff(Operation.EQUAL, suffix);
124 	}
125 
126 	// Roll back the start points.
127 	patch.start1 -= prefix.length;
128 	patch.start2 -= prefix.length;
129 	// Extend the lengths.
130 	patch.length1 += prefix.length + suffix.length;
131 	patch.length2 += prefix.length + suffix.length;
132 }
133 
134 /**
135 * Compute a list of patches to turn text1 into text2.
136 * A set of diffs will be computed.
137 * @param text1 Old text.
138 * @param text2 New text.
139 * @return List of Patch objects.
140 */
141 Patch[] patch_make(string text1, string text2) {
142 	// No diffs provided, comAdde our own.
143 	auto diffs = diff_main(text1, text2, true);
144 	if (diffs.length > 2) {
145 		cleanupSemantic(diffs);
146 		cleanupEfficiency(diffs);
147 	}
148 	return patch_make(text1, diffs);
149 }
150 
151 
152 /**
153  * Compute a list of patches to turn text1 into text2.
154  * text1 will be derived from the provided diffs.
155  * @param diffs Array of Diff objects for text1 to text2.
156  * @return List of Patch objects.
157  */
158 Patch[] patch_make(Diff[] diffs) {
159   // Check for null inputs not needed since null can't be passed in C#.
160   // No origin string provided, comAdde our own.
161   auto text1 = diff_text1(diffs);
162   return patch_make(text1, diffs);
163 }
164 
165 
166 /**
167  * Compute a list of patches to turn text1 into text2.
168  * text2 is not provided, diffs are the delta between text1 and text2.
169  * @param text1 Old text.
170  * @param diffs Array of Diff objects for text1 to text2.
171  * @return List of Patch objects.
172  */
173 Patch[] patch_make(string text1, Diff[] diffs)
174 {
175 	Patch[] patches;
176 	if( diffs.length == 0 ) return patches;
177 	
178 	Patch patch;
179 	auto char_count1 = 0;  // Number of characters into the text1 string.
180 	auto char_count2 = 0;  // Number of characters into the text2 string.
181 	// Start with text1 (prepatch_text) and apply the diffs until we arrive at
182 	// text2 (postpatch_text). We recreate the patches one by one to determine
183 	// context info.
184 	auto prepatch_text = text1;
185 	auto postpatch_text = text1;
186 
187 	foreach( diff ; diffs ){
188 		if( patch.diffs.length == 0 && diff.operation != Operation.EQUAL ){
189 			// A new patch starts here.
190 			patch.start1 = char_count1;
191 			patch.start2 = char_count2;
192 		}
193 
194 		final switch(diff.operation){
195 			case Operation.INSERT:
196 				patch.diffs ~= diff;
197 				patch.length2 += diff.text.length;
198 				postpatch_text.insert(char_count2, diff.text);
199 				break;
200 			case Operation.DELETE:
201 				patch.length2 += diff.text.length;
202 				patch.diffs ~= diff;
203 				postpatch_text.remove(char_count2, diff.text.length);
204 				break;
205 			case Operation.EQUAL:
206 				if( diff.text.length <= 2 * PATCH_MARGIN && patch.diffs.length != 0 && diff != diffs[$-1] ){
207 					patch.diffs ~= diff;
208 					patch.length1 += diff.text.length;
209 					patch.length2 += diff.text.length;
210 				}
211 
212 				if( diff.text.length >= 2 * PATCH_MARGIN ){
213 					if( patch.diffs.length != 0 ){
214 						addContext(patch, prepatch_text);
215 						patches ~= patch;
216 						patch = Patch();
217 						prepatch_text = postpatch_text;
218 						char_count1 = char_count2;
219 					}
220 				}
221 				break;
222 		}
223         // Update the current character count.
224         if (diff.operation != Operation.INSERT) {
225           char_count1 += diff.text.length;
226         }
227         if (diff.operation != Operation.DELETE) {
228           char_count2 += diff.text.length;
229         }
230     }
231 	// Pick up the leftover patch if not empty.
232     if( !patch.diffs.empty ){
233     	addContext(patch, prepatch_text);
234     	patches ~= patch;
235     }
236 
237     return patches;
238 }
239 
240 
241 /**
242  * Merge a set of patches onto the text.  Return a patched text, as well
243  * as an array of true/false values indicating which patches were applied.
244  * @param patches Array of Patch objects
245  * @param text Old text.
246  * @return Two element Object array, containing the new text and an array of
247  *      bool values.
248  */
249 
250  struct PatchApplyResult {
251  	string text;
252  	bool[] patchesApplied;
253  }
254 
255  PatchApplyResult apply(Patch[] patches, string text) 
256  {
257  	PatchApplyResult result;
258  	if( patches.length == 0 ) return result;
259  
260  	auto nullPadding = addPadding(patches);
261  	text = nullPadding ~ text ~ nullPadding;
262  	splitMax(patches);
263 
264  	result.patchesApplied.length = patches.length; // init patchesApplied array
265  	int x = 0;
266 	// delta keeps track of the offset between the expected and actual
267 	// location of the previous patch.  If there are patches expected at
268 	// positions 10 and 20, but the first patch was found at 12, delta is 2
269 	// and the second patch has an effective expected position of 22.
270 	int delta = 0; 	
271 	foreach( patch ; patches ){
272 		auto expected_loc = patch.start2 + delta;
273 		auto text1 =  diff_text1(patch.diffs);
274 		int start_loc;
275 		int end_loc = -1;
276 		if( text1.length > MATCH_MAXBITS ){
277 			// patch_splitMax will only provide an oversized pattern
278          	// in the case of a monster delete
279          	start_loc = match_main(text, text1.substr(0, MATCH_MAXBITS), expected_loc);
280          	if( start_loc != -1 ){
281          		end_loc = match_main(text,
282          			text1.substr(text1.length - MATCH_MAXBITS),
283          			expected_loc + text1.length - MATCH_MAXBITS);
284          		if( end_loc == -1 || start_loc >= end_loc ){
285          			// Can't find valid trailing context.  Drop this patch.
286          			start_loc = -1;
287          		}
288          	}
289 		} else {
290 			start_loc = match_main(text, text1, expected_loc);
291 		}
292 		if( start_loc == -1 ){
293 			// No match found.  :(
294 			result.patchesApplied[x] = false;
295 			// Subtract the delta for this failed patch from subsequent patches.
296 			delta -= patch.length2 - patch.length1;
297 		} else {
298 			// Found a match. :)
299 			result.patchesApplied[x] = true;
300 			delta = start_loc - expected_loc;
301 			string text2;
302 			if( end_loc == -1 ){
303 				text2 = text[ start_loc .. min(start_loc + text1.length, text.length) ];
304 			} else {
305 				text2 = text[ start_loc .. min(end_loc + MATCH_MAXBITS, text.length) ];
306 			}
307 			if( text1 == text2 ) {
308 				// Perfect match, just shove the replacement text in.
309 				text = text.substr(0, start_loc) ~ diff_text2(patch.diffs) ~ text.substr(start_loc + text1.length);			
310 			} else {
311 				// Imperfect match. Run a diff to get a framework of equivalent indices.
312 				auto diffs = diff_main(text1, text2, false);
313 				if( text1.length > MATCH_MAXBITS && levenshtein(diffs) / cast(float)text1.length > PATCH_DELETE_THRESHOLD){
314 					// The end points match, but the content is unacceptably bad.
315 					result.patchesApplied[x] = false;
316 				} else {
317 					cleanupSemanticLossless(diffs);
318 					auto index1 = 0;
319 					foreach( diff; patch.diffs ){
320 						if( diff.operation != Operation.EQUAL ){
321 							auto index2 = xIndex(diffs, index1);
322 							if( diff.operation == Operation.INSERT ){
323 								// Insertion
324 								text.insert(start_loc + index2, diff.text);
325 							} else if( diff.operation == Operation.DELETE ){
326 								// Deletion
327 								text.remove(start_loc + index2, xIndex(diffs, index1 + diff.text.length) - index2);
328 							}
329 						}
330 						if( diff.operation != Operation.DELETE ){
331 							index1 += diff.text.length;
332 						}
333 					}
334 				}
335 			}
336 		}
337 		x++;
338 	} 
339 	// Strip the padding off.
340 	result.text = text.substr(nullPadding.length, text.length - 2 * nullPadding.length);
341 	return result;
342 }
343 
344 /**
345  * Add some padding on text start and end so that edges can match something.
346  * Intended to be called only from within patch_apply.
347  * @param patches Array of Patch objects.
348  * @return The padding string added to each side.
349  */
350 string addPadding(Patch[] patches)
351 {
352 	auto paddingLength = PATCH_MARGIN;
353 	string nullPadding;
354 	for(int x = 1; x <= paddingLength; x++){
355 		nullPadding ~= cast(char)x;
356 	}
357 
358 	// Bump all the patches forward.
359 	foreach( patch; patches ){
360 		patch.start1 += paddingLength;
361 		patch.start2 += paddingLength;
362 	}
363 
364 	// Add some padding on start of first diff.
365 	Patch patch = patches[0];
366 	auto diffs = patch.diffs;
367 	if( diffs.length == 0 || diffs[0].operation != Operation.EQUAL ){		
368 		// Add nullPadding equality.
369 		diffs.insert(0, [Diff(Operation.EQUAL, nullPadding)]);
370 		patch.start1 -= paddingLength;  // Should be 0.
371 		patch.start2 -= paddingLength;  // Should be 0.
372 		patch.length1 += paddingLength;
373 		patch.length2 += paddingLength;
374 	} else if (paddingLength > diffs[0].text.length) {
375 		// Grow first equality.
376 		Diff firstDiff = diffs[0];
377 		auto extraLength = paddingLength - firstDiff.text.length;
378 		firstDiff.text = nullPadding.substr(firstDiff.text.length) ~ firstDiff.text;
379 		patch.start1 -= extraLength;
380 		patch.start2 -= extraLength;
381 		patch.length1 += extraLength;
382 		patch.length2 += extraLength;
383 	}
384 
385 	// Add some padding on end of last diff.
386 	patch = patches[$-1];
387 	diffs = patch.diffs;
388 	if( diffs.length == 0 || diffs[$-1].operation != Operation.EQUAL) {
389 		// Add nullPadding equality.
390 		diffs ~= Diff(Operation.EQUAL, nullPadding);
391 		patch.length1 += paddingLength;
392 		patch.length2 += paddingLength;
393 	} else if (paddingLength > diffs[$-1].text.length) {
394 		// Grow last equality.
395 		Diff lastDiff = diffs[$-1];
396 		auto extraLength = paddingLength - lastDiff.text.length;
397 		lastDiff.text ~= nullPadding.substr(0, extraLength);
398 		patch.length1 += extraLength;
399 		patch.length2 += extraLength;
400 	}
401 	return nullPadding;
402 }
403 
404 /**
405  * Look through the patches and break up any which are longer than the
406  * maximum limit of the match algorithm.
407  * Intended to be called only from within patch_apply.
408  * @param patches List of Patch objects.
409  */
410 void splitMax(Patch[] patches)
411 {
412 	auto patch_size = MATCH_MAXBITS;
413 	for( auto x = 0; x < patches.length; x++ ){
414 		if( patches[x].length1 <= patch_size ) continue;
415 		Patch bigpatch = patches[x];
416 		patches.splice(x--, 1);
417 		auto start1 = bigpatch.start1;
418 		auto start2 = bigpatch.start2;
419 		string precontext;
420 		while( bigpatch.diffs.length != 0){
421 			Patch patch;
422 			bool empty = true;
423 			patch.start1 = start1 - precontext.length;
424 			patch.start2 = start2 - precontext.length;			
425 			if( precontext.length != 0 ){
426 				patch.length1 = patch.length2 = precontext.length;
427 				patch.diffs ~= Diff(Operation.EQUAL, precontext);
428 			}
429 			while( bigpatch.diffs.length != 0 && patch.length1 < patch_size - PATCH_MARGIN ){
430 				Operation diff_type = bigpatch.diffs[0].operation;
431 				auto diff_text = bigpatch.diffs[0].text;
432 				if( diff_type == Operation.INSERT ){
433 					// Insertions are harmless.
434 					patch.length2 += diff_text.length;
435 					start2 += diff_text.length;
436 					patch.diffs ~= bigpatch.diffs[0];
437 					bigpatch.diffs.remove(0);
438 					empty = false;
439 				} else if( diff_type == Operation.DELETE && patch.diffs.length == 1
440 					&& patch.diffs[0].operation == Operation.EQUAL
441 					&& diff_text.length > 2 * patch_size) {
442               		// This is a large deletion.  Let it pass in one chunk.
443               		patch.length1 += diff_text.length;
444               		start1 += diff_text.length;
445               		empty = false;
446               		patch.diffs ~= Diff(diff_type, diff_text);
447               		bigpatch.diffs.remove(0);
448 				} else {
449 					// Deletion or equality. Only takes as much as we can stomach.
450 					diff_text = diff_text.substr(0, min(diff_text.length, patch_size - patch.length1 - PATCH_MARGIN));
451 					patch.length1 += diff_text.length;
452 					start1 += diff_text.length;
453 					if( diff_type == Operation.EQUAL ){
454 						patch.length2 += diff_text.length;
455 						start2 += diff_text.length;
456 					} else {
457 						empty = false;
458 					}
459 					patch.diffs ~= Diff(diff_type, diff_text);
460 					if( diff_text == bigpatch.diffs[0].text ){
461 						bigpatch.diffs.remove(0);
462 					} else {
463 						bigpatch.diffs[0].text = bigpatch.diffs[0].text.substr(diff_text.length);
464 					}
465 				}
466 			}
467 			// Compute the head context for the next patch.
468 			precontext = diff_text2(patch.diffs);
469 			precontext = precontext.substr(max(0, precontext.length - PATCH_MARGIN));
470 
471 			auto postcontext = diff_text1(bigpatch.diffs);
472 			if( postcontext.length > PATCH_MARGIN ){
473 				postcontext =  postcontext.substr(0, PATCH_MARGIN);
474 			}
475 
476 			if( postcontext.length != 0 ){
477 				patch.length1 += postcontext.length;
478 				patch.length2 += postcontext.length;
479 				if( patch.diffs.length != 0 
480 					&& patch.diffs[patch.diffs.length - 1].operation
481 					== Operation.EQUAL) {
482 					patch.diffs[$].text ~= postcontext;
483 				} else {
484 					patch.diffs ~= Diff(Operation.EQUAL, postcontext);
485 				}
486 			}
487 			if( !empty ){
488 				patches.splice(++x, 0, [patch]);
489 			}
490 		}
491 	}
492 }
493 
494 /**
495  * Take a list of patches and return a textual representation.
496  * @param patches List of Patch objects.
497  * @return Text representation of patches.
498  */
499 public string patch_toText(in Patch[] patches)
500 {
501 	auto text = appender!string();
502 	foreach (aPatch; patches)
503 		text ~= aPatch.toString();
504 	return text.data;
505 }
506 
507 /**
508  * Parse a textual representation of patches and return a List of Patch
509  * objects.
510  * @param textline Text representation of patches.
511  * @return List of Patch objects.
512  * @throws ArgumentException If invalid input.
513  */
514 public Patch[] patch_fromText(string textline)
515 {
516 	import std.regex : regex, matchFirst;
517 	import std..string : format, split;
518 
519 	auto patches = appender!(Patch[])();
520 	if (textline.length == 0) return null;
521 
522 	auto text = textline.split("\n");
523 	int textPointer = 0;
524 	auto patchHeader = regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
525 	char sign;
526 	string line;
527 	while (textPointer < text.length) {
528 		auto m = matchFirst(text[textPointer], patchHeader);
529 		enforce (m, "Invalid patch string: " ~ text[textPointer]);
530 		Patch patch;
531 		patch.start1 = m[0].to!int;
532 		if (m[1].length == 0) {
533 			patch.start1--;
534 			patch.length1 = 1;
535 		} else if (m[1] == "0") {
536 			patch.length1 = 0;
537 		} else {
538 			patch.start1--;
539 			patch.length1 = m[1].to!int;
540 		}
541 
542 		patch.start2 = m[2].to!int;
543 		if (m[3].length == 0) {
544 			patch.start2--;
545 			patch.length2 = 1;
546 		} else if (m[3] == "0") {
547 			patch.length2 = 0;
548 		} else {
549 			patch.start2--;
550 			patch.length2 = m[3].to!int;
551 		}
552 		textPointer++;
553 
554 		while (textPointer < text.length) {
555 			import std.uri : decode;
556 			if (textPointer >= text.length || !text[textPointer].length) {
557 				// Blank line?  Whatever.
558 				textPointer++;
559 				continue;
560 			}
561 			sign = text[textPointer][0];
562 			line = text[textPointer][1 .. $];
563 			line = line.replace("+", "%2b");
564 			line = decode(line);
565 			if (sign == '-') {
566 				// Deletion.
567 				patch.diffs ~= Diff(Operation.DELETE, line);
568 			} else if (sign == '+') {
569 				// Insertion.
570 				patch.diffs ~= Diff(Operation.INSERT, line);
571 			} else if (sign == ' ') {
572 				// Minor equality.
573 				patch.diffs ~= Diff(Operation.EQUAL, line);
574 			} else if (sign == '@') {
575 				// Start of next patch.
576 				break;
577 			} else {
578 				// WTF?
579 				throw new Exception(format("Invalid patch mode '%s' in: %s", sign, line));
580 			}
581 			textPointer++;
582 		}
583 
584 		patches ~= patch;
585 	}
586 	return patches.data;
587 }