1 /*
2  * Copyright 2008 Google Inc. All Rights Reserved.
3  * Copyright 2013-2014 Jan Krüger. All Rights Reserved.
4  * Author: fraser@google.com (Neil Fraser)
5  * Author: anteru@developer.shelter13.net (Matthaeus G. Chajdas)
6  * Author: jan@jandoe.de (Jan Krüger)
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *   http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  * Diff Match and Patch
21  * http://code.google.com/p/google-diff-match-patch/
22  */
23 module ddmp.patch;
24 import std.algorithm : min, max;
25 import std.array;
26 import std.conv;
27 import std.exception : enforce;
28 import std..string:lastIndexOf;
29 
30 import ddmp.diff;
31 import ddmp.match;
32 import ddmp.util;
33 
34 int MATCH_MAXBITS = 32;
35 int PATCH_MARGIN = 4;
36 float PATCH_DELETE_THRESHOLD = 0.5f;
37 
38 struct Patch {
39     Diff[] diffs;
40     sizediff_t start1;
41     sizediff_t start2;
42     sizediff_t length1;
43     sizediff_t length2;
44 
45     string toString()
46     const {
47     	import std.uri : encode;
48 
49         auto app = appender!string();
50         app.put("@@ -");
51         if( length1 == 0 ){
52             app.put(to!string(start1));
53             app.put(",0");
54         } else if( length1 == 1 ){
55             app.put(to!string(start1 + 1));
56         } else {
57             app.put(to!string(start1 + 1));
58             app.put(",");
59             app.put(to!string(length1));
60         }
61         app.put(" +");
62         if( length2 == 0 ){
63             app.put(to!string(start2));
64             app.put(",0");
65         } else if( length2 == 1 ){
66             app.put(to!string(start2 + 1));
67         } else {
68             app.put(to!string(start2 + 1));
69             app.put(",");
70             app.put(to!string(length2));
71         }
72         app.put(" @@\n");
73         foreach( d ; diffs){
74             final switch( d.operation ){
75                 case Operation.INSERT:
76                     app.put("+");
77                     break;
78                 case Operation.DELETE:
79                     app.put("-");
80                     break;
81                 case Operation.EQUAL:
82                     app.put(" ");
83                     break;
84             }
85             app.put(encode(d.text).replace("%20", " "));
86             app.put("\n");
87         }
88 
89         return unescapeForEncodeUriCompatibility(app.data());
90     }
91 }
92 
93 /**
94  * Increase the context until it is unique,
95  * but don't let the pattern expand beyond Match_MaxBits.
96  * @param patch The patch to grow.
97  * @param text Source text.
98  */
99 void addContext(ref Patch patch, string text)
100 {
101 	if( text.length == 0 ) return;
102 
103 	auto pattern = text.substr(patch.start2, patch.length1);
104 	sizediff_t padding = 0;
105 
106 	// Look for the first and last matches of pattern in text.  If two
107 	// different matches are found, increase the pattern length.
108 	while( text.indexOfAlt(pattern) != text.lastIndexOf(pattern)
109 		  && pattern.length < MATCH_MAXBITS - PATCH_MARGIN - PATCH_MARGIN ){
110 		padding += PATCH_MARGIN;
111 		pattern = text[max(0, patch.start2 - padding)..min(text.length, patch .start2 + patch.length1 + padding)];		
112 	}
113 	// Add one chunk for good luck.
114 	padding += PATCH_MARGIN;
115 
116 	// Add the prefix.
117 	auto prefix = text[max(0, patch.start2 - padding)..patch.start2];
118 	if( prefix.length != 0 ){
119 		patch.diffs.insert(0, [Diff(Operation.EQUAL, prefix)]);
120 	}
121 
122 	// Add the suffix.
123 	auto suffix = text[patch.start2 + patch.length1..min(text.length, patch.start2 + patch.length1 + padding)];
124 	if( suffix.length != 0 ){
125 		patch.diffs ~= Diff(Operation.EQUAL, suffix);
126 	}
127 
128 	// Roll back the start points.
129 	patch.start1 -= prefix.length;
130 	patch.start2 -= prefix.length;
131 	// Extend the lengths.
132 	patch.length1 += prefix.length + suffix.length;
133 	patch.length2 += prefix.length + suffix.length;
134 }
135 
136 /**
137 * Compute a list of patches to turn text1 into text2.
138 * A set of diffs will be computed.
139 * @param text1 Old text.
140 * @param text2 New text.
141 * @return List of Patch objects.
142 */
143 Patch[] patch_make(string text1, string text2) {
144 	// No diffs provided, comAdde our own.
145 	auto diffs = diff_main(text1, text2, true);
146 	if (diffs.length > 2) {
147 		cleanupSemantic(diffs);
148 		cleanupEfficiency(diffs);
149 	}
150 	return patch_make(text1, diffs);
151 }
152 
153 
154 /**
155  * Compute a list of patches to turn text1 into text2.
156  * text1 will be derived from the provided diffs.
157  * @param diffs Array of Diff objects for text1 to text2.
158  * @return List of Patch objects.
159  */
160 Patch[] patch_make(Diff[] diffs) {
161   // Check for null inputs not needed since null can't be passed in C#.
162   // No origin string provided, comAdde our own.
163   auto text1 = diff_text1(diffs);
164   return patch_make(text1, diffs);
165 }
166 
167 
168 /**
169  * Compute a list of patches to turn text1 into text2.
170  * text2 is not provided, diffs are the delta between text1 and text2.
171  * @param text1 Old text.
172  * @param diffs Array of Diff objects for text1 to text2.
173  * @return List of Patch objects.
174  */
175 Patch[] patch_make(string text1, Diff[] diffs)
176 {
177 	Patch[] patches;
178 	if( diffs.length == 0 ) return patches;
179 	
180 	Patch patch;
181 	auto char_count1 = 0;  // Number of characters into the text1 string.
182 	auto char_count2 = 0;  // Number of characters into the text2 string.
183 	// Start with text1 (prepatch_text) and apply the diffs until we arrive at
184 	// text2 (postpatch_text). We recreate the patches one by one to determine
185 	// context info.
186 	auto prepatch_text = text1;
187 	auto postpatch_text = text1;
188 
189 	foreach( diff ; diffs ){
190 		if( patch.diffs.length == 0 && diff.operation != Operation.EQUAL ){
191 			// A new patch starts here.
192 			patch.start1 = char_count1;
193 			patch.start2 = char_count2;
194 		}
195 
196 		final switch(diff.operation){
197 			case Operation.INSERT:
198 				patch.diffs ~= diff;
199 				patch.length2 += diff.text.length;
200 				postpatch_text.insert(char_count2, diff.text);
201 				break;
202 			case Operation.DELETE:
203 				patch.length2 += diff.text.length;
204 				patch.diffs ~= diff;
205 				postpatch_text.remove(char_count2, diff.text.length);
206 				break;
207 			case Operation.EQUAL:
208 				if( diff.text.length <= 2 * PATCH_MARGIN && patch.diffs.length != 0 && diff != diffs[$-1] ){
209 					patch.diffs ~= diff;
210 					patch.length1 += diff.text.length;
211 					patch.length2 += diff.text.length;
212 				}
213 
214 				if( diff.text.length >= 2 * PATCH_MARGIN ){
215 					if( patch.diffs.length != 0 ){
216 						addContext(patch, prepatch_text);
217 						patches ~= patch;
218 						patch = Patch();
219 						prepatch_text = postpatch_text;
220 						char_count1 = char_count2;
221 					}
222 				}
223 				break;
224 		}
225         // Update the current character count.
226         if (diff.operation != Operation.INSERT) {
227           char_count1 += diff.text.length;
228         }
229         if (diff.operation != Operation.DELETE) {
230           char_count2 += diff.text.length;
231         }
232     }
233 	// Pick up the leftover patch if not empty.
234     if( !patch.diffs.empty ){
235     	addContext(patch, prepatch_text);
236     	patches ~= patch;
237     }
238 
239     return patches;
240 }
241 
242 
243 /**
244  * Merge a set of patches onto the text.  Return a patched text, as well
245  * as an array of true/false values indicating which patches were applied.
246  * @param patches Array of Patch objects
247  * @param text Old text.
248  * @return Two element Object array, containing the new text and an array of
249  *      bool values.
250  */
251 
252  struct PatchApplyResult {
253  	string text;
254  	bool[] patchesApplied;
255  }
256 
257  PatchApplyResult apply(Patch[] patches, string text) 
258  {
259  	PatchApplyResult result;
260  	if( patches.length == 0 ) return result;
261  
262  	auto nullPadding = addPadding(patches);
263  	text = nullPadding ~ text ~ nullPadding;
264  	splitMax(patches);
265 
266  	result.patchesApplied.length = patches.length; // init patchesApplied array
267  	sizediff_t x = 0;
268 	// delta keeps track of the offset between the expected and actual
269 	// location of the previous patch.  If there are patches expected at
270 	// positions 10 and 20, but the first patch was found at 12, delta is 2
271 	// and the second patch has an effective expected position of 22.
272 	sizediff_t delta = 0; 	
273 	foreach( patch ; patches ){
274 		auto expected_loc = patch.start2 + delta;
275 		auto text1 =  diff_text1(patch.diffs);
276 		sizediff_t start_loc;
277 		sizediff_t end_loc = -1;
278 		if( text1.length > MATCH_MAXBITS ){
279 			// patch_splitMax will only provide an oversized pattern
280          	// in the case of a monster delete
281          	start_loc = match_main(text, text1.substr(0, MATCH_MAXBITS), expected_loc);
282          	if( start_loc != -1 ){
283          		end_loc = match_main(text,
284          			text1.substr(text1.length - MATCH_MAXBITS),
285          			expected_loc + text1.length - MATCH_MAXBITS);
286          		if( end_loc == -1 || start_loc >= end_loc ){
287          			// Can't find valid trailing context.  Drop this patch.
288          			start_loc = -1;
289          		}
290          	}
291 		} else {
292 			start_loc = match_main(text, text1, expected_loc);
293 		}
294 		if( start_loc == -1 ){
295 			// No match found.  :(
296 			result.patchesApplied[x] = false;
297 			// Subtract the delta for this failed patch from subsequent patches.
298 			delta -= patch.length2 - patch.length1;
299 		} else {
300 			// Found a match. :)
301 			result.patchesApplied[x] = true;
302 			delta = start_loc - expected_loc;
303 			string text2;
304 			if( end_loc == -1 ){
305 				text2 = text[ start_loc .. min(start_loc + text1.length, text.length) ];
306 			} else {
307 				text2 = text[ start_loc .. min(end_loc + MATCH_MAXBITS, text.length) ];
308 			}
309 			if( text1 == text2 ) {
310 				// Perfect match, just shove the replacement text in.
311 				text = text.substr(0, start_loc) ~ diff_text2(patch.diffs) ~ text.substr(start_loc + text1.length);			
312 			} else {
313 				// Imperfect match. Run a diff to get a framework of equivalent indices.
314 				auto diffs = diff_main(text1, text2, false);
315 				if( text1.length > MATCH_MAXBITS && levenshtein(diffs) / cast(float)text1.length > PATCH_DELETE_THRESHOLD){
316 					// The end points match, but the content is unacceptably bad.
317 					result.patchesApplied[x] = false;
318 				} else {
319 					cleanupSemanticLossless(diffs);
320 					auto index1 = 0;
321 					foreach( diff; patch.diffs ){
322 						if( diff.operation != Operation.EQUAL ){
323 							auto index2 = xIndex(diffs, index1);
324 							if( diff.operation == Operation.INSERT ){
325 								// Insertion
326 								text.insert(start_loc + index2, diff.text);
327 							} else if( diff.operation == Operation.DELETE ){
328 								// Deletion
329 								text.remove(start_loc + index2, xIndex(diffs, index1 + diff.text.length) - index2);
330 							}
331 						}
332 						if( diff.operation != Operation.DELETE ){
333 							index1 += diff.text.length;
334 						}
335 					}
336 				}
337 			}
338 		}
339 		x++;
340 	} 
341 	// Strip the padding off.
342 	result.text = text.substr(nullPadding.length, text.length - 2 * nullPadding.length);
343 	return result;
344 }
345 
346 /**
347  * Add some padding on text start and end so that edges can match something.
348  * Intended to be called only from within patch_apply.
349  * @param patches Array of Patch objects.
350  * @return The padding string added to each side.
351  */
352 string addPadding(Patch[] patches)
353 {
354 	auto paddingLength = PATCH_MARGIN;
355 	string nullPadding;
356 	for(sizediff_t x = 1; x <= paddingLength; x++){
357 		nullPadding ~= cast(char)x;
358 	}
359 
360 	// Bump all the patches forward.
361 	foreach( patch; patches ){
362 		patch.start1 += paddingLength;
363 		patch.start2 += paddingLength;
364 	}
365 
366 	// Add some padding on start of first diff.
367 	Patch patch = patches[0];
368 	auto diffs = patch.diffs;
369 	if( diffs.length == 0 || diffs[0].operation != Operation.EQUAL ){		
370 		// Add nullPadding equality.
371 		diffs.insert(0, [Diff(Operation.EQUAL, nullPadding)]);
372 		patch.start1 -= paddingLength;  // Should be 0.
373 		patch.start2 -= paddingLength;  // Should be 0.
374 		patch.length1 += paddingLength;
375 		patch.length2 += paddingLength;
376 	} else if (paddingLength > diffs[0].text.length) {
377 		// Grow first equality.
378 		Diff firstDiff = diffs[0];
379 		auto extraLength = paddingLength - firstDiff.text.length;
380 		firstDiff.text = nullPadding.substr(firstDiff.text.length) ~ firstDiff.text;
381 		patch.start1 -= extraLength;
382 		patch.start2 -= extraLength;
383 		patch.length1 += extraLength;
384 		patch.length2 += extraLength;
385 	}
386 
387 	// Add some padding on end of last diff.
388 	patch = patches[$-1];
389 	diffs = patch.diffs;
390 	if( diffs.length == 0 || diffs[$-1].operation != Operation.EQUAL) {
391 		// Add nullPadding equality.
392 		diffs ~= Diff(Operation.EQUAL, nullPadding);
393 		patch.length1 += paddingLength;
394 		patch.length2 += paddingLength;
395 	} else if (paddingLength > diffs[$-1].text.length) {
396 		// Grow last equality.
397 		Diff lastDiff = diffs[$-1];
398 		auto extraLength = paddingLength - lastDiff.text.length;
399 		lastDiff.text ~= nullPadding.substr(0, extraLength);
400 		patch.length1 += extraLength;
401 		patch.length2 += extraLength;
402 	}
403 	return nullPadding;
404 }
405 
406 /**
407  * Look through the patches and break up any which are longer than the
408  * maximum limit of the match algorithm.
409  * Intended to be called only from within patch_apply.
410  * @param patches List of Patch objects.
411  */
412 void splitMax(Patch[] patches)
413 {
414 	auto patch_size = MATCH_MAXBITS;
415 	for( auto x = 0; x < patches.length; x++ ){
416 		if( patches[x].length1 <= patch_size ) continue;
417 		Patch bigpatch = patches[x];
418 		patches.splice(x--, 1);
419 		auto start1 = bigpatch.start1;
420 		auto start2 = bigpatch.start2;
421 		string precontext;
422 		while( bigpatch.diffs.length != 0){
423 			Patch patch;
424 			bool empty = true;
425 			patch.start1 = start1 - precontext.length;
426 			patch.start2 = start2 - precontext.length;			
427 			if( precontext.length != 0 ){
428 				patch.length1 = patch.length2 = precontext.length;
429 				patch.diffs ~= Diff(Operation.EQUAL, precontext);
430 			}
431 			while( bigpatch.diffs.length != 0 && patch.length1 < patch_size - PATCH_MARGIN ){
432 				Operation diff_type = bigpatch.diffs[0].operation;
433 				auto diff_text = bigpatch.diffs[0].text;
434 				if( diff_type == Operation.INSERT ){
435 					// Insertions are harmless.
436 					patch.length2 += diff_text.length;
437 					start2 += diff_text.length;
438 					patch.diffs ~= bigpatch.diffs[0];
439 					bigpatch.diffs.remove(0);
440 					empty = false;
441 				} else if( diff_type == Operation.DELETE && patch.diffs.length == 1
442 					&& patch.diffs[0].operation == Operation.EQUAL
443 					&& diff_text.length > 2 * patch_size) {
444               		// This is a large deletion.  Let it pass in one chunk.
445               		patch.length1 += diff_text.length;
446               		start1 += diff_text.length;
447               		empty = false;
448               		patch.diffs ~= Diff(diff_type, diff_text);
449               		bigpatch.diffs.remove(0);
450 				} else {
451 					// Deletion or equality. Only takes as much as we can stomach.
452 					diff_text = diff_text.substr(0, min(diff_text.length, patch_size - patch.length1 - PATCH_MARGIN));
453 					patch.length1 += diff_text.length;
454 					start1 += diff_text.length;
455 					if( diff_type == Operation.EQUAL ){
456 						patch.length2 += diff_text.length;
457 						start2 += diff_text.length;
458 					} else {
459 						empty = false;
460 					}
461 					patch.diffs ~= Diff(diff_type, diff_text);
462 					if( diff_text == bigpatch.diffs[0].text ){
463 						bigpatch.diffs.remove(0);
464 					} else {
465 						bigpatch.diffs[0].text = bigpatch.diffs[0].text.substr(diff_text.length);
466 					}
467 				}
468 			}
469 			// Compute the head context for the next patch.
470 			precontext = diff_text2(patch.diffs);
471 			precontext = precontext.substr(max(0, precontext.length - PATCH_MARGIN));
472 
473 			auto postcontext = diff_text1(bigpatch.diffs);
474 			if( postcontext.length > PATCH_MARGIN ){
475 				postcontext =  postcontext.substr(0, PATCH_MARGIN);
476 			}
477 
478 			if( postcontext.length != 0 ){
479 				patch.length1 += postcontext.length;
480 				patch.length2 += postcontext.length;
481 				if( patch.diffs.length != 0 
482 					&& patch.diffs[patch.diffs.length - 1].operation
483 					== Operation.EQUAL) {
484 					patch.diffs[$].text ~= postcontext;
485 				} else {
486 					patch.diffs ~= Diff(Operation.EQUAL, postcontext);
487 				}
488 			}
489 			if( !empty ){
490 				patches.splice(++x, 0, [patch]);
491 			}
492 		}
493 	}
494 }
495 
496 /**
497  * Take a list of patches and return a textual representation.
498  * @param patches List of Patch objects.
499  * @return Text representation of patches.
500  */
501 public string patch_toText(in Patch[] patches)
502 {
503 	auto text = appender!string();
504 	foreach (aPatch; patches)
505 		text ~= aPatch.toString();
506 	return text.data;
507 }
508 
509 /**
510  * Parse a textual representation of patches and return a List of Patch
511  * objects.
512  * @param textline Text representation of patches.
513  * @return List of Patch objects.
514  * @throws ArgumentException If invalid input.
515  */
516 public Patch[] patch_fromText(string textline)
517 {
518 	import std.regex : regex, matchFirst;
519 	import std..string : format, split;
520 
521 	auto patches = appender!(Patch[])();
522 	if (textline.length == 0) return null;
523 
524 	auto text = textline.split("\n");
525 	sizediff_t textPointer = 0;
526 	auto patchHeader = regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
527 	char sign;
528 	string line;
529 	while (textPointer < text.length) {
530 		auto m = matchFirst(text[textPointer], patchHeader);
531 		enforce (m, "Invalid patch string: " ~ text[textPointer]);
532 		Patch patch;
533 		patch.start1 = m[1].to!sizediff_t;
534 		if (m[2].length == 0) {
535 			patch.start1--;
536 			patch.length1 = 1;
537 		} else if (m[2] == "0") {
538 			patch.length1 = 0;
539 		} else {
540 			patch.start1--;
541 			patch.length1 = m[2].to!sizediff_t;
542 		}
543 
544 		patch.start2 = m[3].to!sizediff_t;
545 		if (m[4].length == 0) {
546 			patch.start2--;
547 			patch.length2 = 1;
548 		} else if (m[4] == "0") {
549 			patch.length2 = 0;
550 		} else {
551 			patch.start2--;
552 			patch.length2 = m[4].to!sizediff_t;
553 		}
554 		textPointer++;
555 
556 		while (textPointer < text.length) {
557 			import std.uri : decodeComponent;
558 			if (textPointer >= text.length || !text[textPointer].length) {
559 				// Blank line?  Whatever.
560 				textPointer++;
561 				continue;
562 			}
563 			sign = text[textPointer][0];
564 			line = text[textPointer][1 .. $];
565 			line = line.replace("+", "%2b");
566 			line = decodeComponent(line);
567 			if (sign == '-') {
568 				// Deletion.
569 				patch.diffs ~= Diff(Operation.DELETE, line);
570 			} else if (sign == '+') {
571 				// Insertion.
572 				patch.diffs ~= Diff(Operation.INSERT, line);
573 			} else if (sign == ' ') {
574 				// Minor equality.
575 				patch.diffs ~= Diff(Operation.EQUAL, line);
576 			} else if (sign == '@') {
577 				// Start of next patch.
578 				break;
579 			} else {
580 				// WTF?
581 				throw new Exception(format("Invalid patch mode '%s' in: %s", sign, line));
582 			}
583 			textPointer++;
584 		}
585 
586 		patches ~= patch;
587 	}
588 	return patches.data;
589 }