1 /* 2 * Copyright 2008 Google Inc. All Rights Reserved. 3 * Copyright 2013-2014 Jan Krüger. All Rights Reserved. 4 * Author: fraser@google.com (Neil Fraser) 5 * Author: anteru@developer.shelter13.net (Matthaeus G. Chajdas) 6 * Author: jan@jandoe.de (Jan Krüger) 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 * 20 * Diff Match and Patch 21 * http://code.google.com/p/google-diff-match-patch/ 22 */ 23 module ddmp.patch; 24 import std.algorithm : min, max; 25 import std.array; 26 import std.conv; 27 import std.exception : enforce; 28 import std..string:lastIndexOf; 29 30 import ddmp.diff; 31 import ddmp.match; 32 import ddmp.util; 33 34 int MATCH_MAXBITS = 32; 35 int PATCH_MARGIN = 4; 36 float PATCH_DELETE_THRESHOLD = 0.5f; 37 38 struct Patch { 39 Diff[] diffs; 40 sizediff_t start1; 41 sizediff_t start2; 42 sizediff_t length1; 43 sizediff_t length2; 44 45 string toString() 46 const { 47 import std.uri : encode; 48 49 auto app = appender!string(); 50 app.put("@@ -"); 51 if( length1 == 0 ){ 52 app.put(to!string(start1)); 53 app.put(",0"); 54 } else if( length1 == 1 ){ 55 app.put(to!string(start1 + 1)); 56 } else { 57 app.put(to!string(start1 + 1)); 58 app.put(","); 59 app.put(to!string(length1)); 60 } 61 app.put(" +"); 62 if( length2 == 0 ){ 63 app.put(to!string(start2)); 64 app.put(",0"); 65 } else if( length2 == 1 ){ 66 app.put(to!string(start2 + 1)); 67 } else { 68 app.put(to!string(start2 + 1)); 69 app.put(","); 70 app.put(to!string(length2)); 71 } 72 app.put(" @@\n"); 73 foreach( d ; diffs){ 74 final switch( d.operation ){ 75 case Operation.INSERT: 76 app.put("+"); 77 break; 78 case Operation.DELETE: 79 app.put("-"); 80 break; 81 case Operation.EQUAL: 82 app.put(" "); 83 break; 84 } 85 app.put(encode(d.text).replace("%20", " ")); 86 app.put("\n"); 87 } 88 89 return unescapeForEncodeUriCompatibility(app.data()); 90 } 91 } 92 93 /** 94 * Increase the context until it is unique, 95 * but don't let the pattern expand beyond Match_MaxBits. 96 * @param patch The patch to grow. 97 * @param text Source text. 98 */ 99 void addContext(ref Patch patch, string text) 100 { 101 if( text.length == 0 ) return; 102 103 auto pattern = text.substr(patch.start2, patch.length1); 104 sizediff_t padding = 0; 105 106 // Look for the first and last matches of pattern in text. If two 107 // different matches are found, increase the pattern length. 108 while( text.indexOfAlt(pattern) != text.lastIndexOf(pattern) 109 && pattern.length < MATCH_MAXBITS - PATCH_MARGIN - PATCH_MARGIN ){ 110 padding += PATCH_MARGIN; 111 pattern = text[max(0, patch.start2 - padding)..min(text.length, patch .start2 + patch.length1 + padding)]; 112 } 113 // Add one chunk for good luck. 114 padding += PATCH_MARGIN; 115 116 // Add the prefix. 117 auto prefix = text[max(0, patch.start2 - padding)..patch.start2]; 118 if( prefix.length != 0 ){ 119 patch.diffs.insert(0, [Diff(Operation.EQUAL, prefix)]); 120 } 121 122 // Add the suffix. 123 auto suffix = text[patch.start2 + patch.length1..min(text.length, patch.start2 + patch.length1 + padding)]; 124 if( suffix.length != 0 ){ 125 patch.diffs ~= Diff(Operation.EQUAL, suffix); 126 } 127 128 // Roll back the start points. 129 patch.start1 -= prefix.length; 130 patch.start2 -= prefix.length; 131 // Extend the lengths. 132 patch.length1 += prefix.length + suffix.length; 133 patch.length2 += prefix.length + suffix.length; 134 } 135 136 /** 137 * Compute a list of patches to turn text1 into text2. 138 * A set of diffs will be computed. 139 * @param text1 Old text. 140 * @param text2 New text. 141 * @return List of Patch objects. 142 */ 143 Patch[] patch_make(string text1, string text2) { 144 // No diffs provided, comAdde our own. 145 auto diffs = diff_main(text1, text2, true); 146 if (diffs.length > 2) { 147 cleanupSemantic(diffs); 148 cleanupEfficiency(diffs); 149 } 150 return patch_make(text1, diffs); 151 } 152 153 154 /** 155 * Compute a list of patches to turn text1 into text2. 156 * text1 will be derived from the provided diffs. 157 * @param diffs Array of Diff objects for text1 to text2. 158 * @return List of Patch objects. 159 */ 160 Patch[] patch_make(Diff[] diffs) { 161 // Check for null inputs not needed since null can't be passed in C#. 162 // No origin string provided, comAdde our own. 163 auto text1 = diff_text1(diffs); 164 return patch_make(text1, diffs); 165 } 166 167 168 /** 169 * Compute a list of patches to turn text1 into text2. 170 * text2 is not provided, diffs are the delta between text1 and text2. 171 * @param text1 Old text. 172 * @param diffs Array of Diff objects for text1 to text2. 173 * @return List of Patch objects. 174 */ 175 Patch[] patch_make(string text1, Diff[] diffs) 176 { 177 Patch[] patches; 178 if( diffs.length == 0 ) return patches; 179 180 Patch patch; 181 auto char_count1 = 0; // Number of characters into the text1 string. 182 auto char_count2 = 0; // Number of characters into the text2 string. 183 // Start with text1 (prepatch_text) and apply the diffs until we arrive at 184 // text2 (postpatch_text). We recreate the patches one by one to determine 185 // context info. 186 auto prepatch_text = text1; 187 auto postpatch_text = text1; 188 189 foreach( diff ; diffs ){ 190 if( patch.diffs.length == 0 && diff.operation != Operation.EQUAL ){ 191 // A new patch starts here. 192 patch.start1 = char_count1; 193 patch.start2 = char_count2; 194 } 195 196 final switch(diff.operation){ 197 case Operation.INSERT: 198 patch.diffs ~= diff; 199 patch.length2 += diff.text.length; 200 postpatch_text.insert(char_count2, diff.text); 201 break; 202 case Operation.DELETE: 203 patch.length2 += diff.text.length; 204 patch.diffs ~= diff; 205 postpatch_text.remove(char_count2, diff.text.length); 206 break; 207 case Operation.EQUAL: 208 if( diff.text.length <= 2 * PATCH_MARGIN && patch.diffs.length != 0 && diff != diffs[$-1] ){ 209 patch.diffs ~= diff; 210 patch.length1 += diff.text.length; 211 patch.length2 += diff.text.length; 212 } 213 214 if( diff.text.length >= 2 * PATCH_MARGIN ){ 215 if( patch.diffs.length != 0 ){ 216 addContext(patch, prepatch_text); 217 patches ~= patch; 218 patch = Patch(); 219 prepatch_text = postpatch_text; 220 char_count1 = char_count2; 221 } 222 } 223 break; 224 } 225 // Update the current character count. 226 if (diff.operation != Operation.INSERT) { 227 char_count1 += diff.text.length; 228 } 229 if (diff.operation != Operation.DELETE) { 230 char_count2 += diff.text.length; 231 } 232 } 233 // Pick up the leftover patch if not empty. 234 if( !patch.diffs.empty ){ 235 addContext(patch, prepatch_text); 236 patches ~= patch; 237 } 238 239 return patches; 240 } 241 242 243 /** 244 * Merge a set of patches onto the text. Return a patched text, as well 245 * as an array of true/false values indicating which patches were applied. 246 * @param patches Array of Patch objects 247 * @param text Old text. 248 * @return Two element Object array, containing the new text and an array of 249 * bool values. 250 */ 251 252 struct PatchApplyResult { 253 string text; 254 bool[] patchesApplied; 255 } 256 257 PatchApplyResult apply(Patch[] patches, string text) 258 { 259 PatchApplyResult result; 260 if( patches.length == 0 ) return result; 261 262 auto nullPadding = addPadding(patches); 263 text = nullPadding ~ text ~ nullPadding; 264 splitMax(patches); 265 266 result.patchesApplied.length = patches.length; // init patchesApplied array 267 sizediff_t x = 0; 268 // delta keeps track of the offset between the expected and actual 269 // location of the previous patch. If there are patches expected at 270 // positions 10 and 20, but the first patch was found at 12, delta is 2 271 // and the second patch has an effective expected position of 22. 272 sizediff_t delta = 0; 273 foreach( patch ; patches ){ 274 auto expected_loc = patch.start2 + delta; 275 auto text1 = diff_text1(patch.diffs); 276 sizediff_t start_loc; 277 sizediff_t end_loc = -1; 278 if( text1.length > MATCH_MAXBITS ){ 279 // patch_splitMax will only provide an oversized pattern 280 // in the case of a monster delete 281 start_loc = match_main(text, text1.substr(0, MATCH_MAXBITS), expected_loc); 282 if( start_loc != -1 ){ 283 end_loc = match_main(text, 284 text1.substr(text1.length - MATCH_MAXBITS), 285 expected_loc + text1.length - MATCH_MAXBITS); 286 if( end_loc == -1 || start_loc >= end_loc ){ 287 // Can't find valid trailing context. Drop this patch. 288 start_loc = -1; 289 } 290 } 291 } else { 292 start_loc = match_main(text, text1, expected_loc); 293 } 294 if( start_loc == -1 ){ 295 // No match found. :( 296 result.patchesApplied[x] = false; 297 // Subtract the delta for this failed patch from subsequent patches. 298 delta -= patch.length2 - patch.length1; 299 } else { 300 // Found a match. :) 301 result.patchesApplied[x] = true; 302 delta = start_loc - expected_loc; 303 string text2; 304 if( end_loc == -1 ){ 305 text2 = text[ start_loc .. min(start_loc + text1.length, text.length) ]; 306 } else { 307 text2 = text[ start_loc .. min(end_loc + MATCH_MAXBITS, text.length) ]; 308 } 309 if( text1 == text2 ) { 310 // Perfect match, just shove the replacement text in. 311 text = text.substr(0, start_loc) ~ diff_text2(patch.diffs) ~ text.substr(start_loc + text1.length); 312 } else { 313 // Imperfect match. Run a diff to get a framework of equivalent indices. 314 auto diffs = diff_main(text1, text2, false); 315 if( text1.length > MATCH_MAXBITS && levenshtein(diffs) / cast(float)text1.length > PATCH_DELETE_THRESHOLD){ 316 // The end points match, but the content is unacceptably bad. 317 result.patchesApplied[x] = false; 318 } else { 319 cleanupSemanticLossless(diffs); 320 auto index1 = 0; 321 foreach( diff; patch.diffs ){ 322 if( diff.operation != Operation.EQUAL ){ 323 auto index2 = xIndex(diffs, index1); 324 if( diff.operation == Operation.INSERT ){ 325 // Insertion 326 text.insert(start_loc + index2, diff.text); 327 } else if( diff.operation == Operation.DELETE ){ 328 // Deletion 329 text.remove(start_loc + index2, xIndex(diffs, index1 + diff.text.length) - index2); 330 } 331 } 332 if( diff.operation != Operation.DELETE ){ 333 index1 += diff.text.length; 334 } 335 } 336 } 337 } 338 } 339 x++; 340 } 341 // Strip the padding off. 342 result.text = text.substr(nullPadding.length, text.length - 2 * nullPadding.length); 343 return result; 344 } 345 346 /** 347 * Add some padding on text start and end so that edges can match something. 348 * Intended to be called only from within patch_apply. 349 * @param patches Array of Patch objects. 350 * @return The padding string added to each side. 351 */ 352 string addPadding(Patch[] patches) 353 { 354 auto paddingLength = PATCH_MARGIN; 355 string nullPadding; 356 for(sizediff_t x = 1; x <= paddingLength; x++){ 357 nullPadding ~= cast(char)x; 358 } 359 360 // Bump all the patches forward. 361 foreach( patch; patches ){ 362 patch.start1 += paddingLength; 363 patch.start2 += paddingLength; 364 } 365 366 // Add some padding on start of first diff. 367 Patch patch = patches[0]; 368 auto diffs = patch.diffs; 369 if( diffs.length == 0 || diffs[0].operation != Operation.EQUAL ){ 370 // Add nullPadding equality. 371 diffs.insert(0, [Diff(Operation.EQUAL, nullPadding)]); 372 patch.start1 -= paddingLength; // Should be 0. 373 patch.start2 -= paddingLength; // Should be 0. 374 patch.length1 += paddingLength; 375 patch.length2 += paddingLength; 376 } else if (paddingLength > diffs[0].text.length) { 377 // Grow first equality. 378 Diff firstDiff = diffs[0]; 379 auto extraLength = paddingLength - firstDiff.text.length; 380 firstDiff.text = nullPadding.substr(firstDiff.text.length) ~ firstDiff.text; 381 patch.start1 -= extraLength; 382 patch.start2 -= extraLength; 383 patch.length1 += extraLength; 384 patch.length2 += extraLength; 385 } 386 387 // Add some padding on end of last diff. 388 patch = patches[$-1]; 389 diffs = patch.diffs; 390 if( diffs.length == 0 || diffs[$-1].operation != Operation.EQUAL) { 391 // Add nullPadding equality. 392 diffs ~= Diff(Operation.EQUAL, nullPadding); 393 patch.length1 += paddingLength; 394 patch.length2 += paddingLength; 395 } else if (paddingLength > diffs[$-1].text.length) { 396 // Grow last equality. 397 Diff lastDiff = diffs[$-1]; 398 auto extraLength = paddingLength - lastDiff.text.length; 399 lastDiff.text ~= nullPadding.substr(0, extraLength); 400 patch.length1 += extraLength; 401 patch.length2 += extraLength; 402 } 403 return nullPadding; 404 } 405 406 /** 407 * Look through the patches and break up any which are longer than the 408 * maximum limit of the match algorithm. 409 * Intended to be called only from within patch_apply. 410 * @param patches List of Patch objects. 411 */ 412 void splitMax(Patch[] patches) 413 { 414 auto patch_size = MATCH_MAXBITS; 415 for( auto x = 0; x < patches.length; x++ ){ 416 if( patches[x].length1 <= patch_size ) continue; 417 Patch bigpatch = patches[x]; 418 patches.splice(x--, 1); 419 auto start1 = bigpatch.start1; 420 auto start2 = bigpatch.start2; 421 string precontext; 422 while( bigpatch.diffs.length != 0){ 423 Patch patch; 424 bool empty = true; 425 patch.start1 = start1 - precontext.length; 426 patch.start2 = start2 - precontext.length; 427 if( precontext.length != 0 ){ 428 patch.length1 = patch.length2 = precontext.length; 429 patch.diffs ~= Diff(Operation.EQUAL, precontext); 430 } 431 while( bigpatch.diffs.length != 0 && patch.length1 < patch_size - PATCH_MARGIN ){ 432 Operation diff_type = bigpatch.diffs[0].operation; 433 auto diff_text = bigpatch.diffs[0].text; 434 if( diff_type == Operation.INSERT ){ 435 // Insertions are harmless. 436 patch.length2 += diff_text.length; 437 start2 += diff_text.length; 438 patch.diffs ~= bigpatch.diffs[0]; 439 bigpatch.diffs.remove(0); 440 empty = false; 441 } else if( diff_type == Operation.DELETE && patch.diffs.length == 1 442 && patch.diffs[0].operation == Operation.EQUAL 443 && diff_text.length > 2 * patch_size) { 444 // This is a large deletion. Let it pass in one chunk. 445 patch.length1 += diff_text.length; 446 start1 += diff_text.length; 447 empty = false; 448 patch.diffs ~= Diff(diff_type, diff_text); 449 bigpatch.diffs.remove(0); 450 } else { 451 // Deletion or equality. Only takes as much as we can stomach. 452 diff_text = diff_text.substr(0, min(diff_text.length, patch_size - patch.length1 - PATCH_MARGIN)); 453 patch.length1 += diff_text.length; 454 start1 += diff_text.length; 455 if( diff_type == Operation.EQUAL ){ 456 patch.length2 += diff_text.length; 457 start2 += diff_text.length; 458 } else { 459 empty = false; 460 } 461 patch.diffs ~= Diff(diff_type, diff_text); 462 if( diff_text == bigpatch.diffs[0].text ){ 463 bigpatch.diffs.remove(0); 464 } else { 465 bigpatch.diffs[0].text = bigpatch.diffs[0].text.substr(diff_text.length); 466 } 467 } 468 } 469 // Compute the head context for the next patch. 470 precontext = diff_text2(patch.diffs); 471 precontext = precontext.substr(max(0, precontext.length - PATCH_MARGIN)); 472 473 auto postcontext = diff_text1(bigpatch.diffs); 474 if( postcontext.length > PATCH_MARGIN ){ 475 postcontext = postcontext.substr(0, PATCH_MARGIN); 476 } 477 478 if( postcontext.length != 0 ){ 479 patch.length1 += postcontext.length; 480 patch.length2 += postcontext.length; 481 if( patch.diffs.length != 0 482 && patch.diffs[patch.diffs.length - 1].operation 483 == Operation.EQUAL) { 484 patch.diffs[$].text ~= postcontext; 485 } else { 486 patch.diffs ~= Diff(Operation.EQUAL, postcontext); 487 } 488 } 489 if( !empty ){ 490 patches.splice(++x, 0, [patch]); 491 } 492 } 493 } 494 } 495 496 /** 497 * Take a list of patches and return a textual representation. 498 * @param patches List of Patch objects. 499 * @return Text representation of patches. 500 */ 501 public string patch_toText(in Patch[] patches) 502 { 503 auto text = appender!string(); 504 foreach (aPatch; patches) 505 text ~= aPatch.toString(); 506 return text.data; 507 } 508 509 /** 510 * Parse a textual representation of patches and return a List of Patch 511 * objects. 512 * @param textline Text representation of patches. 513 * @return List of Patch objects. 514 * @throws ArgumentException If invalid input. 515 */ 516 public Patch[] patch_fromText(string textline) 517 { 518 import std.regex : regex, matchFirst; 519 import std..string : format, split; 520 521 auto patches = appender!(Patch[])(); 522 if (textline.length == 0) return null; 523 524 auto text = textline.split("\n"); 525 sizediff_t textPointer = 0; 526 auto patchHeader = regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); 527 char sign; 528 string line; 529 while (textPointer < text.length) { 530 auto m = matchFirst(text[textPointer], patchHeader); 531 enforce (m, "Invalid patch string: " ~ text[textPointer]); 532 Patch patch; 533 patch.start1 = m[1].to!sizediff_t; 534 if (m[2].length == 0) { 535 patch.start1--; 536 patch.length1 = 1; 537 } else if (m[2] == "0") { 538 patch.length1 = 0; 539 } else { 540 patch.start1--; 541 patch.length1 = m[2].to!sizediff_t; 542 } 543 544 patch.start2 = m[3].to!sizediff_t; 545 if (m[4].length == 0) { 546 patch.start2--; 547 patch.length2 = 1; 548 } else if (m[4] == "0") { 549 patch.length2 = 0; 550 } else { 551 patch.start2--; 552 patch.length2 = m[4].to!sizediff_t; 553 } 554 textPointer++; 555 556 while (textPointer < text.length) { 557 import std.uri : decodeComponent; 558 if (textPointer >= text.length || !text[textPointer].length) { 559 // Blank line? Whatever. 560 textPointer++; 561 continue; 562 } 563 sign = text[textPointer][0]; 564 line = text[textPointer][1 .. $]; 565 line = line.replace("+", "%2b"); 566 line = decodeComponent(line); 567 if (sign == '-') { 568 // Deletion. 569 patch.diffs ~= Diff(Operation.DELETE, line); 570 } else if (sign == '+') { 571 // Insertion. 572 patch.diffs ~= Diff(Operation.INSERT, line); 573 } else if (sign == ' ') { 574 // Minor equality. 575 patch.diffs ~= Diff(Operation.EQUAL, line); 576 } else if (sign == '@') { 577 // Start of next patch. 578 break; 579 } else { 580 // WTF? 581 throw new Exception(format("Invalid patch mode '%s' in: %s", sign, line)); 582 } 583 textPointer++; 584 } 585 586 patches ~= patch; 587 } 588 return patches.data; 589 }