1 /* 2 * Copyright 2008 Google Inc. All Rights Reserved. 3 * Copyright 2013-2014 Jan Krüger. All Rights Reserved. 4 * Author: fraser@google.com (Neil Fraser) 5 * Author: anteru@developer.shelter13.net (Matthaeus G. Chajdas) 6 * Author: jan@jandoe.de (Jan Krüger) 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 * 20 * Diff Match and Patch 21 * http://code.google.com/p/google-diff-match-patch/ 22 */ 23 module ddmp.patch; 24 import std.array; 25 import std.conv; 26 import std..string:lastIndexOf; 27 28 import ddmp.diff; 29 import ddmp.match; 30 import ddmp.util; 31 32 int MATCH_MAXBITS = 32; 33 int PATCH_MARGIN = 4; 34 float PATCH_DELETE_THRESHOLD = 0.5f; 35 36 struct Patch { 37 Diff[] diffs; 38 int start1; 39 int start2; 40 int length1; 41 int length2; 42 43 string toString() 44 const { 45 auto app = appender!string(); 46 app.put("@@ -"); 47 if( length1 == 0 ){ 48 app.put(to!string(start1)); 49 app.put(",0"); 50 } else if( length1 == 1 ){ 51 app.put(to!string(start1 + 1)); 52 } else { 53 app.put(to!string(start1 + 1)); 54 app.put(","); 55 app.put(to!string(length1)); 56 } 57 app.put(" +"); 58 if( length2 == 0 ){ 59 app.put(to!string(start2)); 60 app.put(",0"); 61 } else if( length2 == 1 ){ 62 app.put(to!string(start2 + 1)); 63 } else { 64 app.put(to!string(start2 + 1)); 65 app.put(","); 66 app.put(to!string(length2)); 67 } 68 app.put(" @@\n"); 69 foreach( d ; diffs){ 70 final switch( d.operation ){ 71 case Operation.INSERT: 72 app.put("+"); 73 break; 74 case Operation.DELETE: 75 app.put("-"); 76 break; 77 case Operation.EQUAL: 78 app.put(" "); 79 break; 80 } 81 //TODO: urlEncode 82 app.put(d.text); 83 app.put("\n"); 84 } 85 86 //TODO: unescapeForEncodeUriCompatability 87 return app.data(); 88 } 89 } 90 91 /** 92 * Increase the context until it is unique, 93 * but don't let the pattern expand beyond Match_MaxBits. 94 * @param patch The patch to grow. 95 * @param text Source text. 96 */ 97 void addContext(Patch patch, string text) 98 { 99 if( text.length == 0 ) return; 100 101 auto pattern = text.substr(patch.start2, patch.length1); 102 int padding = 0; 103 104 // Look for the first and last matches of pattern in text. If two 105 // different matches are found, increase the pattern length. 106 while( text.indexOfAlt(pattern) != text.lastIndexOf(pattern) 107 && pattern.length < MATCH_MAXBITS - PATCH_MARGIN - PATCH_MARGIN ){ 108 padding += PATCH_MARGIN; 109 pattern = text[max(0, patch.start2 - padding)..min(text.length, patch .start2 + patch.length1 + padding)]; 110 } 111 // Add one chunk for good luck. 112 padding += PATCH_MARGIN; 113 114 // Add the prefix. 115 auto prefix = text[max(0, patch.start2 - padding)..patch.start2]; 116 if( prefix.length != 0 ){ 117 patch.diffs.insert(0, [Diff(Operation.EQUAL, prefix)]); 118 } 119 120 // Add the suffix. 121 auto suffix = text[patch.start2 + patch.length1..min(text.length, patch.start2 + patch.length1 + padding)]; 122 if( suffix.length != 0 ){ 123 patch.diffs ~= Diff(Operation.EQUAL, suffix); 124 } 125 126 // Roll back the start points. 127 patch.start1 -= prefix.length; 128 patch.start2 -= prefix.length; 129 // Extend the lengths. 130 patch.length1 += prefix.length + suffix.length; 131 patch.length2 += prefix.length + suffix.length; 132 } 133 134 /** 135 * Compute a list of patches to turn text1 into text2. 136 * A set of diffs will be computed. 137 * @param text1 Old text. 138 * @param text2 New text. 139 * @return List of Patch objects. 140 */ 141 Patch[] patch_make(string text1, string text2) { 142 // No diffs provided, comAdde our own. 143 auto diffs = diff_main(text1, text2, true); 144 if (diffs.length > 2) { 145 cleanupSemantic(diffs); 146 cleanupEfficiency(diffs); 147 } 148 return patch_make(text1, diffs); 149 } 150 151 152 /** 153 * Compute a list of patches to turn text1 into text2. 154 * text1 will be derived from the provided diffs. 155 * @param diffs Array of Diff objects for text1 to text2. 156 * @return List of Patch objects. 157 */ 158 Patch[] patch_make(Diff[] diffs) { 159 // Check for null inputs not needed since null can't be passed in C#. 160 // No origin string provided, comAdde our own. 161 auto text1 = diff_text1(diffs); 162 return patch_make(text1, diffs); 163 } 164 165 166 /** 167 * Compute a list of patches to turn text1 into text2. 168 * text2 is not provided, diffs are the delta between text1 and text2. 169 * @param text1 Old text. 170 * @param diffs Array of Diff objects for text1 to text2. 171 * @return List of Patch objects. 172 */ 173 Patch[] patch_make(string text1, Diff[] diffs) 174 { 175 Patch[] patches; 176 if( diffs.length == 0 ) return patches; 177 178 Patch patch; 179 auto char_count1 = 0; // Number of characters into the text1 string. 180 auto char_count2 = 0; // Number of characters into the text2 string. 181 // Start with text1 (prepatch_text) and apply the diffs until we arrive at 182 // text2 (postpatch_text). We recreate the patches one by one to determine 183 // context info. 184 auto prepatch_text = text1; 185 auto postpatch_text = text1; 186 187 foreach( diff ; diffs ){ 188 if( patch.diffs.length == 0 && diff.operation != Operation.EQUAL ){ 189 // A new patch starts here. 190 patch.start1 = char_count1; 191 patch.start2 = char_count2; 192 } 193 194 final switch(diff.operation){ 195 case Operation.INSERT: 196 patch.diffs ~= diff; 197 patch.length2 += diff.text.length; 198 postpatch_text.insert(char_count2, diff.text); 199 break; 200 case Operation.DELETE: 201 patch.length2 += diff.text.length; 202 patch.diffs ~= diff; 203 postpatch_text.remove(char_count2, diff.text.length); 204 break; 205 case Operation.EQUAL: 206 if( diff.text.length <= 2 * PATCH_MARGIN && patch.diffs.length != 0 && diff != diffs[$-1] ){ 207 patch.diffs ~= diff; 208 patch.length1 += diff.text.length; 209 patch.length2 += diff.text.length; 210 } 211 212 if( diff.text.length >= 2 * PATCH_MARGIN ){ 213 if( patch.diffs.length != 0 ){ 214 addContext(patch, prepatch_text); 215 patches ~= patch; 216 patch = Patch(); 217 prepatch_text = postpatch_text; 218 char_count1 = char_count2; 219 } 220 } 221 break; 222 } 223 // Update the current character count. 224 if (diff.operation != Operation.INSERT) { 225 char_count1 += diff.text.length; 226 } 227 if (diff.operation != Operation.DELETE) { 228 char_count2 += diff.text.length; 229 } 230 } 231 // Pick up the leftover patch if not empty. 232 if( !patch.diffs.empty ){ 233 addContext(patch, prepatch_text); 234 patches ~= patch; 235 } 236 237 return patches; 238 } 239 240 241 /** 242 * Merge a set of patches onto the text. Return a patched text, as well 243 * as an array of true/false values indicating which patches were applied. 244 * @param patches Array of Patch objects 245 * @param text Old text. 246 * @return Two element Object array, containing the new text and an array of 247 * bool values. 248 */ 249 250 struct PatchApplyResult { 251 string text; 252 bool[] patchesApplied; 253 } 254 255 PatchApplyResult apply(Patch[] patches, string text) 256 { 257 PatchApplyResult result; 258 if( patches.length == 0 ) return result; 259 260 auto nullPadding = addPadding(patches); 261 text = nullPadding ~ text ~ nullPadding; 262 splitMax(patches); 263 264 result.patchesApplied.length = patches.length; // init patchesApplied array 265 int x = 0; 266 // delta keeps track of the offset between the expected and actual 267 // location of the previous patch. If there are patches expected at 268 // positions 10 and 20, but the first patch was found at 12, delta is 2 269 // and the second patch has an effective expected position of 22. 270 int delta = 0; 271 foreach( patch ; patches ){ 272 auto expected_loc = patch.start2 + delta; 273 auto text1 = diff_text1(patch.diffs); 274 int start_loc; 275 int end_loc = -1; 276 if( text1.length > MATCH_MAXBITS ){ 277 // patch_splitMax will only provide an oversized pattern 278 // in the case of a monster delete 279 start_loc = match_main(text, text1.substr(0, MATCH_MAXBITS), expected_loc); 280 if( start_loc != -1 ){ 281 end_loc = match_main(text, 282 text1.substr(text1.length - MATCH_MAXBITS), 283 expected_loc + text1.length - MATCH_MAXBITS); 284 if( end_loc == -1 || start_loc >= end_loc ){ 285 // Can't find valid trailing context. Drop this patch. 286 start_loc = -1; 287 } 288 } 289 } else { 290 start_loc = match_main(text, text1, expected_loc); 291 } 292 if( start_loc == -1 ){ 293 // No match found. :( 294 result.patchesApplied[x] = false; 295 // Subtract the delta for this failed patch from subsequent patches. 296 delta -= patch.length2 - patch.length1; 297 } else { 298 // Found a match. :) 299 result.patchesApplied[x] = true; 300 delta = start_loc - expected_loc; 301 string text2; 302 if( end_loc == -1 ){ 303 text2 = text[ start_loc .. min(start_loc + text1.length, text.length) ]; 304 } else { 305 text2 = text[ start_loc .. min(end_loc + MATCH_MAXBITS, text.length) ]; 306 } 307 if( text1 == text2 ) { 308 // Perfect match, just shove the replacement text in. 309 text = text.substr(0, start_loc) ~ diff_text2(patch.diffs) ~ text.substr(start_loc + text1.length); 310 } else { 311 // Imperfect match. Run a diff to get a framework of equivalent indices. 312 auto diffs = diff_main(text1, text2, false); 313 if( text1.length > MATCH_MAXBITS && levenshtein(diffs) / cast(float)text1.length > PATCH_DELETE_THRESHOLD){ 314 // The end points match, but the content is unacceptably bad. 315 result.patchesApplied[x] = false; 316 } else { 317 cleanupSemanticLossless(diffs); 318 auto index1 = 0; 319 foreach( diff; patch.diffs ){ 320 if( diff.operation != Operation.EQUAL ){ 321 auto index2 = xIndex(diffs, index1); 322 if( diff.operation == Operation.INSERT ){ 323 // Insertion 324 text.insert(start_loc + index2, diff.text); 325 } else if( diff.operation == Operation.DELETE ){ 326 // Deletion 327 text.remove(start_loc + index2, xIndex(diffs, index1 + diff.text.length) - index2); 328 } 329 } 330 if( diff.operation != Operation.DELETE ){ 331 index1 += diff.text.length; 332 } 333 } 334 } 335 } 336 } 337 x++; 338 } 339 // Strip the padding off. 340 result.text = text.substr(nullPadding.length, text.length - 2 * nullPadding.length); 341 return result; 342 } 343 344 /** 345 * Add some padding on text start and end so that edges can match something. 346 * Intended to be called only from within patch_apply. 347 * @param patches Array of Patch objects. 348 * @return The padding string added to each side. 349 */ 350 string addPadding(Patch[] patches) 351 { 352 auto paddingLength = PATCH_MARGIN; 353 string nullPadding; 354 for(int x = 1; x <= paddingLength; x++){ 355 nullPadding ~= cast(char)x; 356 } 357 358 // Bump all the patches forward. 359 foreach( patch; patches ){ 360 patch.start1 += paddingLength; 361 patch.start2 += paddingLength; 362 } 363 364 // Add some padding on start of first diff. 365 Patch patch = patches[0]; 366 auto diffs = patch.diffs; 367 if( diffs.length == 0 || diffs[0].operation != Operation.EQUAL ){ 368 // Add nullPadding equality. 369 diffs.insert(0, [Diff(Operation.EQUAL, nullPadding)]); 370 patch.start1 -= paddingLength; // Should be 0. 371 patch.start2 -= paddingLength; // Should be 0. 372 patch.length1 += paddingLength; 373 patch.length2 += paddingLength; 374 } else if (paddingLength > diffs[0].text.length) { 375 // Grow first equality. 376 Diff firstDiff = diffs[0]; 377 auto extraLength = paddingLength - firstDiff.text.length; 378 firstDiff.text = nullPadding.substr(firstDiff.text.length) ~ firstDiff.text; 379 patch.start1 -= extraLength; 380 patch.start2 -= extraLength; 381 patch.length1 += extraLength; 382 patch.length2 += extraLength; 383 } 384 385 // Add some padding on end of last diff. 386 patch = patches[$-1]; 387 diffs = patch.diffs; 388 if( diffs.length == 0 || diffs[$-1].operation != Operation.EQUAL) { 389 // Add nullPadding equality. 390 diffs ~= Diff(Operation.EQUAL, nullPadding); 391 patch.length1 += paddingLength; 392 patch.length2 += paddingLength; 393 } else if (paddingLength > diffs[$-1].text.length) { 394 // Grow last equality. 395 Diff lastDiff = diffs[$-1]; 396 auto extraLength = paddingLength - lastDiff.text.length; 397 lastDiff.text ~= nullPadding.substr(0, extraLength); 398 patch.length1 += extraLength; 399 patch.length2 += extraLength; 400 } 401 return nullPadding; 402 } 403 404 /** 405 * Look through the patches and break up any which are longer than the 406 * maximum limit of the match algorithm. 407 * Intended to be called only from within patch_apply. 408 * @param patches List of Patch objects. 409 */ 410 void splitMax(Patch[] patches) 411 { 412 auto patch_size = MATCH_MAXBITS; 413 for( auto x = 0; x < patches.length; x++ ){ 414 if( patches[x].length1 <= patch_size ) continue; 415 Patch bigpatch = patches[x]; 416 patches.splice(x--, 1); 417 auto start1 = bigpatch.start1; 418 auto start2 = bigpatch.start2; 419 string precontext; 420 while( bigpatch.diffs.length != 0){ 421 Patch patch; 422 bool empty = true; 423 patch.start1 = start1 - precontext.length; 424 patch.start2 = start2 - precontext.length; 425 if( precontext.length != 0 ){ 426 patch.length1 = patch.length2 = precontext.length; 427 patch.diffs ~= Diff(Operation.EQUAL, precontext); 428 } 429 while( bigpatch.diffs.length != 0 && patch.length1 < patch_size - PATCH_MARGIN ){ 430 Operation diff_type = bigpatch.diffs[0].operation; 431 auto diff_text = bigpatch.diffs[0].text; 432 if( diff_type == Operation.INSERT ){ 433 // Insertions are harmless. 434 patch.length2 += diff_text.length; 435 start2 += diff_text.length; 436 patch.diffs ~= bigpatch.diffs[0]; 437 bigpatch.diffs.remove(0); 438 empty = false; 439 } else if( diff_type == Operation.DELETE && patch.diffs.length == 1 440 && patch.diffs[0].operation == Operation.EQUAL 441 && diff_text.length > 2 * patch_size) { 442 // This is a large deletion. Let it pass in one chunk. 443 patch.length1 += diff_text.length; 444 start1 += diff_text.length; 445 empty = false; 446 patch.diffs ~= Diff(diff_type, diff_text); 447 bigpatch.diffs.remove(0); 448 } else { 449 // Deletion or equality. Only takes as much as we can stomach. 450 diff_text = diff_text.substr(0, min(diff_text.length, patch_size - patch.length1 - PATCH_MARGIN)); 451 patch.length1 += diff_text.length; 452 start1 += diff_text.length; 453 if( diff_type == Operation.EQUAL ){ 454 patch.length2 += diff_text.length; 455 start2 += diff_text.length; 456 } else { 457 empty = false; 458 } 459 patch.diffs ~= Diff(diff_type, diff_text); 460 if( diff_text == bigpatch.diffs[0].text ){ 461 bigpatch.diffs.remove(0); 462 } else { 463 bigpatch.diffs[0].text = bigpatch.diffs[0].text.substr(diff_text.length); 464 } 465 } 466 } 467 // Compute the head context for the next patch. 468 precontext = diff_text2(patch.diffs); 469 precontext = precontext.substr(max(0, precontext.length - PATCH_MARGIN)); 470 471 auto postcontext = diff_text1(bigpatch.diffs); 472 if( postcontext.length > PATCH_MARGIN ){ 473 postcontext = postcontext.substr(0, PATCH_MARGIN); 474 } 475 476 if( postcontext.length != 0 ){ 477 patch.length1 += postcontext.length; 478 patch.length2 += postcontext.length; 479 if( patch.diffs.length != 0 480 && patch.diffs[patch.diffs.length - 1].operation 481 == Operation.EQUAL) { 482 patch.diffs[$].text ~= postcontext; 483 } else { 484 patch.diffs ~= Diff(Operation.EQUAL, postcontext); 485 } 486 } 487 if( !empty ){ 488 patches.splice(++x, 0, [patch]); 489 } 490 } 491 } 492 } 493 494 /** 495 * Take a list of patches and return a textual representation. 496 * @param patches List of Patch objects. 497 * @return Text representation of patches. 498 */ 499 public string patch_toText(in Patch[] patches) 500 { 501 auto text = appender!string(); 502 foreach (aPatch; patches) 503 text ~= aPatch.toString(); 504 return text.data; 505 } 506 507 /** 508 * Parse a textual representation of patches and return a List of Patch 509 * objects. 510 * @param textline Text representation of patches. 511 * @return List of Patch objects. 512 * @throws ArgumentException If invalid input. 513 */ 514 public Patch[] patch_fromText(string textline) 515 { 516 import std.regex : regex, matchFirst; 517 import std..string : format, split; 518 519 auto patches = appender!(Patch[])(); 520 if (textline.length == 0) return null; 521 522 auto text = textline.split("\n"); 523 int textPointer = 0; 524 auto patchHeader = regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); 525 char sign; 526 string line; 527 while (textPointer < text.length) { 528 auto m = matchFirst(text[textPointer], patchHeader); 529 enforce (m, "Invalid patch string: " ~ text[textPointer]); 530 Patch patch; 531 patch.start1 = m[0].to!int; 532 if (m[1].length == 0) { 533 patch.start1--; 534 patch.length1 = 1; 535 } else if (m[1] == "0") { 536 patch.length1 = 0; 537 } else { 538 patch.start1--; 539 patch.length1 = m[1].to!int; 540 } 541 542 patch.start2 = m[2].to!int; 543 if (m[3].length == 0) { 544 patch.start2--; 545 patch.length2 = 1; 546 } else if (m[3] == "0") { 547 patch.length2 = 0; 548 } else { 549 patch.start2--; 550 patch.length2 = m[3].to!int; 551 } 552 textPointer++; 553 554 while (textPointer < text.length) { 555 import std.uri : decode; 556 if (textPointer >= text.length || !text[textPointer].length) { 557 // Blank line? Whatever. 558 textPointer++; 559 continue; 560 } 561 sign = text[textPointer][0]; 562 line = text[textPointer][1 .. $]; 563 line = line.replace("+", "%2b"); 564 line = decode(line); 565 if (sign == '-') { 566 // Deletion. 567 patch.diffs ~= Diff(Operation.DELETE, line); 568 } else if (sign == '+') { 569 // Insertion. 570 patch.diffs ~= Diff(Operation.INSERT, line); 571 } else if (sign == ' ') { 572 // Minor equality. 573 patch.diffs ~= Diff(Operation.EQUAL, line); 574 } else if (sign == '@') { 575 // Start of next patch. 576 break; 577 } else { 578 // WTF? 579 throw new Exception(format("Invalid patch mode '%s' in: %s", sign, line)); 580 } 581 textPointer++; 582 } 583 584 patches ~= patch; 585 } 586 return patches.data; 587 }