Created
October 21, 2013 05:59
-
-
Save Varriount/7079279 to your computer and use it in GitHub Desktop.
I present to you... Nimrod-Sharp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright 2008 Google Inc. All Rights Reserved. | |
* Author: [email protected] (Neil Fraser) | |
* Author: [email protected] (Matthaeus G. Chajdas) | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License") | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
* | |
* Diff Match and Patch | |
* http://code.google.com/p/google-diff-match-patch/ | |
*/ | |
using System | |
using System.Collections.Generic | |
using System.Linq | |
using System.Text | |
using System.Text.RegularExpressions | |
using System.Web | |
namespace DiffMatchPatch | |
internal static class CompatibilityExtensions | |
# JScript splice function | |
public static seq[T] Splice<T>(this seq[T] input, int start, int count, | |
params T[] objects) | |
seq[T] deletedRange = input.GetRange(start, count) | |
input.RemoveRange(start, count) | |
input.InsertRange(start, objects) | |
return deletedRange | |
# Java substring function | |
public static string JavaSubstring(this string s, int begin, int end) | |
return s[begin .. end - begin] | |
/** | |
* The data structure representing a diff is a List of Diff objects | |
* Diff(deletionOp, "Hello"), Diff(insertionOp, "Goodbye"), | |
* Diff(equalityOp, " world.") | |
* which means delete "Hello", add "Goodbye" and keep " world." | |
*/ | |
public enum Operation | |
DELETE, INSERT, EQUAL | |
/** | |
* Class representing one diff operation. | |
*/ | |
public class Diff | |
public Operation operation | |
# One of: INSERT, DELETE or EQUAL. | |
public string text | |
# The text associated with this diff operation. | |
## | |
## Constructor. Initializes the diff with the provided values. | |
## @param operation One of INSERT, DELETE or EQUAL. | |
## @param text The text being applied. | |
## | |
public Diff(Operation operation, string text) | |
# Construct a diff with the specified operation and text. | |
this.operation = operation | |
this.text = text | |
## | |
## Display a human-readable version of this Diff. | |
## @return text version. | |
## | |
public override string ToString() | |
string prettyText = this.text.Replace('\n', '\u00b6') | |
return "Diff(" + this.operation + ",\"" + prettyText + "\")" | |
## | |
## Is this Diff equivalent to another Diff? | |
## @param d Another Diff to compare against. | |
## @return true or false. | |
## | |
public override bool Equals(Object obj) | |
# If parameter is null return false. | |
if (obj == null) | |
return false | |
# If parameter cannot be cast to Diff return false. | |
Diff p = obj as Diff | |
if ((System.Object)p == null) | |
return false | |
# Return true if the fields match. | |
return p.operation == this.operation and p.text == this.text | |
public bool Equals(Diff obj) | |
# If parameter is null return false. | |
if (obj == null) | |
return false | |
# Return true if the fields match. | |
return obj.operation == this.operation and obj.text == this.text | |
public override int GetHashCode() | |
return text.GetHashCode() ^ operation.GetHashCode() | |
/** | |
* Class representing one patch operation. | |
*/ | |
public class Patch | |
public seq[Diff] diffs = newSeq[Diff]() | |
public int startOne | |
public int startTwo | |
public int lengthOne | |
public int lengthTwo | |
## | |
## Emmulate GNU diff's format. | |
## Header: @@ -382,8 +481,9 @@ | |
## Indicies are printed as 1-based, not 0-based. | |
## @return The GNU diff string. | |
## | |
public override string ToString() | |
string coordsOne, coordsTwo | |
if (this.lenOne == 0) | |
coordsOne = this.startOne + ",0" | |
elif (this.lenOne == 1) | |
coordsOne = Convert.ToString(this.startOne + 1) | |
else | |
coordsOne = (this.startOne + 1) + "," + this.lenOne | |
if (this.lenTwo == 0) | |
coordsTwo = this.startTwo + ",0" | |
elif (this.lenTwo == 1) | |
coordsTwo = Convert.ToString(this.startTwo + 1) | |
else | |
coordsTwo = (this.startTwo + 1) + "," + this.lenTwo | |
rope text = new rope() | |
text.add("@@ -").add(coordsOne).add(" +").add(coordsTwo) | |
.add(" @@\n") | |
# Escape the body of the patch with %xx notation. | |
for (Diff aDiff in this.diffs) | |
switch (aDiff.operation) | |
case insertionOp: | |
text.add('+') | |
break | |
case deletionOp: | |
text.add('-') | |
break | |
case equalityOp: | |
text.add(' ') | |
break | |
text.add(HttpUtility.UrlEncode(aDiff.text, | |
new UTF8Encoding()).Replace('+', ' ')).add("\n") | |
return diff_match_patch.unescapeForEncodeUriCompatability( | |
text.ToString()) | |
/** | |
* Class containing the diff, match and patch methods. | |
* Also Contains the behaviour settings. | |
*/ | |
public class diff_match_patch | |
# Defaults. | |
# Set these on your diff_match_patch instance to override the defaults. | |
# Number of seconds to map a diff before giving up (0 for infinity). | |
public float DiffTimeout = 1.0f | |
# Cost of an empty edit operation in terms of edit characters. | |
public short Diff_EditCost = 4 | |
# At what point is no match declared (0.0 = perfection, 1.0 = very loose). | |
public float MatchThreshold = 0.5f | |
# How far to search for a match (0 = exact location, 1000+ = broad match). | |
# A match this many characters away from the expected location will add | |
# 1.0 to the score (0.0 is a perfect match). | |
public int MatchDistance = 1000 | |
# When deleting a large block of text (over ~64 characters), how close | |
# do the contents have to be to match the expected contents. (0.0 = | |
# perfection, 1.0 = very loose). Note that MatchThreshold controls | |
# how closely the end points of a delete need to match. | |
public float PatchDeleteThreshold = 0.5f | |
# Chunk size for context length. | |
public short Patch_Margin = 4 | |
# The number of bits in an int. | |
private short Match_MaxBits = 32 | |
# DIFF FUNCTIONS | |
## | |
## Find the differences between two texts. | |
## Run a faster, slightly less optimal diff. | |
## This method allows the 'checkLines' of diff_main() to be optional. | |
## Most of the time checkLines is wanted, so default to true. | |
## @param textOne Old string to be diffed. | |
## @param textTwo New string to be diffed. | |
## @return List of Diff objects. | |
## | |
public seq[Diff] diff_main(string textOne, string textTwo) | |
return diff_main(textOne, textTwo, true) | |
## | |
## Find the differences between two texts. | |
## @param textOne Old string to be diffed. | |
## @param textTwo New string to be diffed. | |
## @param checkLines Speedup flag. If false, then don't run a | |
## line-level diff first to identify the changed areas. | |
## If true, then run a faster slightly less optimal diff. | |
## @return List of Diff objects. | |
## | |
public seq[Diff] diff_main(string textOne, string textTwo, bool checkLines) | |
# Set a deadline by which time the diff must be complete. | |
DateTime deadline | |
if (this.DiffTimeout <= 0) | |
deadline = DateTime.MaxValue | |
else | |
deadline = epochTime + | |
new TimeSpan(((long)(DiffTimeout * 1000)) * 10000) | |
return diff_main(textOne, textTwo, checkLines, deadline) | |
## | |
## Find the differences between two texts. Simplifies the problem by | |
## stripping any common prefix or suffix off the texts before diffing. | |
## @param textOne Old string to be diffed. | |
## @param textTwo New string to be diffed. | |
## @param checkLines Speedup flag. If false, then don't run a | |
## line-level diff first to identify the changed areas. | |
## If true, then run a faster slightly less optimal diff. | |
## @param deadline Time when the diff should be complete by. Used | |
## internally for recursive calls. Users should set DiffTimeout | |
## instead. | |
## @return List of Diff objects. | |
## | |
private seq[Diff] diff_main(string textOne, string textTwo, bool checkLines, | |
DateTime deadline) | |
# Check for null inputs not needed since null can't be passed in C#. | |
# Check for equality (speedup). | |
seq[Diff] diffs | |
if (textOne == textTwo) | |
diffs = newSeq[Diff]() | |
if (textOne.len != 0) | |
diffs.add(newDiff(equalityOp, textOne)) | |
return diffs | |
# Trim off common prefix (speedup). | |
int commonLength = diff_commonPrefix(textOne, textTwo) | |
string commonprefix = textOne[0 .. commonLength] | |
textOne = textOne[commonLength .. len(textOne)] | |
textTwo = textTwo[commonLength .. len(textTwo)] | |
# Trim off common suffix (speedup). | |
commonLength = diff_commonSuffix(textOne, textTwo) | |
string commonsuffix = textOne.Substring(textOne.len - commonLength) | |
textOne = textOne.Substring(0, textOne.len - commonLength) | |
textTwo = textTwo.Substring(0, textTwo.len - commonLength) | |
# Compute the diff on the middle block. | |
diffs = diff_compute(textOne, textTwo, checkLines, deadline) | |
# Restore the prefix and suffix. | |
if (commonprefix.len != 0) | |
diffs.Insert(0, (newDiff(equalityOp, commonprefix))) | |
if (commonsuffix.len != 0) | |
diffs.add(newDiff(equalityOp, commonsuffix)) | |
diff_cleanupMerge(diffs) | |
return diffs | |
## | |
## Find the differences between two texts. Assumes that the texts do not | |
## have any common prefix or suffix. | |
## @param textOne Old string to be diffed. | |
## @param textTwo New string to be diffed. | |
## @param checkLines Speedup flag. If false, then don't run a | |
## line-level diff first to identify the changed areas. | |
## If true, then run a faster slightly less optimal diff. | |
## @param deadline Time when the diff should be complete by. | |
## @return List of Diff objects. | |
## | |
private seq[Diff] diff_compute(string textOne, string textTwo, | |
bool checkLines, DateTime deadline) | |
seq[Diff] diffs = newSeq[Diff]() | |
if (textOne.len == 0) | |
# Just add some text (speedup). | |
diffs.add(newDiff(insertionOp, textTwo)) | |
return diffs | |
if (textTwo.len == 0) | |
# Just delete some text (speedup). | |
diffs.add(newDiff(deletionOp, textOne)) | |
return diffs | |
string longText = textOne.len > textTwo.len ? textOne : textTwo | |
string shortText = textOne.len > textTwo.len ? textTwo : textOne | |
int i = longText.find(shortText, StringComparison.Ordinal) | |
if (i != -1) | |
# Shorter text is inside the longer text (speedup). | |
Operation op = (textOne.len > textTwo.len) ? | |
deletionOp : insertionOp | |
diffs.add(newDiff(op, longText[0 .. i])) | |
diffs.add(newDiff(equalityOp, shortText)) | |
diffs.add(newDiff(op, longText.Substring(i + shortText.len))) | |
return diffs | |
if (shortText.len == 1) | |
# Single character string. | |
# After the previous speedup, the character can't be an equality. | |
diffs.add(newDiff(deletionOp, textOne)) | |
diffs.add(newDiff(insertionOp, textTwo)) | |
return diffs | |
# Check to see if the problem can be split in two. | |
string[] hm = diff_halfMatch(textOne, textTwo) | |
if (hm != null) | |
# A half-match was found, sort out the return data. | |
string textOneA = hm[0] | |
string textOneB = hm[1] | |
string textTwoA = hm[2] | |
string textTwoB = hm[3] | |
string midCommon = hm[4] | |
# Send both pairs off for separate processing. | |
seq[Diff] diffsA = diff_main(textOneA, textTwoA, checkLines, deadline) | |
seq[Diff] diffsB = diff_main(textOneB, textTwoB, checkLines, deadline) | |
# Merge the results. | |
diffs = diffsA | |
diffs.add(newDiff(equalityOp, midCommon)) | |
diffs.AddRange(diffsB) | |
return diffs | |
if (checkLines and textOne.len > 100 and textTwo.len > 100) | |
return diff_lineMode(textOne, textTwo, deadline) | |
return diff_bisect(textOne, textTwo, deadline) | |
## | |
## Do a quick line-level diff on both strings, then rediff the parts for | |
## greater accuracy. | |
## This speedup can produce non-minimal diffs. | |
## @param textOne Old string to be diffed. | |
## @param textTwo New string to be diffed. | |
## @param deadline Time when the diff should be complete by. | |
## @return List of Diff objects. | |
## | |
private seq[Diff] diff_lineMode(string textOne, string textTwo, | |
DateTime deadline) | |
# Scan the text on a line-by-line basis first. | |
Object[] b = diff_linesToChars(textOne, textTwo) | |
textOne = (string)b[0] | |
textTwo = (string)b[1] | |
seq[string] lineArray = (seq[string])b[2] | |
seq[Diff] diffs = diff_main(textOne, textTwo, false, deadline) | |
# Convert the diff back to original text. | |
diff_charsToLines(diffs, lineArray) | |
# Eliminate freak matches (e.g. blank lines) | |
diff_cleanupSemantic(diffs) | |
# Rediff any replacement blocks, this time character-by-character. | |
# Add a dummy entry at the end. | |
diffs.add(newDiff(equalityOp, "")) | |
int pointer = 0 | |
int countDelete = 0 | |
int countInsert = 0 | |
string textDelete = "" | |
string textInsert = "" | |
while (pointer < diffs.len) | |
switch (diffs[pointer].operation) | |
case insertionOp: | |
countInsert += 1 | |
textInsert += diffs[pointer].text | |
break | |
case deletionOp: | |
countDelete += 1 | |
textDelete += diffs[pointer].text | |
break | |
case equalityOp: | |
# Upon reaching an equality, check for prior redundancies. | |
if (countDelete >= 1 and countInsert >= 1) | |
# Delete the offending records and add the merged ones. | |
diffs.RemoveRange(pointer - countDelete - countInsert, | |
countDelete + countInsert) | |
pointer = pointer - countDelete - countInsert | |
seq[Diff] a = | |
this.diff_main(textDelete, textInsert, false, deadline) | |
diffs.InsertRange(pointer, a) | |
pointer = pointer + a.len | |
countInsert = 0 | |
countDelete = 0 | |
textDelete = "" | |
textInsert = "" | |
break | |
pointer += 1 | |
diffs.delete(diffs.len - 1); // Remove the dummy entry at the end. | |
return diffs | |
## | |
## Find the 'middle snake' of a diff, split the problem in two | |
## and return the recursively constructed diff. | |
## See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. | |
## @param textOne Old string to be diffed. | |
## @param textTwo New string to be diffed. | |
## @param deadline Time at which to bail if not yet complete. | |
## @return List of Diff objects. | |
## | |
protected seq[Diff] diff_bisect(string textOne, string textTwo, | |
DateTime deadline) | |
# Cache the text lengths to prevent multiple calls. | |
int textOneLength = textOne.len | |
int textTwoLength = textTwo.len | |
int maxD = (textOneLength + textTwoLength + 1) / 2 | |
int vOffset = maxD | |
int vLength = 2 * maxD | |
int[] vOne = new int[vLength] | |
int[] vTwo = new int[vLength] | |
for (int x = 0; x < vLength; x += 1) | |
vOne[x] = -1 | |
vTwo[x] = -1 | |
vOne[vOffset + 1] = 0 | |
vTwo[vOffset + 1] = 0 | |
int delta = textOneLength - textTwoLength | |
# If the total number of characters is odd, then the front path will | |
# collide with the reverse path. | |
bool front = (delta % 2 != 0) | |
# Offsets for start and end of k loop. | |
# Prevents mapping of space beyond the grid. | |
int kOneStart = 0 | |
int kOneEnd = 0 | |
int kTwoStart = 0 | |
int kTwoEnd = 0 | |
for (int d = 0; d < maxD; d += 1) | |
# Bail out if deadline is reached. | |
if (epochTime > deadline) | |
break | |
# Walk the front path one step. | |
for (int kOne = -d + kOneStart; kOne <= d - kOneEnd; kOne += 2) | |
int kOneOffset = vOffset + kOne | |
int xOne | |
if (kOne == -d or kOne != d and vOne[kOneOffset - 1] < vOne[kOneOffset + 1]) | |
xOne = vOne[kOneOffset + 1] | |
else | |
xOne = vOne[kOneOffset - 1] + 1 | |
int yOne = xOne - kOne | |
while (xOne < textOneLength and yOne < textTwoLength | |
and textOne[xOne] == textTwo[yOne]) | |
xOne += 1 | |
yOne += 1 | |
vOne[kOneOffset] = xOne | |
if (xOne > textOneLength) | |
# Ran off the right of the graph. | |
kOneEnd += 2 | |
elif (yOne > textTwoLength) | |
# Ran off the bottom of the graph. | |
kOneStart += 2 | |
elif (front) | |
int kTwoOffset = vOffset + delta - kOne | |
if (kTwoOffset >= 0 and kTwoOffset < vLength and vTwo[kTwoOffset] != -1) | |
# Mirror xTwo onto top-left coordinate system. | |
int xTwo = textOneLength - vTwo[kTwoOffset] | |
if (xOne >= xTwo) | |
# Overlap detected. | |
return diff_bisectSplit(textOne, textTwo, xOne, yOne, deadline) | |
# Walk the reverse path one step. | |
for (int kTwo = -d + kTwoStart; kTwo <= d - kTwoEnd; kTwo += 2) | |
int kTwoOffset = vOffset + kTwo | |
int xTwo | |
if (kTwo == -d or kTwo != d and vTwo[kTwoOffset - 1] < vTwo[kTwoOffset + 1]) | |
xTwo = vTwo[kTwoOffset + 1] | |
else | |
xTwo = vTwo[kTwoOffset - 1] + 1 | |
int yTwo = xTwo - kTwo | |
while (xTwo < textOneLength and yTwo < textTwoLength | |
and textOne[textOneLength - xTwo - 1] | |
== textTwo[textTwoLength - yTwo - 1]) | |
xTwo += 1 | |
yTwo += 1 | |
vTwo[kTwoOffset] = xTwo | |
if (xTwo > textOneLength) | |
# Ran off the left of the graph. | |
kTwoEnd += 2 | |
elif (yTwo > textTwoLength) | |
# Ran off the top of the graph. | |
kTwoStart += 2 | |
elif (!front) | |
int kOneOffset = vOffset + delta - kTwo | |
if (kOneOffset >= 0 and kOneOffset < vLength and vOne[kOneOffset] != -1) | |
int xOne = vOne[kOneOffset] | |
int yOne = vOffset + xOne - kOneOffset | |
# Mirror xTwo onto top-left coordinate system. | |
xTwo = textOneLength - vTwo[kTwoOffset] | |
if (xOne >= xTwo) | |
# Overlap detected. | |
return diff_bisectSplit(textOne, textTwo, xOne, yOne, deadline) | |
# Diff took too long and hit the deadline or | |
# number of diffs equals number of characters, no commonality at all. | |
seq[Diff] diffs = newSeq[Diff]() | |
diffs.add(newDiff(deletionOp, textOne)) | |
diffs.add(newDiff(insertionOp, textTwo)) | |
return diffs | |
## | |
## Given the location of the 'middle snake', split the diff in two parts | |
## and recurse. | |
## @param textOne Old string to be diffed. | |
## @param textTwo New string to be diffed. | |
## @param x Index of split point in textOne. | |
## @param y Index of split point in textTwo. | |
## @param deadline Time at which to bail if not yet complete. | |
## @return LinkedList of Diff objects. | |
## | |
private seq[Diff] diff_bisectSplit(string textOne, string textTwo, | |
int x, int y, DateTime deadline) | |
string textOneA = textOne[0 .. x] | |
string textTwoA = textTwo[0 .. y] | |
string textOneB = textOne[x .. len(textOne)] | |
string textTwoB = textTwo[y .. len(textTwo)] | |
# Compute both diffs serially. | |
seq[Diff] diffsA = diff_main(textOneA, textTwoA, false, deadline) | |
seq[Diff] diffsB = diff_main(textOneB, textTwoB, false, deadline) | |
diffsA.AddRange(diffsB) | |
return diffsA | |
## | |
## Split two texts into a list of strings. Reduce the texts to a string of | |
## hashes where each Unicode character represents one line. | |
## @param textOne First string. | |
## @param textTwo Second string. | |
## @return Three element Object array, containing the encoded textOne, the | |
## encoded textTwo and the List of unique strings. The zeroth element | |
## of the List of unique strings is intentionally blank. | |
## | |
protected Object[] diff_linesToChars(string textOne, string textTwo) | |
seq[string] lineArray = newSeq[string]() | |
Dictionary<string, int> lineHash = new Dictionary<string, int>() | |
# e.g. lineArray[4] == "Hello\n" | |
# e.g. linehash.get("Hello\n") == 4 | |
# "\x00" is a valid character, but various debuggers don't like it. | |
# So we'll insert a junk entry to avoid generating a null character. | |
lineArray.add("") | |
string charsOne = diff_linesToCharsMunge(textOne, lineArray, lineHash) | |
string charsTwo = diff_linesToCharsMunge(textTwo, lineArray, lineHash) | |
return new Object[] charsOne, charsTwo, lineArray | |
## | |
## Split a text into a list of strings. Reduce the texts to a string of | |
## hashes where each Unicode character represents one line. | |
## @param text String to encode. | |
## @param lineArray List of unique strings. | |
## @param lineHash Map of strings to indices. | |
## @return Encoded string. | |
## | |
private string diff_linesToCharsMunge(string text, seq[string] lineArray, | |
Dictionary<string, int> lineHash) | |
int lineStart = 0 | |
int lineEnd = -1 | |
string line | |
rope chars = new rope() | |
# Walk the text, pulling out a Substring for each line. | |
# text.split('\n') would would temporarily double our memory footprint. | |
# Modifying text would create many large strings to garbage collect. | |
while (lineEnd < text.len - 1) | |
lineEnd = text.find('\n', lineStart) | |
if (lineEnd == -1) | |
lineEnd = text.len - 1 | |
line = text.JavaSubstring(lineStart, lineEnd + 1) | |
lineStart = lineEnd + 1 | |
if (lineHash.ContainsKey(line)) | |
chars.add(((char)(int)lineHash[line])) | |
else | |
lineArray.add(line) | |
lineHash.add(line, lineArray.len - 1) | |
chars.add(((char)(lineArray.len - 1))) | |
return chars.ToString() | |
## | |
## Rehydrate the text in a diff from a string of line hashes to real lines | |
## of text. | |
## @param diffs List of Diff objects. | |
## @param lineArray List of unique strings. | |
## | |
protected void diff_charsToLines(ICollection<Diff> diffs, | |
seq[string] lineArray) | |
rope text | |
for (Diff diff in diffs) | |
text = new rope() | |
for (int y = 0; y < diff.text.len; y += 1) | |
text.add(lineArray[diff.text[y]]) | |
diff.text = text.ToString() | |
## | |
## Determine the common prefix of two strings. | |
## @param textOne First string. | |
## @param textTwo Second string. | |
## @return The number of characters common to the start of each string. | |
## | |
public int diff_commonPrefix(string textOne, string textTwo) | |
# Performance analysis: http://neil.fraser.name/news/2007/10/09/ | |
int n = min(textOne.len, textTwo.len) | |
for (int i = 0; i < n; i += 1) | |
if (textOne[i] != textTwo[i]) | |
return i | |
return n | |
## | |
## Determine the common suffix of two strings. | |
## @param textOne First string. | |
## @param textTwo Second string. | |
## @return The number of characters common to the end of each string. | |
## | |
public int diff_commonSuffix(string textOne, string textTwo) | |
# Performance analysis: http://neil.fraser.name/news/2007/10/09/ | |
int textOneLength = textOne.len | |
int textTwoLength = textTwo.len | |
int n = min(textOne.len, textTwo.len) | |
for (int i = 1; i <= n; i += 1) | |
if (textOne[textOneLength - i] != textTwo[textTwoLength - i]) | |
return i - 1 | |
return n | |
## | |
## Determine if the suffix of one string is the prefix of another. | |
## @param textOne First string. | |
## @param textTwo Second string. | |
## @return The number of characters common to the end of the first | |
## string and the start of the second string. | |
## | |
protected int diff_commonOverlap(string textOne, string textTwo) | |
# Cache the text lengths to prevent multiple calls. | |
int textOneLength = textOne.len | |
int textTwoLength = textTwo.len | |
# Eliminate the null case. | |
if (textOneLength == 0 or textTwoLength == 0) | |
return 0 | |
# Truncate the longer string. | |
if (textOneLength > textTwoLength) | |
textOne = textOne[textOneLength - textTwoLength .. len(textOne)] | |
elif (textOneLength < textTwoLength) | |
textTwo = textTwo[0 .. textOneLength] | |
int textLength = min(textOneLength, textTwoLength) | |
# Quick check for the worst case. | |
if (textOne == textTwo) | |
return textLength | |
# Start by looking for a single character match | |
# and increase length until no match is found. | |
# Performance analysis: http://neil.fraser.name/news/2010/11/04/ | |
int best = 0 | |
int length = 1 | |
while (true) | |
string pattern = textOne[textLength - length .. len(textOne)] | |
int found = textTwo.find(pattern, StringComparison.Ordinal) | |
if (found == -1) | |
return best | |
length += found | |
if (found == 0 or textOne[textLength - length .. len(textOne)] == | |
textTwo[0 .. length]) | |
best = length | |
length += 1 | |
## | |
## Do the two texts share a Substring which is at least half the length of | |
## the longer text? | |
## This speedup can produce non-minimal diffs. | |
## @param textOne First string. | |
## @param textTwo Second string. | |
## @return Five element String array, containing the prefix of textOne, the | |
## suffix of textOne, the prefix of textTwo, the suffix of textTwo and the | |
## common middle. Or null if there was no match. | |
## | |
protected string[] diff_halfMatch(string textOne, string textTwo) | |
if (this.DiffTimeout <= 0) | |
# Don't risk returning a non-optimal diff if we have unlimited time. | |
return null | |
string longText = textOne.len > textTwo.len ? textOne : textTwo | |
string shortText = textOne.len > textTwo.len ? textTwo : textOne | |
if (longText.len < 4 or shortText.len * 2 < longText.len) | |
return null; // Pointless. | |
# First check if the second quarter is the seed for a half-match. | |
string[] hm1 = diff_halfMatchI(longText, shortText, | |
(longText.len + 3) / 4) | |
# Check again based on the third quarter. | |
string[] hm2 = diff_halfMatchI(longText, shortText, | |
(longText.len + 1) / 2) | |
string[] hm | |
if (hm1 == null and hm2 == null) | |
return null | |
elif (hm2 == null) | |
hm = hm1 | |
elif (hm1 == null) | |
hm = hm2 | |
else | |
# Both matched. Select the longest. | |
hm = hm1[4].len > hm2[4].len ? hm1 : hm2 | |
# A half-match was found, sort out the return data. | |
if (textOne.len > textTwo.len) | |
return hm | |
#return new string[]hm[0], hm[1], hm[2], hm[3], hm[4] | |
else | |
return new string[] hm[2], hm[3], hm[0], hm[1], hm[4] | |
## | |
## Does a Substring of shortText exist within longText such that the | |
## Substring is at least half the length of longText? | |
## @param longText Longer string. | |
## @param shortText Shorter string. | |
## @param i Start index of quarter length Substring within longText. | |
## @return Five element string array, containing the prefix of longText, the | |
## suffix of longText, the prefix of shortText, the suffix of shortText | |
## and the common middle. Or null if there was no match. | |
## | |
private string[] diff_halfMatchI(string longText, string shortText, int i) | |
# Start with a 1/4 length Substring at position i as a seed. | |
string seed = longText.Substring(i, longText.len / 4) | |
int j = -1 | |
string bestCommon = "" | |
string bestLongTextA = "", bestLongTextB = "" | |
string bestShortTextA = "", bestShortTextB = "" | |
while (j < shortText.len and (j = shortText.find(seed, j + 1, | |
StringComparison.Ordinal)) != -1) | |
int prefixLength = diff_commonPrefix(longText[i .. len(longText)], | |
shortText[j .. len(shortText)]) | |
int suffixLength = diff_commonSuffix(longText[0 .. i], | |
shortText[0 .. j]) | |
if (bestCommon.len < suffixLength + prefixLength) | |
bestCommon = shortText[j - suffixLength .. suffixLength] | |
+ shortText[j .. prefixLength] | |
bestLongTextA = longText[0 .. i - suffixLength] | |
bestLongTextB = longText.Substring(i + prefixLength) | |
bestShortTextA = shortText[0 .. j - suffixLength] | |
bestShortTextB = shortText.Substring(j + prefixLength) | |
if (bestCommon.len * 2 >= longText.len) | |
return new string[]bestLongTextA, bestLongTextB, | |
bestShortTextA, bestShortTextB, bestCommon | |
else | |
return null | |
## | |
## Reduce the number of edits by eliminating semantically trivial | |
## equalities. | |
## @param diffs List of Diff objects. | |
## | |
public void diff_cleanupSemantic(seq[Diff] diffs) | |
bool changes = false | |
# Stack of indices where equalities are found. | |
Stack<int> equalities = new Stack<int>() | |
# Always equal to equalities[equalitiesLength-1][1] | |
string lastEquality = null | |
int pointer = 0; // Index of current position. | |
# Number of characters that changed prior to the equality. | |
int lengthInsertionsOne = 0 | |
int lengthDeletionsOne = 0 | |
# Number of characters that changed after the equality. | |
int lengthInsertionsTwo = 0 | |
int lengthDeletionsTwo = 0 | |
while (pointer < diffs.len) | |
if (diffs[pointer].operation == equalityOp) // Equality found. | |
equalities.Push(pointer) | |
lengthInsertionsOne = lengthInsertionsTwo | |
lengthDeletionsOne = lengthDeletionsTwo | |
lengthInsertionsTwo = 0 | |
lengthDeletionsTwo = 0 | |
lastEquality = diffs[pointer].text | |
else // an insertion or deletion | |
if (diffs[pointer].operation == insertionOp) | |
lengthInsertionsTwo += diffs[pointer].text.len | |
else | |
lengthDeletionsTwo += diffs[pointer].text.len | |
# Eliminate an equality that is smaller or equal to the edits on both | |
# sides of it. | |
if (lastEquality != null and (lastEquality.len | |
<= max(lengthInsertionsOne, lengthDeletionsOne)) | |
and (lastEquality.len | |
<= max(lengthInsertionsTwo, lengthDeletionsTwo))) | |
# Duplicate record. | |
diffs.Insert(equalities.Peek(), | |
newDiff(deletionOp, lastEquality)) | |
# Change second copy to insert. | |
diffs[equalities.Peek() + 1].operation = insertionOp | |
# Throw away the equality we just deleted. | |
equalities.Pop() | |
if (equalities.len > 0) | |
equalities.Pop() | |
pointer = equalities.len > 0 ? equalities.Peek() : -1 | |
lengthInsertionsOne = 0; // Reset the counters. | |
lengthDeletionsOne = 0 | |
lengthInsertionsTwo = 0 | |
lengthDeletionsTwo = 0 | |
lastEquality = null | |
changes = true | |
pointer += 1 | |
# Normalize the diff. | |
if (changes) | |
diff_cleanupMerge(diffs) | |
diff_cleanupSemanticLossless(diffs) | |
# Find any overlaps between deletions and insertions. | |
# e.g: <del>abcxxx</del><ins>xxxdef</ins> | |
# -> <del>abc</del>xxx<ins>def</ins> | |
# e.g: <del>xxxabc</del><ins>defxxx</ins> | |
# -> <ins>def</ins>xxx<del>abc</del> | |
# Only extract an overlap if it is as big as the edit ahead or behind it. | |
pointer = 1 | |
while (pointer < diffs.len) | |
if (diffs[pointer - 1].operation == deletionOp and | |
diffs[pointer].operation == insertionOp) | |
string deletion = diffs[pointer - 1].text | |
string insertion = diffs[pointer].text | |
int overlapLengthOne = diff_commonOverlap(deletion, insertion) | |
int overlapLengthTwo = diff_commonOverlap(insertion, deletion) | |
if (overlapLengthOne >= overlapLengthTwo) | |
if (overlapLengthOne >= deletion.len / 2.0 or | |
overlapLengthOne >= insertion.len / 2.0) | |
# Overlap found. | |
# Insert an equality and trim the surrounding edits. | |
diffs.Insert(pointer, newDiff(equalityOp, | |
insertion[0 .. overlapLengthOne])) | |
diffs[pointer - 1].text = | |
deletion.Substring(0, deletion.len - overlapLengthOne) | |
diffs[pointer + 1].text = insertion[overlapLengthOne .. len(insertion)] | |
pointer += 1 | |
else | |
if (overlapLengthTwo >= deletion.len / 2.0 or | |
overlapLengthTwo >= insertion.len / 2.0) | |
# Reverse overlap found. | |
# Insert an equality and swap and trim the surrounding edits. | |
diffs.Insert(pointer, newDiff(equalityOp, | |
deletion[0 .. overlapLengthTwo])) | |
diffs[pointer - 1].operation = insertionOp | |
diffs[pointer - 1].text = | |
insertion.Substring(0, insertion.len - overlapLengthTwo) | |
diffs[pointer + 1].operation = deletionOp | |
diffs[pointer + 1].text = deletion[overlapLengthTwo .. len(deletion)] | |
pointer += 1 | |
pointer += 1 | |
pointer += 1 | |
## | |
## Look for single edits surrounded on both sides by equalities | |
## which can be shifted sideways to align the edit to a word boundary. | |
## e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. | |
## @param diffs List of Diff objects. | |
## | |
public void diff_cleanupSemanticLossless(seq[Diff] diffs) | |
int pointer = 1 | |
# Intentionally ignore the first and last element (don't need checking). | |
while (pointer < diffs.len - 1) | |
if (diffs[pointer - 1].operation == equalityOp and | |
diffs[pointer + 1].operation == equalityOp) | |
# This is a single edit surrounded by equalities. | |
string equalityOne = diffs[pointer - 1].text | |
string edit = diffs[pointer].text | |
string equalityTwo = diffs[pointer + 1].text | |
# First, shift the edit as far left as possible. | |
int commonOffset = this.diff_commonSuffix(equalityOne, edit) | |
if (commonOffset > 0) | |
string commonString = edit.Substring(edit.len - commonOffset) | |
equalityOne = equalityOne.Substring(0, equalityOne.len - commonOffset) | |
edit = commonString + edit.Substring(0, edit.len - commonOffset) | |
equalityTwo = commonString + equalityTwo | |
# Second, step character by character right, | |
# looking for the best fit. | |
string bestEqualityOne = equalityOne | |
string bestEdit = edit | |
string bestEqualityTwo = equalityTwo | |
int bestScore = diff_cleanupSemanticScore(equalityOne, edit) + | |
diff_cleanupSemanticScore(edit, equalityTwo) | |
while (edit.len != 0 and equalityTwo.len != 0 | |
and edit[0] == equalityTwo[0]) | |
equalityOne += edit[0] | |
edit = edit[1 .. len(edit)] + equalityTwo[0] | |
equalityTwo = equalityTwo[1 .. len(equalityTwo)] | |
int score = diff_cleanupSemanticScore(equalityOne, edit) + | |
diff_cleanupSemanticScore(edit, equalityTwo) | |
# The >= encourages trailing rather than leading whitespace on | |
# edits. | |
if (score >= bestScore) | |
bestScore = score | |
bestEqualityOne = equalityOne | |
bestEdit = edit | |
bestEqualityTwo = equalityTwo | |
if (diffs[pointer - 1].text != bestEqualityOne) | |
# We have an improvement, save it back to the diff. | |
if (bestEqualityOne.len != 0) | |
diffs[pointer - 1].text = bestEqualityOne | |
else | |
diffs.delete(pointer - 1) | |
pointer-- | |
diffs[pointer].text = bestEdit | |
if (bestEqualityTwo.len != 0) | |
diffs[pointer + 1].text = bestEqualityTwo | |
else | |
diffs.delete(pointer + 1) | |
pointer-- | |
pointer += 1 | |
## | |
## Given two strings, comAdde a score representing whether the internal | |
## boundary falls on logical boundaries. | |
## Scores range from 6 (best) to 0 (worst). | |
## @param one First string. | |
## @param two Second string. | |
## @return The score. | |
## | |
private int diff_cleanupSemanticScore(string one, string two) | |
if (one.len == 0 or two.len == 0) | |
# Edges are the best. | |
return 6 | |
# Each port of this function behaves slightly differently due to | |
# subtle differences in each language's definition of things like | |
# 'whitespace'. Since this function's purpose is largely cosmetic, | |
# the choice has been made to use each language's native features | |
# rather than force total conformity. | |
char charTwo = one[one.len - 1] | |
char charTwo = two[0] | |
bool nonAlphaNumericOne = !Char.IsLetterOrDigit(charTwo) | |
bool nonAlphaNumericTwo = !Char.IsLetterOrDigit(charTwo) | |
bool whitespaceOne = nonAlphaNumericOne and Char.IsWhiteSpace(charTwo) | |
bool whitespaceTwo = nonAlphaNumericTwo and Char.IsWhiteSpace(charTwo) | |
bool lineBreakOne = whitespaceOne and Char.IsControl(charTwo) | |
bool lineBreakTwo = whitespaceTwo and Char.IsControl(charTwo) | |
bool blankLineOne = lineBreakOne and BLANKLINEEND.IsMatch(one) | |
bool blankLineTwo = lineBreakTwo and BLANKLINESTART.IsMatch(two) | |
if (blankLineOne or blankLineTwo) | |
# Five points for blank lines. | |
return 5 | |
elif (lineBreakOne or lineBreakTwo) | |
# Four points for line breaks. | |
return 4 | |
elif (nonAlphaNumericOne and !whitespaceOne and whitespaceTwo) | |
# Three points for end of sentences. | |
return 3 | |
elif (whitespaceOne or whitespaceTwo) | |
# Two points for whitespace. | |
return 2 | |
elif (nonAlphaNumericOne or nonAlphaNumericTwo) | |
# One point for non-alphanumeric. | |
return 1 | |
return 0 | |
# Define some regex patterns for matching boundaries. | |
private Regex BLANKLINEEND = new Regex("\\n\\r?\\n\\Z") | |
private Regex BLANKLINESTART = new Regex("\\A\\r?\\n\\r?\\n") | |
## | |
## Reduce the number of edits by eliminating operationally trivial | |
## equalities. | |
## @param diffs List of Diff objects. | |
## | |
public void diff_cleanupEfficiency(seq[Diff] diffs) | |
bool changes = false | |
# Stack of indices where equalities are found. | |
Stack<int> equalities = new Stack<int>() | |
# Always equal to equalities[equalitiesLength-1][1] | |
string lastEquality = "" | |
int pointer = 0; // Index of current position. | |
# Is there an insertion operation before the last equality. | |
bool preIns = false | |
# Is there a deletion operation before the last equality. | |
bool preDel = false | |
# Is there an insertion operation after the last equality. | |
bool postIns = false | |
# Is there a deletion operation after the last equality. | |
bool postDel = false | |
while (pointer < diffs.len) | |
if (diffs[pointer].operation == equalityOp) // Equality found. | |
if (diffs[pointer].text.len < this.Diff_EditCost | |
and (postIns or postDel)) | |
# Candidate found. | |
equalities.Push(pointer) | |
preIns = postIns | |
preDel = postDel | |
lastEquality = diffs[pointer].text | |
else | |
# Not a candidate, and can never become one. | |
equalities.Clear() | |
lastEquality = "" | |
postIns = postDel = false | |
else // An insertion or deletion. | |
if (diffs[pointer].operation == deletionOp) | |
postDel = true | |
else | |
postIns = true | |
/* | |
## Five types to be split: | |
## <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> | |
## <ins>A</ins>X<ins>C</ins><del>D</del> | |
## <ins>A</ins><del>B</del>X<ins>C</ins> | |
## <ins>A</del>X<ins>C</ins><del>D</del> | |
## <ins>A</ins><del>B</del>X<del>C</del> | |
## | |
if ((lastEquality.len != 0) | |
and ((preIns and preDel and postIns and postDel) | |
or ((lastEquality.len < this.Diff_EditCost / 2) | |
and ((preIns ? 1 : 0) + (preDel ? 1 : 0) + (postIns ? 1 : 0) | |
+ (postDel ? 1 : 0)) == 3))) | |
# Duplicate record. | |
diffs.Insert(equalities.Peek(), | |
newDiff(deletionOp, lastEquality)) | |
# Change second copy to insert. | |
diffs[equalities.Peek() + 1].operation = insertionOp | |
equalities.Pop(); // Throw away the equality we just deleted. | |
lastEquality = "" | |
if (preIns and preDel) | |
# No changes made which could affect previous entry, keep going. | |
postIns = postDel = true | |
equalities.Clear() | |
else | |
if (equalities.len > 0) | |
equalities.Pop() | |
pointer = equalities.len > 0 ? equalities.Peek() : -1 | |
postIns = postDel = false | |
changes = true | |
pointer += 1 | |
if (changes) | |
diff_cleanupMerge(diffs) | |
## | |
## Reorder and merge like edit sections. Merge equalities. | |
## Any edit section can move as long as it doesn't cross an equality. | |
## @param diffs List of Diff objects. | |
## | |
public void diff_cleanupMerge(seq[Diff] diffs) | |
# Add a dummy entry at the end. | |
diffs.add(newDiff(equalityOp, "")) | |
int pointer = 0 | |
int countDelete = 0 | |
int countInsert = 0 | |
string textDelete = "" | |
string textInsert = "" | |
int commonLength | |
while (pointer < diffs.len) | |
switch (diffs[pointer].operation) | |
case insertionOp: | |
countInsert += 1 | |
textInsert += diffs[pointer].text | |
pointer += 1 | |
break | |
case deletionOp: | |
countDelete += 1 | |
textDelete += diffs[pointer].text | |
pointer += 1 | |
break | |
case equalityOp: | |
# Upon reaching an equality, check for prior redundancies. | |
if (countDelete + countInsert > 1) | |
if (countDelete != 0 and countInsert != 0) | |
# Factor out any common prefixies. | |
commonLength = this.diff_commonPrefix(textInsert, textDelete) | |
if (commonLength != 0) | |
if ((pointer - countDelete - countInsert) > 0 and | |
diffs[pointer - countDelete - countInsert - 1].operation | |
== equalityOp) | |
diffs[pointer - countDelete - countInsert - 1].text | |
+= textInsert[0 .. commonLength] | |
else | |
diffs.Insert(0, newDiff(equalityOp, | |
textInsert[0 .. commonLength])) | |
pointer += 1 | |
textInsert = textInsert[commonLength .. len(textInsert)] | |
textDelete = textDelete[commonLength .. len(textDelete)] | |
# Factor out any common suffixies. | |
commonLength = this.diff_commonSuffix(textInsert, textDelete) | |
if (commonLength != 0) | |
diffs[pointer].text = textInsert.Substring(textInsert.len | |
- commonLength) + diffs[pointer].text | |
textInsert = textInsert.Substring(0, textInsert.len | |
- commonLength) | |
textDelete = textDelete.Substring(0, textDelete.len | |
- commonLength) | |
# Delete the offending records and add the merged ones. | |
if (countDelete == 0) | |
diffs.Splice(pointer - countInsert, | |
countDelete + countInsert, | |
newDiff(insertionOp, textInsert)) | |
elif (countInsert == 0) | |
diffs.Splice(pointer - countDelete, | |
countDelete + countInsert, | |
newDiff(deletionOp, textDelete)) | |
else | |
diffs.Splice(pointer - countDelete - countInsert, | |
countDelete + countInsert, | |
newDiff(deletionOp, textDelete), | |
newDiff(insertionOp, textInsert)) | |
pointer = pointer - countDelete - countInsert + | |
(countDelete != 0 ? 1 : 0) + (countInsert != 0 ? 1 : 0) + 1 | |
elif (pointer != 0 | |
and diffs[pointer - 1].operation == equalityOp) | |
# Merge this equality with the previous one. | |
diffs[pointer - 1].text += diffs[pointer].text | |
diffs.delete(pointer) | |
else | |
pointer += 1 | |
countInsert = 0 | |
countDelete = 0 | |
textDelete = "" | |
textInsert = "" | |
break | |
if (diffs[diffs.len - 1].text.len == 0) | |
diffs.delete(diffs.len - 1); // Remove the dummy entry at the end. | |
# Second pass: look for single edits surrounded on both sides by | |
# equalities which can be shifted sideways to eliminate an equality. | |
# e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC | |
bool changes = false | |
pointer = 1 | |
# Intentionally ignore the first and last element (don't need checking). | |
while (pointer < (diffs.len - 1)) | |
if (diffs[pointer - 1].operation == equalityOp and | |
diffs[pointer + 1].operation == equalityOp) | |
# This is a single edit surrounded by equalities. | |
if (diffs[pointer].text.EndsWith(diffs[pointer - 1].text, | |
StringComparison.Ordinal)) | |
# Shift the edit over the previous equality. | |
diffs[pointer].text = diffs[pointer - 1].text + | |
diffs[pointer].text.Substring(0, diffs[pointer].text.len - | |
diffs[pointer - 1].text.len) | |
diffs[pointer + 1].text = diffs[pointer - 1].text | |
+ diffs[pointer + 1].text | |
diffs.Splice(pointer - 1, 1) | |
changes = true | |
elif (diffs[pointer].text.StartsWith(diffs[pointer + 1].text, | |
StringComparison.Ordinal)) | |
# Shift the edit over the next equality. | |
diffs[pointer - 1].text += diffs[pointer + 1].text | |
diffs[pointer].text = | |
diffs[pointer].text.Substring(diffs[pointer + 1].text.len) | |
+ diffs[pointer + 1].text | |
diffs.Splice(pointer + 1, 1) | |
changes = true | |
pointer += 1 | |
# If shifts were made, the diff needs reordering and another shift sweep. | |
if (changes) | |
this.diff_cleanupMerge(diffs) | |
## | |
## loc is a location in textOne, comAdde and return the equivalent location in | |
## textTwo. | |
## e.g. "The cat" vs "The big cat", 1->1, 5->8 | |
## @param diffs List of Diff objects. | |
## @param loc Location within textOne. | |
## @return Location within textTwo. | |
## | |
public int diff_xIndex(seq[Diff] diffs, int loc) | |
int charsOne = 0 | |
int charsTwo = 0 | |
int lastCharsOne = 0 | |
int lastCharsTwo = 0 | |
Diff lastDiff = null | |
for (Diff aDiff in diffs) | |
if (aDiff.operation != insertionOp) | |
# Equality or deletion. | |
charsOne += aDiff.text.len | |
if (aDiff.operation != deletionOp) | |
# Equality or insertion. | |
charsTwo += aDiff.text.len | |
if (charsOne > loc) | |
# Overshot the location. | |
lastDiff = aDiff | |
break | |
lastCharsOne = charsOne | |
lastCharsTwo = charsTwo | |
if (lastDiff != null and lastDiff.operation == deletionOp) | |
# The location was deleted. | |
return lastCharsTwo | |
# Add the remaining character length. | |
return lastCharsTwo + (loc - lastCharsOne) | |
## | |
## Convert a Diff list into a pretty HTML report. | |
## @param diffs List of Diff objects. | |
## @return HTML representation. | |
## | |
public string diff_prettyHtml(seq[Diff] diffs) | |
rope html = new rope() | |
for (Diff aDiff in diffs) | |
string text = aDiff.text.Replace("&", "&").Replace("<", "<") | |
.Replace(">", ">").Replace("\n", "¶<br>") | |
switch (aDiff.operation) | |
case insertionOp: | |
html.add("<ins style=\"background:#e6ffe6;\">").add(text) | |
.add("</ins>") | |
break | |
case deletionOp: | |
html.add("<del style=\"background:#ffe6e6;\">").add(text) | |
.add("</del>") | |
break | |
case equalityOp: | |
html.add("<span>").add(text).add("</span>") | |
break | |
return html.ToString() | |
## | |
## Compute and return the source text (all equalities and deletions). | |
## @param diffs List of Diff objects. | |
## @return Source text. | |
## | |
public string diffTextOne(seq[Diff] diffs) | |
rope text = new rope() | |
for (Diff aDiff in diffs) | |
if (aDiff.operation != insertionOp) | |
text.add(aDiff.text) | |
return text.ToString() | |
## | |
## Compute and return the destination text (all equalities and insertions). | |
## @param diffs List of Diff objects. | |
## @return Destination text. | |
## | |
public string diffTextTwo(seq[Diff] diffs) | |
rope text = new rope() | |
for (Diff aDiff in diffs) | |
if (aDiff.operation != deletionOp) | |
text.add(aDiff.text) | |
return text.ToString() | |
## | |
## Compute the Levenshtein distance; the number of inserted, deleted or | |
## substituted characters. | |
## @param diffs List of Diff objects. | |
## @return Number of changes. | |
## | |
public int diff_levenshtein(seq[Diff] diffs) | |
int levenshtein = 0 | |
int insertions = 0 | |
int deletions = 0 | |
for (Diff aDiff in diffs) | |
switch (aDiff.operation) | |
case insertionOp: | |
insertions += aDiff.text.len | |
break | |
case deletionOp: | |
deletions += aDiff.text.len | |
break | |
case equalityOp: | |
# A deletion and an insertion is one substitution. | |
levenshtein += max(insertions, deletions) | |
insertions = 0 | |
deletions = 0 | |
break | |
levenshtein += max(insertions, deletions) | |
return levenshtein | |
## | |
## Crush the diff into an encoded string which describes the operations | |
## required to transform textOne into textTwo. | |
## E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. | |
## Operations are tab-separated. Inserted text is escaped using %xx | |
## notation. | |
## @param diffs Array of Diff objects. | |
## @return Delta text. | |
## | |
public string diffToDelta(seq[Diff] diffs) | |
rope text = new rope() | |
for (Diff aDiff in diffs) | |
switch (aDiff.operation) | |
case insertionOp: | |
text.add("+").add(HttpUtility.UrlEncode(aDiff.text, | |
new UTF8Encoding()).Replace('+', ' ')).add("\t") | |
break | |
case deletionOp: | |
text.add("-").add(aDiff.text.len).add("\t") | |
break | |
case equalityOp: | |
text.add("=").add(aDiff.text.len).add("\t") | |
break | |
string delta = text.ToString() | |
if (delta.len != 0) | |
# Strip off trailing tab character. | |
delta = delta.Substring(0, delta.len - 1) | |
delta = unescapeForEncodeUriCompatability(delta) | |
return delta | |
## | |
## Given the original textOne, and an encoded string which describes the | |
## operations required to transform textOne into textTwo, comAdde the full diff. | |
## @param textOne Source string for the diff. | |
## @param delta Delta text. | |
## @return Array of Diff objects or null if invalid. | |
## @throws ArgumentException If invalid input. | |
## | |
public seq[Diff] diff_fromDelta(string textOne, string delta) | |
seq[Diff] diffs = newSeq[Diff]() | |
int pointer = 0; // Cursor in textOne | |
string[] tokens = delta.Split(new string[] "\t" , | |
StringSplitOptions.None) | |
for (string token in tokens) | |
if (token.len == 0) | |
# Blank tokens are ok (from a trailing \t). | |
continue | |
# Each token begins with a one character parameter which specifies the | |
# operation of this token (delete, insert, equality). | |
string param = token[1 .. len(token)] | |
switch (token[0]) | |
case '+': | |
# decode would change all "+" to " " | |
param = param.Replace("+", "%2b") | |
param = HttpUtility.UrlDecode(param, new UTF8Encoding(false, true)) | |
# catch (UnsupportedEncodingException e) | |
# // Not likely on modern system. | |
# throw new Error("This system does not support UTF-8.", e) | |
# catch (IllegalArgumentException e) | |
# // Malformed URI sequence. | |
# throw new IllegalArgumentException( | |
# "Illegal escape in diff_fromDelta: " + param, e) | |
# | |
diffs.add(newDiff(insertionOp, param)) | |
break | |
case '-': | |
# Fall through. | |
case '=': | |
int n | |
try | |
n = Convert.ToInt32(param) | |
catch (FormatException e) | |
throw new ArgumentException( | |
"Invalid number in diff_fromDelta: " + param, e) | |
if (n < 0) | |
throw new ArgumentException( | |
"Negative number in diff_fromDelta: " + param) | |
string text | |
try | |
text = textOne[pointer .. n] | |
pointer += n | |
catch (ArgumentOutOfRangeException e) | |
throw new ArgumentException("Delta length (" + pointer | |
+ ") larger than source text length (" + textOne.len | |
+ ").", e) | |
if (token[0] == '=') | |
diffs.add(newDiff(equalityOp, text)) | |
else | |
diffs.add(newDiff(deletionOp, text)) | |
break | |
default: | |
# Anything else is an error. | |
throw new ArgumentException( | |
"Invalid diff operation in diff_fromDelta: " + token[0]) | |
if (pointer != textOne.len) | |
throw new ArgumentException("Delta length (" + pointer | |
+ ") smaller than source text length (" + textOne.len + ").") | |
return diffs | |
# MATCH FUNCTIONS | |
## | |
## Locate the best instance of 'pattern' in 'text' near 'loc'. | |
## Returns -1 if no match found. | |
## @param text The text to search. | |
## @param pattern The pattern to search for. | |
## @param loc The location to search around. | |
## @return Best match index or -1. | |
## | |
public int match_main(string text, string pattern, int loc) | |
# Check for null inputs not needed since null can't be passed in C#. | |
loc = max(0, min(loc, text.len)) | |
if (text == pattern) | |
# Shortcut (potentially not guaranteed by the algorithm) | |
return 0 | |
elif (text.len == 0) | |
# Nothing to match. | |
return -1 | |
elif (loc + pattern.len <= text.len | |
and text.Substring(loc, pattern.len) == pattern) | |
# Perfect match at the perfect spot! (Includes case of null pattern) | |
return loc | |
else | |
# Do a fuzzy compare. | |
return match_bitap(text, pattern, loc) | |
## | |
## Locate the best instance of 'pattern' in 'text' near 'loc' using the | |
## Bitap algorithm. Returns -1 if no match found. | |
## @param text The text to search. | |
## @param pattern The pattern to search for. | |
## @param loc The location to search around. | |
## @return Best match index or -1. | |
## | |
protected int match_bitap(string text, string pattern, int loc) | |
# assert (Match_MaxBits == 0 or pattern.len <= Match_MaxBits) | |
# : "Pattern too long for this application." | |
# Initialise the alphabet. | |
Dictionary<char, int> s = match_alphabet(pattern) | |
# Highest score beyond which we give up. | |
double score_threshold = MatchThreshold | |
# Is there a nearby exact match? (speedup) | |
int bestLoc = text.find(pattern, loc, StringComparison.Ordinal) | |
if (bestLoc != -1) | |
score_threshold = min(match_bitapScore(0, bestLoc, loc, | |
pattern), score_threshold) | |
# What about in the other direction? (speedup) | |
bestLoc = text.rfind(pattern, | |
min(loc + pattern.len, text.len), | |
StringComparison.Ordinal) | |
if (bestLoc != -1) | |
score_threshold = min(match_bitapScore(0, bestLoc, loc, | |
pattern), score_threshold) | |
# Initialise the bit arrays. | |
int matchmask = 1 << (pattern.len - 1) | |
bestLoc = -1 | |
int binMin, binMid | |
int binMax = pattern.len + text.len | |
# Empty initialization added to appease C# compiler. | |
int[] last_rd = new int[0] | |
for (int d = 0; d < pattern.len; d += 1) | |
# Scan for the best match; each iteration allows for one more error. | |
# Run a binary search to determine how far from 'loc' we can stray at | |
# this error level. | |
binMin = 0 | |
binMid = binMax | |
while (binMin < binMid) | |
if (match_bitapScore(d, loc + binMid, loc, pattern) | |
<= score_threshold) | |
binMin = binMid | |
else | |
binMax = binMid | |
binMid = (binMax - binMin) / 2 + binMin | |
# Use the result from this iteration as the maximum for the next. | |
binMax = binMid | |
int start = max(1, loc - binMid + 1) | |
int finish = min(loc + binMid, text.len) + pattern.len | |
int[] rd = new int[finish + 2] | |
rd[finish + 1] = (1 << d) - 1 | |
for (int j = finish; j >= start; j--) | |
int charMatch | |
if (text.len <= j - 1 or !s.ContainsKey(text[j - 1])) | |
# Out of range. | |
charMatch = 0 | |
else | |
charMatch = s[text[j - 1]] | |
if (d == 0) | |
# First pass: exact match. | |
rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | |
else | |
# Subsequent passes: fuzzy match. | |
rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | |
| (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1] | |
if ((rd[j] & matchmask) != 0) | |
double score = match_bitapScore(d, j - 1, loc, pattern) | |
# This match will almost certainly be better than any existing | |
# match. But check anyway. | |
if (score <= score_threshold) | |
# Told you so. | |
score_threshold = score | |
bestLoc = j - 1 | |
if (bestLoc > loc) | |
# When passing loc, don't exceed our current distance from loc. | |
start = max(1, 2 * loc - bestLoc) | |
else | |
# Already passed loc, downhill from here on in. | |
break | |
if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) | |
# No hope for a (better) match at greater error levels. | |
break | |
last_rd = rd | |
return bestLoc | |
## | |
## Compute and return the score for a match with e errors and x location. | |
## @param e Number of errors in match. | |
## @param x Location of match. | |
## @param loc Expected location of match. | |
## @param pattern Pattern being sought. | |
## @return Overall score for match (0.0 = good, 1.0 = bad). | |
## | |
private double match_bitapScore(int e, int x, int loc, string pattern) | |
float accuracy = (float)e / pattern.len | |
int proximity = abs(loc - x) | |
if (MatchDistance == 0) | |
# Dodge divide by zero error. | |
return proximity == 0 ? accuracy : 1.0 | |
return accuracy + (proximity / (float) MatchDistance) | |
## | |
## Initialise the alphabet for the Bitap algorithm. | |
## @param pattern The text to encode. | |
## @return Hash of character locations. | |
## | |
protected Dictionary<char, int> match_alphabet(string pattern) | |
Dictionary<char, int> s = new Dictionary<char, int>() | |
char[] charPattern = pattern.ToCharArray() | |
for (char c in charPattern) | |
if (!s.ContainsKey(c)) | |
s.add(c, 0) | |
int i = 0 | |
for (char c in charPattern) | |
int value = s[c] | (1 << (pattern.len - i - 1)) | |
s[c] = value | |
i += 1 | |
return s | |
# PATCH FUNCTIONS | |
## | |
## Increase the context until it is unique, | |
## but don't let the pattern expand beyond Match_MaxBits. | |
## @param patch The patch to grow. | |
## @param text Source text. | |
## | |
protected void patch_addContext(Patch patch, string text) | |
if (text.len == 0) | |
return | |
string pattern = text.Substring(patch.startTwo, patch.lenOne) | |
int padding = 0 | |
# Look for the first and last matches of pattern in text. If two | |
# different matches are found, increase the pattern length. | |
while (text.find(pattern, StringComparison.Ordinal) | |
!= text.rfind(pattern, StringComparison.Ordinal) | |
and pattern.len < Match_MaxBits - Patch_Margin - Patch_Margin) | |
padding += Patch_Margin | |
pattern = text.JavaSubstring(max(0, patch.startTwo - padding), | |
min(text.len, patch.startTwo + patch.lenOne + padding)) | |
# Add one chunk for good luck. | |
padding += Patch_Margin | |
# Add the prefix. | |
string prefix = text.JavaSubstring(max(0, patch.startTwo - padding), | |
patch.startTwo) | |
if (prefix.len != 0) | |
patch.diffs.Insert(0, newDiff(equalityOp, prefix)) | |
# Add the suffix. | |
string suffix = text.JavaSubstring(patch.startTwo + patch.lenOne, | |
min(text.len, patch.startTwo + patch.lenOne + padding)) | |
if (suffix.len != 0) | |
patch.diffs.add(newDiff(equalityOp, suffix)) | |
# Roll back the start points. | |
patch.startOne -= prefix.len | |
patch.startTwo -= prefix.len | |
# Extend the lengths. | |
patch.lenOne += prefix.len + suffix.len | |
patch.lenTwo += prefix.len + suffix.len | |
## | |
## Compute a list of patches to turn textOne into textTwo. | |
## A set of diffs will be computed. | |
## @param textOne Old text. | |
## @param textTwo New text. | |
## @return List of Patch objects. | |
## | |
public seq[Patch] patch_make(string textOne, string textTwo) | |
# Check for null inputs not needed since null can't be passed in C#. | |
# No diffs provided, comAdde our own. | |
seq[Diff] diffs = diff_main(textOne, textTwo, true) | |
if (diffs.len > 2) | |
diff_cleanupSemantic(diffs) | |
diff_cleanupEfficiency(diffs) | |
return patch_make(textOne, diffs) | |
## | |
## Compute a list of patches to turn textOne into textTwo. | |
## textOne will be derived from the provided diffs. | |
## @param diffs Array of Diff objects for textOne to textTwo. | |
## @return List of Patch objects. | |
## | |
public seq[Patch] patch_make(seq[Diff] diffs) | |
# Check for null inputs not needed since null can't be passed in C#. | |
# No origin string provided, comAdde our own. | |
string textOne = diffTextOne(diffs) | |
return patch_make(textOne, diffs) | |
## | |
## Compute a list of patches to turn textOne into textTwo. | |
## textTwo is ignored, diffs are the delta between textOne and textTwo. | |
## @param textOne Old text | |
## @param textTwo Ignored. | |
## @param diffs Array of Diff objects for textOne to textTwo. | |
## @return List of Patch objects. | |
## @deprecated Prefer patch_make(string textOne, seq[Diff] diffs). | |
## | |
public seq[Patch] patch_make(string textOne, string textTwo, | |
seq[Diff] diffs) | |
return patch_make(textOne, diffs) | |
## | |
## Compute a list of patches to turn textOne into textTwo. | |
## textTwo is not provided, diffs are the delta between textOne and textTwo. | |
## @param textOne Old text. | |
## @param diffs Array of Diff objects for textOne to textTwo. | |
## @return List of Patch objects. | |
## | |
public seq[Patch] patch_make(string textOne, seq[Diff] diffs) | |
# Check for null inputs not needed since null can't be passed in C#. | |
seq[Patch] patches = newSeq[Patch]() | |
if (diffs.len == 0) | |
return patches; // Get rid of the null case. | |
Patch patch = new Patch() | |
int charCountOne = 0; // Number of characters into the textOne string. | |
int charCountTwo = 0; // Number of characters into the textTwo string. | |
# Start with textOne (prepatchText) and apply the diffs until we arrive at | |
# textTwo (postpatchText). We recreate the patches one by one to determine | |
# context info. | |
string prepatchText = textOne | |
string postpatchText = textOne | |
for (Diff aDiff in diffs) | |
if (patch.diffs.len == 0 and aDiff.operation != equalityOp) | |
# A new patch starts here. | |
patch.startOne = charCountOne | |
patch.startTwo = charCountTwo | |
switch (aDiff.operation) | |
case insertionOp: | |
patch.diffs.add(aDiff) | |
patch.lenTwo += aDiff.text.len | |
postpatchText = postpatchText.Insert(charCountTwo, aDiff.text) | |
break | |
case deletionOp: | |
patch.lenOne += aDiff.text.len | |
patch.diffs.add(aDiff) | |
postpatchText = postpatchText.Remove(charCountTwo, | |
aDiff.text.len) | |
break | |
case equalityOp: | |
if (aDiff.text.len <= 2 * Patch_Margin | |
and patch.diffs.len() != 0 and aDiff != diffs.Last()) | |
# Small equality inside a patch. | |
patch.diffs.add(aDiff) | |
patch.lenOne += aDiff.text.len | |
patch.lenTwo += aDiff.text.len | |
if (aDiff.text.len >= 2 * Patch_Margin) | |
# Time for a new patch. | |
if (patch.diffs.len != 0) | |
patch_addContext(patch, prepatchText) | |
patches.add(patch) | |
patch = new Patch() | |
# Unlike Unidiff, our patch lists have a rolling context. | |
# http://code.google.com/p/google-diff-match-patch/wiki/Unidiff | |
# Update prepatch text & pos to reflect the application of the | |
# just completed patch. | |
prepatchText = postpatchText | |
charCountOne = charCountTwo | |
break | |
# Update the current character count. | |
if (aDiff.operation != insertionOp) | |
charCountOne += aDiff.text.len | |
if (aDiff.operation != deletionOp) | |
charCountTwo += aDiff.text.len | |
# Pick up the leftover patch if not empty. | |
if (patch.diffs.len != 0) | |
patch_addContext(patch, prepatchText) | |
patches.add(patch) | |
return patches | |
## | |
## Given an array of patches, return another array that is identical. | |
## @param patches Array of Patch objects. | |
## @return Array of Patch objects. | |
## | |
public seq[Patch] patchDeepCopy(seq[Patch] patches) | |
seq[Patch] patchesCopy = newSeq[Patch]() | |
for (Patch aPatch in patches) | |
Patch patchCopy = new Patch() | |
for (Diff aDiff in aPatch.diffs) | |
Diff diffCopy = newDiff(aDiff.operation, aDiff.text) | |
patchCopy.diffs.add(diffCopy) | |
patchCopy.startOne = aPatch.startOne | |
patchCopy.startTwo = aPatch.startTwo | |
patchCopy.lenOne = aPatch.lenOne | |
patchCopy.lenTwo = aPatch.lenTwo | |
patchesCopy.add(patchCopy) | |
return patchesCopy | |
## | |
## Merge a set of patches onto the text. Return a patched text, as well | |
## as an array of true/false values indicating which patches were applied. | |
## @param patches Array of Patch objects | |
## @param text Old text. | |
## @return Two element Object array, containing the new text and an array of | |
## bool values. | |
## | |
public Object[] patch_apply(seq[Patch] patches, string text) | |
if (patches.len == 0) | |
return new Object[] text, new bool[0] | |
# Deep copy the patches so that no changes are made to originals. | |
patches = patchDeepCopy(patches) | |
string nullPadding = this.patch_addPadding(patches) | |
text = nullPadding + text + nullPadding | |
patchSplitMax(patches) | |
int x = 0 | |
# delta keeps track of the offset between the expected and actual | |
# location of the previous patch. If there are patches expected at | |
# positions 10 and 20, but the first patch was found at 12, delta is 2 | |
# and the second patch has an effective expected position of 22. | |
int delta = 0 | |
bool[] results = new bool[patches.len] | |
for (Patch aPatch in patches) | |
int expectedLoc = aPatch.startTwo + delta | |
string textOne = diffTextOne(aPatch.diffs) | |
int startLoc | |
int endLoc = -1 | |
if (textOne.len > this.Match_MaxBits) | |
# patchSplitMax will only provide an oversized pattern | |
# in the case of a monster delete. | |
startLoc = match_main(text, | |
textOne.Substring(0, this.Match_MaxBits), expectedLoc) | |
if (startLoc != -1) | |
endLoc = match_main(text, | |
textOne.Substring(textOne.len - this.Match_MaxBits), | |
expectedLoc + textOne.len - this.Match_MaxBits) | |
if (endLoc == -1 or startLoc >= endLoc) | |
# Can't find valid trailing context. Drop this patch. | |
startLoc = -1 | |
else | |
startLoc = this.match_main(text, textOne, expectedLoc) | |
if (startLoc == -1) | |
# No match found. :( | |
results[x] = false | |
# Subtract the delta for this failed patch from subsequent patches. | |
delta -= aPatch.lenTwo - aPatch.lenOne | |
else | |
# Found a match. :) | |
results[x] = true | |
delta = startLoc - expectedLoc | |
string textTwo | |
if (endLoc == -1) | |
textTwo = text.JavaSubstring(startLoc, | |
min(startLoc + textOne.len, text.len)) | |
else | |
textTwo = text.JavaSubstring(startLoc, | |
min(endLoc + this.Match_MaxBits, text.len)) | |
if (textOne == textTwo) | |
# Perfect match, just shove the Replacement text in. | |
text = text[0 .. startLoc] + diffTextTwo(aPatch.diffs) | |
+ text.Substring(startLoc + textOne.len) | |
else | |
# Imperfect match. Run a diff to get a framework of equivalent | |
# indices. | |
seq[Diff] diffs = diff_main(textOne, textTwo, false) | |
if (textOne.len > this.Match_MaxBits | |
and this.diff_levenshtein(diffs) / (float) textOne.len | |
> this.PatchDeleteThreshold) | |
# The end points match, but the content is unacceptably bad. | |
results[x] = false | |
else | |
diff_cleanupSemanticLossless(diffs) | |
int indexOne = 0 | |
for (Diff aDiff in aPatch.diffs) | |
if (aDiff.operation != equalityOp) | |
int indexTwo = diff_xIndex(diffs, indexOne) | |
if (aDiff.operation == insertionOp) | |
# Insertion | |
text = text.Insert(startLoc + indexTwo, aDiff.text) | |
elif (aDiff.operation == deletionOp) | |
# Deletion | |
text = text.Remove(startLoc + indexTwo, diff_xIndex(diffs, | |
indexOne + aDiff.text.len) - indexTwo) | |
if (aDiff.operation != deletionOp) | |
indexOne += aDiff.text.len | |
x += 1 | |
# Strip the padding off. | |
text = text.Substring(nullPadding.len, text.len | |
- 2 * nullPadding.len) | |
return new Object[] text, results | |
## | |
## Add some padding on text start and end so that edges can match something. | |
## Intended to be called only from within patch_apply. | |
## @param patches Array of Patch objects. | |
## @return The padding string added to each side. | |
## | |
public string patch_addPadding(seq[Patch] patches) | |
short paddingLength = this.Patch_Margin | |
string nullPadding = "" | |
for (short x = 1; x <= paddingLength; x += 1) | |
nullPadding += (char)x | |
# Bump all the patches forward. | |
for (Patch aPatch in patches) | |
aPatch.startOne += paddingLength | |
aPatch.startTwo += paddingLength | |
# Add some padding on start of first diff. | |
Patch patch = patches.First() | |
seq[Diff] diffs = patch.diffs | |
if (diffs.len == 0 or diffs.First().operation != equalityOp) | |
# Add nullPadding equality. | |
diffs.Insert(0, newDiff(equalityOp, nullPadding)) | |
patch.startOne -= paddingLength; // Should be 0. | |
patch.startTwo -= paddingLength; // Should be 0. | |
patch.lenOne += paddingLength | |
patch.lenTwo += paddingLength | |
elif (paddingLength > diffs.First().text.len) | |
# Grow first equality. | |
Diff firstDiff = diffs.First() | |
int extraLength = paddingLength - firstDiff.text.len | |
firstDiff.text = nullPadding.Substring(firstDiff.text.len) | |
+ firstDiff.text | |
patch.startOne -= extraLength | |
patch.startTwo -= extraLength | |
patch.lenOne += extraLength | |
patch.lenTwo += extraLength | |
# Add some padding on end of last diff. | |
patch = patches.Last() | |
diffs = patch.diffs | |
if (diffs.len == 0 or diffs.Last().operation != equalityOp) | |
# Add nullPadding equality. | |
diffs.add(newDiff(equalityOp, nullPadding)) | |
patch.lenOne += paddingLength | |
patch.lenTwo += paddingLength | |
elif (paddingLength > diffs.Last().text.len) | |
# Grow last equality. | |
Diff lastDiff = diffs.Last() | |
int extraLength = paddingLength - lastDiff.text.len | |
lastDiff.text += nullPadding[0 .. extraLength] | |
patch.lenOne += extraLength | |
patch.lenTwo += extraLength | |
return nullPadding | |
## | |
## Look through the patches and break up any which are longer than the | |
## maximum limit of the match algorithm. | |
## Intended to be called only from within patch_apply. | |
## @param patches List of Patch objects. | |
## | |
public void patchSplitMax(seq[Patch] patches) | |
short patchSize = this.Match_MaxBits | |
for (int x = 0; x < patches.len; x += 1) | |
if (patches[x].lenOne <= patchSize) | |
continue | |
Patch bigPatch = patches[x] | |
# Remove the big old patch. | |
patches.Splice(x--, 1) | |
int startOne = bigPatch.startOne | |
int startTwo = bigPatch.startTwo | |
string preContext = "" | |
while (bigPatch.diffs.len != 0) | |
# Create one of several smaller patches. | |
Patch patch = new Patch() | |
bool empty = true | |
patch.startOne = startOne - preContext.len | |
patch.startTwo = startTwo - preContext.len | |
if (preContext.len != 0) | |
patch.lenOne = patch.lenTwo = preContext.len | |
patch.diffs.add(newDiff(equalityOp, preContext)) | |
while (bigPatch.diffs.len != 0 | |
and patch.lenOne < patchSize - this.Patch_Margin) | |
Operation diffType = bigPatch.diffs[0].operation | |
string diffText = bigPatch.diffs[0].text | |
if (diffType == insertionOp) | |
# Insertions are harmless. | |
patch.lenTwo += diffText.len | |
startTwo += diffText.len | |
patch.diffs.add(bigPatch.diffs.First()) | |
bigPatch.diffs.delete(0) | |
empty = false | |
elif (diffType == deletionOp and patch.diffs.len == 1 | |
and patch.diffs.First().operation == equalityOp | |
and diffText.len > 2 * patchSize) | |
# This is a large deletion. Let it pass in one chunk. | |
patch.lenOne += diffText.len | |
startOne += diffText.len | |
empty = false | |
patch.diffs.add(newDiff(diffType, diffText)) | |
bigPatch.diffs.delete(0) | |
else | |
# Deletion or equality. Only take as much as we can stomach. | |
diffText = diffText.Substring(0, min(diffText.len, | |
patchSize - patch.lenOne - Patch_Margin)) | |
patch.lenOne += diffText.len | |
startOne += diffText.len | |
if (diffType == equalityOp) | |
patch.lenTwo += diffText.len | |
startTwo += diffText.len | |
else | |
empty = false | |
patch.diffs.add(newDiff(diffType, diffText)) | |
if (diffText == bigPatch.diffs[0].text) | |
bigPatch.diffs.delete(0) | |
else | |
bigPatch.diffs[0].text = | |
bigPatch.diffs[0].text.Substring(diffText.len) | |
# Compute the head context for the next patch. | |
preContext = this.diffTextTwo(patch.diffs) | |
preContext = preContext.Substring(max(0, | |
preContext.len - this.Patch_Margin)) | |
string postContext = null | |
# add the end context for this patch. | |
if (diffTextOne(bigPatch.diffs).len > Patch_Margin) | |
postContext = diffTextOne(bigPatch.diffs) | |
.Substring(0, Patch_Margin) | |
else | |
postContext = diffTextOne(bigPatch.diffs) | |
if (postContext.len != 0) | |
patch.lenOne += postContext.len | |
patch.lenTwo += postContext.len | |
if (patch.diffs.len != 0 | |
and patch.diffs[patch.diffs.len - 1].operation | |
== equalityOp) | |
patch.diffs[patch.diffs.len - 1].text += postContext | |
else | |
patch.diffs.add(newDiff(equalityOp, postContext)) | |
if (!empty) | |
patches.Splice(++x, 0, patch) | |
## | |
## Take a list of patches and return a textual representation. | |
## @param patches List of Patch objects. | |
## @return Text representation of patches. | |
## | |
public string patchToText(seq[Patch] patches) | |
rope text = new rope() | |
for (Patch aPatch in patches) | |
text.add(aPatch) | |
return text.ToString() | |
## | |
## Parse a textual representation of patches and return a List of Patch | |
## objects. | |
## @param textline Text representation of patches. | |
## @return List of Patch objects. | |
## @throws ArgumentException If invalid input. | |
## | |
public seq[Patch] patch_fromText(string textline) | |
seq[Patch] patches = newSeq[Patch]() | |
if (textline.len == 0) | |
return patches | |
string[] text = textline.Split('\n') | |
int textPointer = 0 | |
Patch patch | |
Regex patchHeader | |
= new Regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$") | |
Match m | |
char sign | |
string line | |
while (textPointer < text.len) | |
m = patchHeader.Match(text[textPointer]) | |
if (!m.Success) | |
throw new ArgumentException("Invalid patch string: " | |
+ text[textPointer]) | |
patch = new Patch() | |
patches.add(patch) | |
patch.startOne = Convert.ToInt32(m.Groups[1].Value) | |
if (m.Groups[2].len == 0) | |
patch.startOne-- | |
patch.lenOne = 1 | |
elif (m.Groups[2].Value == "0") | |
patch.lenOne = 0 | |
else | |
patch.startOne-- | |
patch.lenOne = Convert.ToInt32(m.Groups[2].Value) | |
patch.startTwo = Convert.ToInt32(m.Groups[3].Value) | |
if (m.Groups[4].len == 0) | |
patch.startTwo-- | |
patch.lenTwo = 1 | |
elif (m.Groups[4].Value == "0") | |
patch.lenTwo = 0 | |
else | |
patch.startTwo-- | |
patch.lenTwo = Convert.ToInt32(m.Groups[4].Value) | |
textPointer += 1 | |
while (textPointer < text.len) | |
try | |
sign = text[textPointer][0] | |
catch (IndexOutOfRangeException) | |
# Blank line? Whatever. | |
textPointer += 1 | |
continue | |
line = text[textPointer][1 .. len()] | |
line = line.Replace("+", "%2b") | |
line = HttpUtility.UrlDecode(line, new UTF8Encoding(false, true)) | |
if (sign == '-') | |
# Deletion. | |
patch.diffs.add(newDiff(deletionOp, line)) | |
elif (sign == '+') | |
# Insertion. | |
patch.diffs.add(newDiff(insertionOp, line)) | |
elif (sign == ' ') | |
# Minor equality. | |
patch.diffs.add(newDiff(equalityOp, line)) | |
elif (sign == '@') | |
# Start of next patch. | |
break | |
else | |
# WTF? | |
throw new ArgumentException( | |
"Invalid patch mode '" + sign + "' in: " + line) | |
textPointer += 1 | |
return patches | |
## | |
## Unescape selected chars for compatability with JavaScript's encodeURI. | |
## In speed critical applications this could be dropped since the | |
## receiving application will certainly decode these fine. | |
## Note that this function is case-sensitive. Thus "%3F" would not be | |
## unescaped. But this is ok because it is only called with the output of | |
## HttpUtility.UrlEncode which returns lowercase hex. | |
## | |
## Example: "%3f" -> "?", "%24" -> "$", etc. | |
## | |
## @param str The string to escape. | |
## @return The escaped string. | |
## | |
public static string unescapeForEncodeUriCompatability(string str) | |
return str.Replace("%21", "!").Replace("%7e", "~") | |
.Replace("%27", "'").Replace("%28", "(").Replace("%29", ")") | |
.Replace("%3b", ";").Replace("%2f", "/").Replace("%3f", "?") | |
.Replace("%3a", ":").Replace("%40", "@").Replace("%26", "&") | |
.Replace("%3d", "=").Replace("%2b", "+").Replace("%24", "$") | |
.Replace("%2c", ",").Replace("%23", "#") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment