Last active
December 25, 2015 21:58
-
-
Save Varriount/7045833 to your computer and use it in GitHub Desktop.
DiffMatchPath Bug
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from times import epochTime | |
#import ropes | |
type | |
NaturalFloat = range [0.0 .. Inf] | |
TDMPSettings = object | |
diffTimeout: NaturalFloat ## Number of seconds to map a diff before | |
## giving up (0 for infinity). | |
diffEditCost: Natural ## Cost of an empty edit operation in terms | |
## of edit characters. | |
matchThreshold: NaturalFloat ## At what point is no match declared | |
## (0.0 = perfection, 1.0 = very loose). | |
matchDistance: Natural ## How far to search for a match \ | |
## (0 = exact location, 1000+ = broad match). | |
## A match this many characters away from the expected location | |
## will add 1.0 to the score (0.0 is a perfect match). | |
patchDeleteThreshold: NaturalFloat ## When deleting a large block of \ | |
## text (over ~64 characters), how close do the contents have to | |
## be to match the expected contents. (0.0 = perfect, 1.0 = loose). | |
## Note that MatchThreshold controls how closely the end points | |
## of a delete need to match. | |
patchMargin: Natural # Chunk size for context length. | |
matchMaxBits: Natural # Max bits in an int | |
PDMPSettings = ref TDMPSettings | |
## Kinds of text operations. | |
ETextOperation = enum | |
deletionOp ## A deletion operation. | |
insertionOp ## An insertion operation. | |
equalityOp ## An equality operation. | |
TDiff = object | |
operation:ETextOperation | |
content:string | |
PDiff = ref PDiff | |
TDiffSeq = seq[PDiff] ## A sequence of diff operations. | |
PDiffSeq = ref TDiffSeq | |
TDiffPatch = object | |
diffs: PDiffSeq | |
startOne, startTwo, lengthOne, lengthTwo: int | |
PDiffPatch = ref TDiffPatch | |
THalfMatchData = tuple[textOnePrefix, textOneSuffix, | |
textTwoPrefix, textTwoSuffix, | |
commonMiddle: string] | |
TLineToCharData = tuple[textOne, textTwo:string, textArray:seq[string]] | |
# Constructors | |
proc DMPSettings(diffTimeout=1.0, diffEditCost=4, | |
matchThreshold=0.5, matchDistance=1000, | |
patchDeleteThreshold=0.5, patchMargin=4, | |
matchMaxBits=32): PDMPSettings = | |
new(result) | |
result.diffTimeout = diffTimeout | |
result.diffEditCost = diffEditCost | |
result.matchThreshold = matchThreshold | |
result.matchDistance = matchDistance | |
result.patchDeleteThreshold = patchDeleteThreshold | |
result.patchMargin = patchMargin | |
result.matchMaxBits = matchMaxBits | |
proc newPatch(diffs: PDiffSeq, startA, startB, lengthA, lengthB: int): PDiffPatch = | |
new(result) | |
result.diffs = diffs | |
# If I leave this section out, nimrod hangs, else, it crashes with what windows says is a stack overflow. | |
result.startOne = startA | |
result.startTwo = startB | |
result.lengthOne = lengthA | |
result.lengthTwo = lengthB | |
# Utility Procedures | |
proc calcDeadline(inputDeadline, timeout: float): float = | |
## Used by diffMain to calculate it's input deadline. | |
if (inputDeadline <= 0): | |
result = epochTime() | |
else: | |
result = epochTime() + timeout | |
proc generateCoords(startPos, length: int): string = | |
## Used by stringify procedures to calculate coordinates from patch | |
## positions and lengths. | |
if length == 0: | |
result = $startPos & ",0" | |
elif length == 1: | |
result = $(startPos + 1) | |
else: | |
result = $(startPos + 1) & "," & $length | |
# Operator procedures | |
proc `$` (diffChunk: TDiff): string = | |
var prettyText = diffChunk.content #diffChunk.text.replace("\n", "\u00b6") | |
result = "TDiff(operation=" & $diffChunk.operation & ", content=\"" & prettyText & "\")" | |
proc `$` (patch: PDiffPatch): string = | |
var coordsOne = generateCoords(patch.startOne, patch.lengthOne) | |
var coordsTwo = generateCoords(patch.startTwo, patch.lengthTwo) | |
var resultRope = "@@ -" | |
resultRope = resultRope & $coordsOne & " +" & coordsTwo & " @@\n" | |
for diff in patch.diffs: | |
case diff.operation: | |
of insertionOp: | |
resultRope.add("+") | |
of deletionOp: | |
resultRope.add("-") | |
of equalityOp: | |
resultRope.add(" ") | |
resultRope.add(diff.content) #TODO - Encode data using url encoding | |
result = $resultRope | |
proc diffMain(textOne, textTwo: string, checkLines: bool, deadline:float): TDiffSeq = | |
if textOne == textTwo: | |
result = newSeq[PDiff] | |
if len(textOne) != 0: | |
result.add(TDiff(operation:equalityOp, textOne)) | |
return result | |
# Trim off common prefix (speedup). | |
var commonLength = diff_commonPrefix(textOne, textTwo) | |
var commonPrefix = textOne[0..commonLength] | |
textOne = textOne[commonLength] | |
textTwo = textTwo[commonLength] | |
# Trim off common suffix (speedup). | |
commonLength = diff_commonSuffix(textOne, textTwo) | |
var commonSuffix = textOne[len(textOne) - commonLength] | |
textOne = textOne[0, len(textOne) - commonLength] | |
textTwo = textTwo[0, len(textTwo) - commonLength] | |
# Compute the diff on the middle block. | |
diffs = diff_compute(textOne, textTwo, checklines, deadline) | |
# Restore the prefix and suffix. | |
if (len(commonprefix) != 0) | |
diffs.insert(0, Diff(equalityOp, commonPrefix))) | |
if (len(commonsuffix) != 0) | |
diffs.add(TDiff(equalityOp, commonSuffix)) | |
diff_cleanupMerge(diffs) | |
return diffs |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment