Skip to content

Instantly share code, notes, and snippets.

@Varriount
Last active December 25, 2015 21:58
Show Gist options
  • Save Varriount/7045833 to your computer and use it in GitHub Desktop.
Save Varriount/7045833 to your computer and use it in GitHub Desktop.
DiffMatchPath Bug
from times import epochTime
#import ropes
type
NaturalFloat = range [0.0 .. Inf]
TDMPSettings = object
diffTimeout: NaturalFloat ## Number of seconds to map a diff before
## giving up (0 for infinity).
diffEditCost: Natural ## Cost of an empty edit operation in terms
## of edit characters.
matchThreshold: NaturalFloat ## At what point is no match declared
## (0.0 = perfection, 1.0 = very loose).
matchDistance: Natural ## How far to search for a match \
## (0 = exact location, 1000+ = broad match).
## A match this many characters away from the expected location
## will add 1.0 to the score (0.0 is a perfect match).
patchDeleteThreshold: NaturalFloat ## When deleting a large block of \
## text (over ~64 characters), how close do the contents have to
## be to match the expected contents. (0.0 = perfect, 1.0 = loose).
## Note that MatchThreshold controls how closely the end points
## of a delete need to match.
patchMargin: Natural # Chunk size for context length.
matchMaxBits: Natural # Max bits in an int
PDMPSettings = ref TDMPSettings
## Kinds of text operations.
ETextOperation = enum
deletionOp ## A deletion operation.
insertionOp ## An insertion operation.
equalityOp ## An equality operation.
TDiff = object
operation:ETextOperation
content:string
PDiff = ref PDiff
TDiffSeq = seq[PDiff] ## A sequence of diff operations.
PDiffSeq = ref TDiffSeq
TDiffPatch = object
diffs: PDiffSeq
startOne, startTwo, lengthOne, lengthTwo: int
PDiffPatch = ref TDiffPatch
THalfMatchData = tuple[textOnePrefix, textOneSuffix,
textTwoPrefix, textTwoSuffix,
commonMiddle: string]
TLineToCharData = tuple[textOne, textTwo:string, textArray:seq[string]]
# Constructors
proc DMPSettings(diffTimeout=1.0, diffEditCost=4,
matchThreshold=0.5, matchDistance=1000,
patchDeleteThreshold=0.5, patchMargin=4,
matchMaxBits=32): PDMPSettings =
new(result)
result.diffTimeout = diffTimeout
result.diffEditCost = diffEditCost
result.matchThreshold = matchThreshold
result.matchDistance = matchDistance
result.patchDeleteThreshold = patchDeleteThreshold
result.patchMargin = patchMargin
result.matchMaxBits = matchMaxBits
proc newPatch(diffs: PDiffSeq, startA, startB, lengthA, lengthB: int): PDiffPatch =
new(result)
result.diffs = diffs
# If I leave this section out, nimrod hangs, else, it crashes with what windows says is a stack overflow.
result.startOne = startA
result.startTwo = startB
result.lengthOne = lengthA
result.lengthTwo = lengthB
# Utility Procedures
proc calcDeadline(inputDeadline, timeout: float): float =
## Used by diffMain to calculate it's input deadline.
if (inputDeadline <= 0):
result = epochTime()
else:
result = epochTime() + timeout
proc generateCoords(startPos, length: int): string =
## Used by stringify procedures to calculate coordinates from patch
## positions and lengths.
if length == 0:
result = $startPos & ",0"
elif length == 1:
result = $(startPos + 1)
else:
result = $(startPos + 1) & "," & $length
# Operator procedures
proc `$` (diffChunk: TDiff): string =
var prettyText = diffChunk.content #diffChunk.text.replace("\n", "\u00b6")
result = "TDiff(operation=" & $diffChunk.operation & ", content=\"" & prettyText & "\")"
proc `$` (patch: PDiffPatch): string =
var coordsOne = generateCoords(patch.startOne, patch.lengthOne)
var coordsTwo = generateCoords(patch.startTwo, patch.lengthTwo)
var resultRope = "@@ -"
resultRope = resultRope & $coordsOne & " +" & coordsTwo & " @@\n"
for diff in patch.diffs:
case diff.operation:
of insertionOp:
resultRope.add("+")
of deletionOp:
resultRope.add("-")
of equalityOp:
resultRope.add(" ")
resultRope.add(diff.content) #TODO - Encode data using url encoding
result = $resultRope
proc diffMain(textOne, textTwo: string, checkLines: bool, deadline:float): TDiffSeq =
if textOne == textTwo:
result = newSeq[PDiff]
if len(textOne) != 0:
result.add(TDiff(operation:equalityOp, textOne))
return result
# Trim off common prefix (speedup).
var commonLength = diff_commonPrefix(textOne, textTwo)
var commonPrefix = textOne[0..commonLength]
textOne = textOne[commonLength]
textTwo = textTwo[commonLength]
# Trim off common suffix (speedup).
commonLength = diff_commonSuffix(textOne, textTwo)
var commonSuffix = textOne[len(textOne) - commonLength]
textOne = textOne[0, len(textOne) - commonLength]
textTwo = textTwo[0, len(textTwo) - commonLength]
# Compute the diff on the middle block.
diffs = diff_compute(textOne, textTwo, checklines, deadline)
# Restore the prefix and suffix.
if (len(commonprefix) != 0)
diffs.insert(0, Diff(equalityOp, commonPrefix)))
if (len(commonsuffix) != 0)
diffs.add(TDiff(equalityOp, commonSuffix))
diff_cleanupMerge(diffs)
return diffs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment