Last active
August 29, 2015 14:11
-
-
Save bgourlie/3e9de92fb357b5a140e4 to your computer and use it in GitHub Desktop.
Damerau-Levenshtein distance algo implemented in dart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Based on c# implementation here: http://stackoverflow.com/questions/12027324/damerau-levenshtein-distance-algorithm-disable-counting-of-delete | |
import 'dart:typed_data'; | |
void main() { | |
final testFunction = distance4; | |
const WARMUP_ITERATIONS = 1000; | |
const RUN_ITERATIONS = 1000000; | |
const SOURCE = 'The quick brown fox jumped over the lazy dog.'; | |
const TARGET = 'Teh Qiuck bronw fox jump over the lzy dog'; | |
final stopwatch = new Stopwatch(); | |
print('Damerau-Levenshtein Distance between "$SOURCE" and "$TARGET": ${testFunction(SOURCE, TARGET, 256)}'); | |
// Warmup | |
print('Warming up VM with $WARMUP_ITERATIONS iterations.'); | |
for(int i = 0; i < WARMUP_ITERATIONS; i++){ | |
testFunction(SOURCE, TARGET, 255); | |
} | |
//test | |
print('Performing test with $RUN_ITERATIONS iterations.'); | |
stopwatch.start(); | |
for(int i = 0; i < RUN_ITERATIONS; i++){ | |
testFunction(SOURCE, TARGET, 255); | |
} | |
stopwatch.stop(); | |
print('$RUN_ITERATIONS iterations took ${stopwatch.elapsedMilliseconds}ms.'); | |
} | |
// 1498ms | |
// 15433ms | |
int distance(String source, String target, int threshold){ | |
const maxThreshold = 255; | |
if(threshold > maxThreshold){ | |
throw 'Max threshold is $maxThreshold'; | |
} | |
// normalization -- case insensitive | |
source = source.toLowerCase(); | |
target = target.toLowerCase(); | |
// normalization -- source should always be shorter than or equal in length to target | |
if(source.length > target.length){ | |
final tmp = target; | |
target = source; | |
source = tmp; | |
} | |
final length1 = source.length; | |
final length2 = target.length; | |
if(length2 - length1 > threshold){ | |
return maxThreshold; | |
} | |
final maxi = length1; | |
final maxj = length2; | |
var dCurrent = new List<int>.filled(maxi + 1, 0); | |
var dMinus1 = new List<int>.filled(maxi + 1, 0); | |
var dMinus2 = new List<int>.filled(maxi + 1, 0); | |
List<int> dSwap; | |
for(int i = 0; i < maxi; i++){ | |
dCurrent[i] = i; | |
} | |
int jm1 = 0, im1 = 0, im2 = -1; | |
for(int j = 1; j <= maxj; j++){ | |
// rotate | |
dSwap = dMinus2; | |
dMinus2 = dMinus1; | |
dMinus1 = dCurrent; | |
dCurrent = dSwap; | |
// initialize | |
int minDistance = maxThreshold; | |
dCurrent[0] = j; | |
im1 = 0; | |
im2 = -1; | |
for(int i = 1; i <= maxi; i++){ | |
int cost = source[im1] == target[jm1] ? 0 : 1; | |
int del = dCurrent[im1] + 1; | |
int ins = dMinus1[i] + 1; | |
int sub = dMinus1[im1] + cost; | |
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del); | |
if(i > 1 && j > 1 && source[im2] == target[jm1] && source[im1] == target[j - 2]){ | |
final possMin = dMinus2[im2] + cost; | |
min = min < possMin ? min : possMin; | |
} | |
dCurrent[i] = min; | |
if(min < minDistance){ | |
minDistance = min; | |
} | |
im1++; | |
im2++; | |
} | |
jm1++; | |
if(minDistance > threshold){ | |
return maxThreshold; | |
} | |
} | |
int result = dCurrent[maxi]; | |
return (result > threshold) ? maxThreshold : result; | |
} | |
// 1696 | |
// 17665 | |
int distance2(String source, String target, int threshold){ | |
const maxThreshold = 255; | |
if(threshold > maxThreshold){ | |
throw 'Max threshold is $maxThreshold'; | |
} | |
// normalization -- case insensitive | |
source = source.toLowerCase(); | |
target = target.toLowerCase(); | |
// normalization -- source should always be shorter than or equal in length to target | |
if(source.length > target.length){ | |
final tmp = target; | |
target = source; | |
source = tmp; | |
} | |
final length1 = source.length; | |
final length2 = target.length; | |
if(length2 - length1 > threshold){ | |
return maxThreshold; | |
} | |
final maxi = length1; | |
final maxj = length2; | |
var dCurrent = new Uint8ClampedList(maxi + 1); | |
var dMinus1 = new Uint8ClampedList(maxi + 1); | |
var dMinus2 = new Uint8ClampedList(maxi + 1); | |
Uint8ClampedList dSwap; | |
for(int i = 0; i < maxi; i++){ | |
dCurrent[i] = i; | |
} | |
int jm1 = 0, im1 = 0, im2 = -1; | |
for(int j = 1; j <= maxj; j++){ | |
// rotate | |
dSwap = dMinus2; | |
dMinus2 = dMinus1; | |
dMinus1 = dCurrent; | |
dCurrent = dSwap; | |
// initialize | |
int minDistance = maxThreshold; | |
dCurrent[0] = j; | |
im1 = 0; | |
im2 = -1; | |
for(int i = 1; i <= maxi; i++){ | |
int cost = source[im1] == target[jm1] ? 0 : 1; | |
int del = dCurrent[im1] + 1; | |
int ins = dMinus1[i] + 1; | |
int sub = dMinus1[im1] + cost; | |
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del); | |
if(i > 1 && j > 1 && source[im2] == target[jm1] && source[im1] == target[j - 2]){ | |
final possMin = dMinus2[im2] + cost; | |
min = min < possMin ? min : possMin; | |
} | |
dCurrent[i] = min; | |
if(min < minDistance){ | |
minDistance = min; | |
} | |
im1++; | |
im2++; | |
} | |
jm1++; | |
if(minDistance > threshold){ | |
return maxThreshold; | |
} | |
} | |
int result = dCurrent[maxi]; | |
return (result > threshold) ? maxThreshold : result; | |
} | |
// 18234 | |
int distance3(String source, String target, int threshold){ | |
const maxThreshold = 255; | |
if(threshold > maxThreshold){ | |
throw 'Max threshold is $maxThreshold'; | |
} | |
// normalization -- case insensitive | |
source = source.toLowerCase(); | |
target = target.toLowerCase(); | |
// normalization -- source should always be shorter than or equal in length to target | |
if(source.length > target.length){ | |
final tmp = target; | |
target = source; | |
source = tmp; | |
} | |
final length1 = source.length; | |
final length2 = target.length; | |
if(length2 - length1 > threshold){ | |
return maxThreshold; | |
} | |
final maxi = length1; | |
final maxj = length2; | |
final mem = new Uint8ClampedList((maxi + 1) * 3); | |
// var dCurrent = new Uint8ClampedList(maxi + 1); | |
// var dMinus1 = new Uint8ClampedList(maxi + 1); | |
// var dMinus2 = new Uint8ClampedList(maxi + 1); | |
// Uint8ClampedList dSwap; | |
int dSwapOffset; | |
int dCurrentOffset = 0; | |
int dMinus1Offset = maxi + 1; | |
int dMinus2Offset = (maxi + 1) * 2; | |
for(int i = 0; i < maxi; i++){ | |
mem[dCurrentOffset + i] = i; | |
} | |
int jm1 = 0, im1 = 0, im2 = -1; | |
for(int j = 1; j <= maxj; j++){ | |
// rotate | |
dSwapOffset = dMinus2Offset; | |
dMinus2Offset = dMinus1Offset; | |
dMinus1Offset = dCurrentOffset; | |
dCurrentOffset = dSwapOffset; | |
// initialize | |
int minDistance = maxThreshold; | |
mem[dCurrentOffset] = j; | |
im1 = 0; | |
im2 = -1; | |
for(int i = 1; i <= maxi; i++){ | |
int cost = source[im1] == target[jm1] ? 0 : 1; | |
int del = mem[dCurrentOffset + im1] + 1; | |
int ins = mem[dMinus1Offset + i] + 1; | |
int sub = mem[dMinus1Offset + im1] + cost; | |
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del); | |
if(i > 1 && j > 1 && source[im2] == target[jm1] && source[im1] == target[j - 2]){ | |
final possMin = mem[dMinus2Offset + im2] + cost; | |
min = min < possMin ? min : possMin; | |
} | |
mem[dCurrentOffset + i] = min; | |
if(min < minDistance){ | |
minDistance = min; | |
} | |
im1++; | |
im2++; | |
} | |
jm1++; | |
if(minDistance > threshold){ | |
return maxThreshold; | |
} | |
} | |
int result = mem[dCurrentOffset + maxi]; | |
return (result > threshold) ? maxThreshold : result; | |
} | |
int distance4(String source, String target, int threshold){ | |
const maxThreshold = 255; | |
if(threshold > maxThreshold){ | |
throw 'Max threshold is $maxThreshold'; | |
} | |
// normalization -- case insensitive | |
source = source.toLowerCase(); | |
target = target.toLowerCase(); | |
// normalization -- source should always be shorter than or equal in length to target | |
if(source.length > target.length){ | |
final tmp = target; | |
target = source; | |
source = tmp; | |
} | |
final length1 = source.length; | |
final length2 = target.length; | |
if(length2 - length1 > threshold){ | |
return maxThreshold; | |
} | |
final maxi = length1; | |
final maxj = length2; | |
final mem = new List<int>.filled((maxi + 1) * 3, 0); | |
// var dCurrent = new Uint8ClampedList(maxi + 1); | |
// var dMinus1 = new Uint8ClampedList(maxi + 1); | |
// var dMinus2 = new Uint8ClampedList(maxi + 1); | |
// Uint8ClampedList dSwap; | |
int dSwapOffset; | |
int dCurrentOffset = 0; | |
int dMinus1Offset = maxi + 1; | |
int dMinus2Offset = (maxi + 1) * 2; | |
for(int i = 0; i < maxi; i++){ | |
mem[dCurrentOffset + i] = i; | |
} | |
int jm1 = 0, im1 = 0, im2 = -1; | |
for(int j = 1; j <= maxj; j++){ | |
// rotate | |
dSwapOffset = dMinus2Offset; | |
dMinus2Offset = dMinus1Offset; | |
dMinus1Offset = dCurrentOffset; | |
dCurrentOffset = dSwapOffset; | |
// initialize | |
int minDistance = maxThreshold; | |
mem[dCurrentOffset] = j; | |
im1 = 0; | |
im2 = -1; | |
for(int i = 1; i <= maxi; i++){ | |
int cost = source[im1] == target[jm1] ? 0 : 1; | |
int del = mem[dCurrentOffset + im1] + 1; | |
int ins = mem[dMinus1Offset + i] + 1; | |
int sub = mem[dMinus1Offset + im1] + cost; | |
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del); | |
if(i > 1 && j > 1 && source[im2] == target[jm1] && source[im1] == target[j - 2]){ | |
final possMin = mem[dMinus2Offset + im2] + cost; | |
min = min < possMin ? min : possMin; | |
} | |
mem[dCurrentOffset + i] = min; | |
if(min < minDistance){ | |
minDistance = min; | |
} | |
im1++; | |
im2++; | |
} | |
jm1++; | |
if(minDistance > threshold){ | |
return maxThreshold; | |
} | |
} | |
int result = mem[dCurrentOffset + maxi]; | |
return (result > threshold) ? maxThreshold : result; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment