Skip to content

Instantly share code, notes, and snippets.

@bgourlie
Last active August 29, 2015 14:11
Show Gist options
  • Save bgourlie/3e9de92fb357b5a140e4 to your computer and use it in GitHub Desktop.
Save bgourlie/3e9de92fb357b5a140e4 to your computer and use it in GitHub Desktop.
Damerau-Levenshtein distance algo implemented in dart
// Based on c# implementation here: http://stackoverflow.com/questions/12027324/damerau-levenshtein-distance-algorithm-disable-counting-of-delete
import 'dart:typed_data';
void main() {
final testFunction = distance4;
const WARMUP_ITERATIONS = 1000;
const RUN_ITERATIONS = 1000000;
const SOURCE = 'The quick brown fox jumped over the lazy dog.';
const TARGET = 'Teh Qiuck bronw fox jump over the lzy dog';
final stopwatch = new Stopwatch();
print('Damerau-Levenshtein Distance between "$SOURCE" and "$TARGET": ${testFunction(SOURCE, TARGET, 256)}');
// Warmup
print('Warming up VM with $WARMUP_ITERATIONS iterations.');
for(int i = 0; i < WARMUP_ITERATIONS; i++){
testFunction(SOURCE, TARGET, 255);
}
//test
print('Performing test with $RUN_ITERATIONS iterations.');
stopwatch.start();
for(int i = 0; i < RUN_ITERATIONS; i++){
testFunction(SOURCE, TARGET, 255);
}
stopwatch.stop();
print('$RUN_ITERATIONS iterations took ${stopwatch.elapsedMilliseconds}ms.');
}
// 1498ms
// 15433ms
int distance(String source, String target, int threshold){
const maxThreshold = 255;
if(threshold > maxThreshold){
throw 'Max threshold is $maxThreshold';
}
// normalization -- case insensitive
source = source.toLowerCase();
target = target.toLowerCase();
// normalization -- source should always be shorter than or equal in length to target
if(source.length > target.length){
final tmp = target;
target = source;
source = tmp;
}
final length1 = source.length;
final length2 = target.length;
if(length2 - length1 > threshold){
return maxThreshold;
}
final maxi = length1;
final maxj = length2;
var dCurrent = new List<int>.filled(maxi + 1, 0);
var dMinus1 = new List<int>.filled(maxi + 1, 0);
var dMinus2 = new List<int>.filled(maxi + 1, 0);
List<int> dSwap;
for(int i = 0; i < maxi; i++){
dCurrent[i] = i;
}
int jm1 = 0, im1 = 0, im2 = -1;
for(int j = 1; j <= maxj; j++){
// rotate
dSwap = dMinus2;
dMinus2 = dMinus1;
dMinus1 = dCurrent;
dCurrent = dSwap;
// initialize
int minDistance = maxThreshold;
dCurrent[0] = j;
im1 = 0;
im2 = -1;
for(int i = 1; i <= maxi; i++){
int cost = source[im1] == target[jm1] ? 0 : 1;
int del = dCurrent[im1] + 1;
int ins = dMinus1[i] + 1;
int sub = dMinus1[im1] + cost;
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del);
if(i > 1 && j > 1 && source[im2] == target[jm1] && source[im1] == target[j - 2]){
final possMin = dMinus2[im2] + cost;
min = min < possMin ? min : possMin;
}
dCurrent[i] = min;
if(min < minDistance){
minDistance = min;
}
im1++;
im2++;
}
jm1++;
if(minDistance > threshold){
return maxThreshold;
}
}
int result = dCurrent[maxi];
return (result > threshold) ? maxThreshold : result;
}
// 1696
// 17665
int distance2(String source, String target, int threshold){
const maxThreshold = 255;
if(threshold > maxThreshold){
throw 'Max threshold is $maxThreshold';
}
// normalization -- case insensitive
source = source.toLowerCase();
target = target.toLowerCase();
// normalization -- source should always be shorter than or equal in length to target
if(source.length > target.length){
final tmp = target;
target = source;
source = tmp;
}
final length1 = source.length;
final length2 = target.length;
if(length2 - length1 > threshold){
return maxThreshold;
}
final maxi = length1;
final maxj = length2;
var dCurrent = new Uint8ClampedList(maxi + 1);
var dMinus1 = new Uint8ClampedList(maxi + 1);
var dMinus2 = new Uint8ClampedList(maxi + 1);
Uint8ClampedList dSwap;
for(int i = 0; i < maxi; i++){
dCurrent[i] = i;
}
int jm1 = 0, im1 = 0, im2 = -1;
for(int j = 1; j <= maxj; j++){
// rotate
dSwap = dMinus2;
dMinus2 = dMinus1;
dMinus1 = dCurrent;
dCurrent = dSwap;
// initialize
int minDistance = maxThreshold;
dCurrent[0] = j;
im1 = 0;
im2 = -1;
for(int i = 1; i <= maxi; i++){
int cost = source[im1] == target[jm1] ? 0 : 1;
int del = dCurrent[im1] + 1;
int ins = dMinus1[i] + 1;
int sub = dMinus1[im1] + cost;
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del);
if(i > 1 && j > 1 && source[im2] == target[jm1] && source[im1] == target[j - 2]){
final possMin = dMinus2[im2] + cost;
min = min < possMin ? min : possMin;
}
dCurrent[i] = min;
if(min < minDistance){
minDistance = min;
}
im1++;
im2++;
}
jm1++;
if(minDistance > threshold){
return maxThreshold;
}
}
int result = dCurrent[maxi];
return (result > threshold) ? maxThreshold : result;
}
// 18234
int distance3(String source, String target, int threshold){
const maxThreshold = 255;
if(threshold > maxThreshold){
throw 'Max threshold is $maxThreshold';
}
// normalization -- case insensitive
source = source.toLowerCase();
target = target.toLowerCase();
// normalization -- source should always be shorter than or equal in length to target
if(source.length > target.length){
final tmp = target;
target = source;
source = tmp;
}
final length1 = source.length;
final length2 = target.length;
if(length2 - length1 > threshold){
return maxThreshold;
}
final maxi = length1;
final maxj = length2;
final mem = new Uint8ClampedList((maxi + 1) * 3);
// var dCurrent = new Uint8ClampedList(maxi + 1);
// var dMinus1 = new Uint8ClampedList(maxi + 1);
// var dMinus2 = new Uint8ClampedList(maxi + 1);
// Uint8ClampedList dSwap;
int dSwapOffset;
int dCurrentOffset = 0;
int dMinus1Offset = maxi + 1;
int dMinus2Offset = (maxi + 1) * 2;
for(int i = 0; i < maxi; i++){
mem[dCurrentOffset + i] = i;
}
int jm1 = 0, im1 = 0, im2 = -1;
for(int j = 1; j <= maxj; j++){
// rotate
dSwapOffset = dMinus2Offset;
dMinus2Offset = dMinus1Offset;
dMinus1Offset = dCurrentOffset;
dCurrentOffset = dSwapOffset;
// initialize
int minDistance = maxThreshold;
mem[dCurrentOffset] = j;
im1 = 0;
im2 = -1;
for(int i = 1; i <= maxi; i++){
int cost = source[im1] == target[jm1] ? 0 : 1;
int del = mem[dCurrentOffset + im1] + 1;
int ins = mem[dMinus1Offset + i] + 1;
int sub = mem[dMinus1Offset + im1] + cost;
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del);
if(i > 1 && j > 1 && source[im2] == target[jm1] && source[im1] == target[j - 2]){
final possMin = mem[dMinus2Offset + im2] + cost;
min = min < possMin ? min : possMin;
}
mem[dCurrentOffset + i] = min;
if(min < minDistance){
minDistance = min;
}
im1++;
im2++;
}
jm1++;
if(minDistance > threshold){
return maxThreshold;
}
}
int result = mem[dCurrentOffset + maxi];
return (result > threshold) ? maxThreshold : result;
}
int distance4(String source, String target, int threshold){
const maxThreshold = 255;
if(threshold > maxThreshold){
throw 'Max threshold is $maxThreshold';
}
// normalization -- case insensitive
source = source.toLowerCase();
target = target.toLowerCase();
// normalization -- source should always be shorter than or equal in length to target
if(source.length > target.length){
final tmp = target;
target = source;
source = tmp;
}
final length1 = source.length;
final length2 = target.length;
if(length2 - length1 > threshold){
return maxThreshold;
}
final maxi = length1;
final maxj = length2;
final mem = new List<int>.filled((maxi + 1) * 3, 0);
// var dCurrent = new Uint8ClampedList(maxi + 1);
// var dMinus1 = new Uint8ClampedList(maxi + 1);
// var dMinus2 = new Uint8ClampedList(maxi + 1);
// Uint8ClampedList dSwap;
int dSwapOffset;
int dCurrentOffset = 0;
int dMinus1Offset = maxi + 1;
int dMinus2Offset = (maxi + 1) * 2;
for(int i = 0; i < maxi; i++){
mem[dCurrentOffset + i] = i;
}
int jm1 = 0, im1 = 0, im2 = -1;
for(int j = 1; j <= maxj; j++){
// rotate
dSwapOffset = dMinus2Offset;
dMinus2Offset = dMinus1Offset;
dMinus1Offset = dCurrentOffset;
dCurrentOffset = dSwapOffset;
// initialize
int minDistance = maxThreshold;
mem[dCurrentOffset] = j;
im1 = 0;
im2 = -1;
for(int i = 1; i <= maxi; i++){
int cost = source[im1] == target[jm1] ? 0 : 1;
int del = mem[dCurrentOffset + im1] + 1;
int ins = mem[dMinus1Offset + i] + 1;
int sub = mem[dMinus1Offset + im1] + cost;
int min = (del > ins) ? (ins > sub ? sub : ins) : (del > sub ? sub : del);
if(i > 1 && j > 1 && source[im2] == target[jm1] && source[im1] == target[j - 2]){
final possMin = mem[dMinus2Offset + im2] + cost;
min = min < possMin ? min : possMin;
}
mem[dCurrentOffset + i] = min;
if(min < minDistance){
minDistance = min;
}
im1++;
im2++;
}
jm1++;
if(minDistance > threshold){
return maxThreshold;
}
}
int result = mem[dCurrentOffset + maxi];
return (result > threshold) ? maxThreshold : result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment