Last active
August 29, 2015 14:20
-
-
Save SeijiEmery/8ea00d3a33b00a74305c to your computer and use it in GitHub Desktop.
Levenshtein distance (java)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// en.wikipedia.org/wiki/Levenshtein_distance | |
public static int levenshtein (String a, String b) { | |
final int n = a.length() + 1; | |
final int m = b.length() + 1; | |
int[] d = new int[n * m]; | |
for (int i = 0; i < n; ++i) | |
d[i] = i; | |
for (int j = 1; j < m; ++j) | |
d[j * n] = j; | |
// Since our 2d matrix is stored in a flat array, we can do some index optimizations: | |
for (int j = 0, r = n; r < n * m; ++j, r += n) { | |
for (int i = 0, k = r + 1; k < r + n; ++i, ++k) { | |
d[k] = Math.min( Math.min(d[k - 1], d[k - n]) + 1, | |
d[k - n - 1] + (a.charAt(i) != b.charAt(j) ? 1 : 0)); | |
} | |
} | |
return d[n * m - 1]; | |
} | |
// ...and we can also do a further optimization: since only two rows are ever accessed at the same time, we | |
// can scrap the matrix allocation and cut down on our memory overhead. | |
public static int levenshtein2 (String a, String b) { | |
int[] cur = new int[a.length() + 1]; | |
int[] prev = new int[a.length() + 1]; | |
for (int i = 0; i < n; ++i) | |
prev[i] = i; | |
for (int j = 0; j < b.length(); ++j) { | |
for (int i = 0, x = j; i < a.length(); ++i) { | |
x = cur[i+1] = Math.min(Math.min(x, prev[i+1]) + 1, prev[i] + (a.charAt(i) != b.charAt(j) ? 1 : 0)); | |
} | |
// Swap rows | |
int[] tmp = cur; cur = prev; prev = tmp; | |
} | |
return prev[a.length()]; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment