Created
April 28, 2017 13:33
-
-
Save ogregoire/6eff7186fb73715924c2c1b044daee63 to your computer and use it in GitHub Desktop.
Levenshtein distance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class Strings { | |
// From https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Java | |
public int levenshteinDistance (CharSequence lhs, CharSequence rhs) { | |
int len0 = lhs.length() + 1; | |
int len1 = rhs.length() + 1; | |
// the array of distances | |
int[] cost = new int[len0]; | |
int[] newcost = new int[len0]; | |
// initial cost of skipping prefix in String s0 | |
for (int i = 0; i < len0; i++) cost[i] = i; | |
// dynamically computing the array of distances | |
// transformation cost for each letter in s1 | |
for (int j = 1; j < len1; j++) { | |
// initial cost of skipping prefix in String s1 | |
newcost[0] = j; | |
// transformation cost for each letter in s0 | |
for(int i = 1; i < len0; i++) { | |
// matching current letters in both strings | |
int match = (lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1; | |
// computing cost for each transformation | |
int cost_replace = cost[i - 1] + match; | |
int cost_insert = cost[i] + 1; | |
int cost_delete = newcost[i - 1] + 1; | |
// keep minimum cost | |
newcost[i] = Math.min(Math.min(cost_insert, cost_delete), cost_replace); | |
} | |
// swap cost/newcost arrays | |
int[] swap = cost; cost = newcost; newcost = swap; | |
} | |
// the distance is the cost for transforming all letters in both strings | |
return cost[len0 - 1]; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment