Skip to content

Instantly share code, notes, and snippets.

@PaulBGD
Last active December 20, 2015 22:09
Show Gist options
  • Save PaulBGD/6202963 to your computer and use it in GitHub Desktop.
Save PaulBGD/6202963 to your computer and use it in GitHub Desktop.
Simple Fuzzy Search
public String findMatch(String arg, List<String> words, boolean ignoreRepeat) {
// Where the final word will be stored.
String word = null;
/*
* Sometimes two words will equally match (rarely). If we find one that matches, we
* increase this by one. Just in case the person wants it to return null, we have the
* ignoreRepeat.
*/
int repeated = 0;
for (String s : words) {
/*
* The magic behind this all. getLevenshteinDistance() compares two strings and
* returns how many different letters they have. So like fall and fell would
* return 1. The lower the number, the closer the match.
*/
int low = getLevenshteinDistance(s, arg);
// We have to have something to compare to
int lowest = getLevenshteinDistance(word, arg);
/*
* If this word is lower than the last, set all the variables to it. We also
* reset the repeated, as there are no repeats for this number.
*/
if (low < lowest) {
word = s;
repeated = 0;
lowest = low;
} else if (lowest == low) {
/*
* Now we check the lengths. Lets say you have the word "flop" and are
* matching it to "fly" and "op" (in a list in that order). Since they both
* are two letters away and op is last, it would pick op (which I believe fly
* would make more sense). We now compare which is the closest length to the
* word being checked.
*/
int wLength = (word.length() > arg.length() ? word.length() - arg.length() : arg
.length() - word.length());
int dLength = (s.length() > arg.length() ? s.length() - arg.length() : arg.length()
- s.length());
if (dLength < wLength) {
word = s;
repeated = 0;
lowest = low;
} else {
/*
* Another check. This checks if the string we are checking contains the
* search directly.
*/
if (s.contains(arg) && !word.contains(arg)) {
word = s;
repeated = 0;
lowest = low;
/*
* We switch it the other way around now.
*/
} else if (arg.contains(s) && !arg.contains(word)) {
word = s;
repeated = 0;
lowest = low;
} else {
/*
* Worst comes to worst, this ties with another word. Just in case
* someone doesn't want this, we make sure to have repeated not equal to
* 0.
*/
repeated++;
}
}
}
}
/*
* As I have said above, some people don't want a repeat. In this case, I return
* null.
*/
if (repeated != 0 && ignoreRepeat) {
return null;
} else {
return word;
}
}
private int getLevenshteinDistance(String s, String t) {
if (s == null || t == null) {
return 0;
}
int n = s.length();
int m = t.length();
if (n == 0) {
return m;
} else if (m == 0) {
return n;
}
if (n > m) {
String tmp = s;
s = t;
t = tmp;
n = m;
m = t.length();
}
int p[] = new int[n + 1];
int d[] = new int[n + 1];
int _d[];
int i, j, cost;
char t_j;
for (i = 0; i <= n; i++) {
p[i] = i;
}
for (j = 1; j <= m; j++) {
t_j = t.charAt(j - 1);
d[0] = j;
for (i = 1; i <= n; i++) {
cost = s.charAt(i - 1) == t_j ? 0 : 1;
d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost);
}
_d = p;
p = d;
d = _d;
}
return p[n];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment