Skip to content

Instantly share code, notes, and snippets.

@matthewjberger
Last active August 12, 2023 18:20
Show Gist options
  • Save matthewjberger/830fb8540bc3e74286fb673b550f8b58 to your computer and use it in GitHub Desktop.
Save matthewjberger/830fb8540bc3e74286fb673b550f8b58 to your computer and use it in GitHub Desktop.
Fuzzy search string lists in rust using Levehnstein distance
fn levenshtein(a: &str, b: &str) -> usize {
let (a_len, b_len) = (a.chars().count(), b.chars().count());
let mut matrix = vec![vec![0; b_len + 1]; a_len + 1];
for i in 0..=a_len {
matrix[i][0] = i;
}
for j in 0..=b_len {
matrix[0][j] = j;
}
for (i, ca) in a.chars().enumerate() {
for (j, cb) in b.chars().enumerate() {
let cost = if ca == cb { 0 } else { 1 };
let delete = matrix[i][j + 1] + 1;
let insert = matrix[i + 1][j] + 1;
let substitute = matrix[i][j] + cost;
matrix[i + 1][j + 1] = std::cmp::min(delete, std::cmp::min(insert, substitute));
}
}
matrix[a_len][b_len]
}
fn fuzzy_search<'a>(query: &str, data: &'a [&'a str], max_distance: usize) -> Vec<&'a str> {
data.iter()
.filter(|&&candidate| levenshtein(query, candidate) <= max_distance)
.cloned()
.collect()
}
#[test]
fn test_fuzzy_search() {
let data = [
"apple",
"banana",
"cherry",
"date",
"grape",
"blueberry",
"apricot",
];
let query = "aple";
let results = fuzzy_search(query, &data, 1);
assert_eq!(results, vec!["apple"]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment