Created
July 4, 2019 12:36
-
-
Save christianscott/cc1e4f7bc6185c4f230933dc5a9686aa to your computer and use it in GitHub Desktop.
Levenshtein edit distance (rust)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::cmp::{self, Ord}; | |
pub fn levenshtein_distance(source: &str, target: &str) -> usize { | |
if source.is_empty() || target.is_empty() { | |
return 0; | |
} | |
let source_chars: Vec<char> = source.chars().collect(); | |
let target_chars: Vec<char> = target.chars().collect(); | |
let mut last_row: Vec<usize> = (0..=target.len()).collect(); | |
for (i, source_char) in source_chars.iter().enumerate() { | |
let mut next_dist = i + 1; | |
for (j, target_char) in target_chars.iter().enumerate() { | |
let current_dist = next_dist; | |
let dist_if_substitute = if source_char == target_char { | |
last_row[j] | |
} else { | |
last_row[j] + 1 | |
}; | |
let dist_if_insert = current_dist + 1; | |
let dist_if_delete = last_row[j + 1] + 1; | |
next_dist = min_3(dist_if_substitute, dist_if_insert, dist_if_delete); | |
last_row[j] = current_dist; | |
} | |
last_row[target.len()] = next_dist; | |
} | |
last_row[target.len()] | |
} | |
fn min_3<T: Ord>(a: T, b: T, c: T) -> T { | |
cmp::min(a, cmp::min(b, c)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment