Created
July 3, 2018 05:57
-
-
Save jimblandy/389ac6de1e597be107735091eeb044c6 to your computer and use it in GitHub Desktop.
Diff!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::cmp::min; | |
use std::iter::FromIterator; | |
#[derive(Debug)] | |
enum Edit { | |
Delete, | |
Insert(char), | |
Replace(char), | |
Keep, | |
} | |
fn min3<T: Ord>(a: T, b: T, c: T) -> T { | |
min(a, min(b, c)) | |
} | |
fn diff(left: &str, right: &str) -> Vec<Edit> { | |
let mut distance = Vec::from_iter((0..left.len() + 1).map(|_| vec![0; right.len() + 1])); | |
for i in 0..left.len() + 1 { | |
distance[i][0] = i; | |
} | |
for j in 0..right.len() + 1 { | |
distance[0][j] = j; | |
} | |
for (lc, i) in left.chars().zip(1..) { | |
for (rc, j) in right.chars().zip(1..) { | |
// Pretend left and right can be indexed by character: | |
// If we can turn left[..i-1] into right[..j] in C edits, then we | |
// can turn left[..i] into right[..j] by doing those same edits and | |
// then deleting left[i-1], costing us C+1 edits. | |
let delete = distance[i-1][j] + 1; | |
// Similarly, if we can turn left[..i] into right[..j-1] in C edits, | |
// then we can turn left[..i] into right[..j] by doing those same | |
// edits and then inserting right[j-1], costing us C+1 edits. | |
let insert = distance[i][j-1] + 1; | |
// Finally, if we can turn left[..i-1] into right[..j-1] in C edits, | |
// then we can turn left[..i] into right[..j] in C edits if the | |
// final characters match, or C+1 edits if we have to replace it. | |
let replace = distance[i-1][j-1] + if lc == rc { 0 } else { 1 }; | |
// Choose the cheapest option. | |
distance[i][j] = min3(delete, insert, replace); | |
} | |
} | |
println!("Levenshtein distance: {}", distance[left.len()][right.len()]); | |
for row in &distance { | |
println!("{:?}", row); | |
} | |
println!(); | |
let mut edits = Vec::new(); | |
let (mut i, mut j) = (left.len(), right.len()); | |
let mut cj = right.chars().rev(); | |
while i > 0 && j > 0 { | |
println!("{}, {}", i, j); | |
let here = distance[i][j]; | |
if distance[i-1][j] + 1 == here { | |
edits.push(Edit::Delete); | |
i -= 1; | |
} else if distance[i][j-1] + 1 == here { | |
edits.push(Edit::Insert(cj.next().unwrap())); | |
j -= 1; | |
} else if distance[i-1][j-1] + 1 == here { | |
edits.push(Edit::Replace(cj.next().unwrap())); | |
i -= 1; j -= 1; | |
} else if distance[i-1][j-1] == here { | |
edits.push(Edit::Keep); | |
cj.next(); | |
i -= 1; j -= 1; | |
} else { | |
panic!("Cost at [{}][{}] is unexpected!", i, j); | |
} | |
} | |
while i > 0 { | |
assert_eq!(distance[i-1][j] + 1, distance[i][j]); | |
edits.push(Edit::Delete); | |
i -= 1; | |
} | |
while j > 0 { | |
assert_eq!(distance[i][j-1] + 1, distance[i][j]); | |
edits.push(Edit::Insert(cj.next().unwrap())); | |
j -= 1; | |
} | |
edits.reverse(); | |
edits | |
} | |
fn main() { | |
let args = Vec::from_iter(std::env::args()); | |
println!("{:?}", diff(&args[1], &args[2])); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment