Skip to content

Instantly share code, notes, and snippets.

@jimblandy
Created July 3, 2018 05:57
Show Gist options
  • Save jimblandy/389ac6de1e597be107735091eeb044c6 to your computer and use it in GitHub Desktop.
Save jimblandy/389ac6de1e597be107735091eeb044c6 to your computer and use it in GitHub Desktop.
Diff!
use std::cmp::min;
use std::iter::FromIterator;
#[derive(Debug)]
enum Edit {
Delete,
Insert(char),
Replace(char),
Keep,
}
fn min3<T: Ord>(a: T, b: T, c: T) -> T {
min(a, min(b, c))
}
fn diff(left: &str, right: &str) -> Vec<Edit> {
let mut distance = Vec::from_iter((0..left.len() + 1).map(|_| vec![0; right.len() + 1]));
for i in 0..left.len() + 1 {
distance[i][0] = i;
}
for j in 0..right.len() + 1 {
distance[0][j] = j;
}
for (lc, i) in left.chars().zip(1..) {
for (rc, j) in right.chars().zip(1..) {
// Pretend left and right can be indexed by character:
// If we can turn left[..i-1] into right[..j] in C edits, then we
// can turn left[..i] into right[..j] by doing those same edits and
// then deleting left[i-1], costing us C+1 edits.
let delete = distance[i-1][j] + 1;
// Similarly, if we can turn left[..i] into right[..j-1] in C edits,
// then we can turn left[..i] into right[..j] by doing those same
// edits and then inserting right[j-1], costing us C+1 edits.
let insert = distance[i][j-1] + 1;
// Finally, if we can turn left[..i-1] into right[..j-1] in C edits,
// then we can turn left[..i] into right[..j] in C edits if the
// final characters match, or C+1 edits if we have to replace it.
let replace = distance[i-1][j-1] + if lc == rc { 0 } else { 1 };
// Choose the cheapest option.
distance[i][j] = min3(delete, insert, replace);
}
}
println!("Levenshtein distance: {}", distance[left.len()][right.len()]);
for row in &distance {
println!("{:?}", row);
}
println!();
let mut edits = Vec::new();
let (mut i, mut j) = (left.len(), right.len());
let mut cj = right.chars().rev();
while i > 0 && j > 0 {
println!("{}, {}", i, j);
let here = distance[i][j];
if distance[i-1][j] + 1 == here {
edits.push(Edit::Delete);
i -= 1;
} else if distance[i][j-1] + 1 == here {
edits.push(Edit::Insert(cj.next().unwrap()));
j -= 1;
} else if distance[i-1][j-1] + 1 == here {
edits.push(Edit::Replace(cj.next().unwrap()));
i -= 1; j -= 1;
} else if distance[i-1][j-1] == here {
edits.push(Edit::Keep);
cj.next();
i -= 1; j -= 1;
} else {
panic!("Cost at [{}][{}] is unexpected!", i, j);
}
}
while i > 0 {
assert_eq!(distance[i-1][j] + 1, distance[i][j]);
edits.push(Edit::Delete);
i -= 1;
}
while j > 0 {
assert_eq!(distance[i][j-1] + 1, distance[i][j]);
edits.push(Edit::Insert(cj.next().unwrap()));
j -= 1;
}
edits.reverse();
edits
}
fn main() {
let args = Vec::from_iter(std::env::args());
println!("{:?}", diff(&args[1], &args[2]));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment