Last active
May 30, 2023 11:01
-
-
Save keleshev/fa7c868f0d1176434341024aed4eaeb7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let distance' ~bound a b = | |
let n = String.length a and m = String.length b in | |
assert (n >= m); | |
if n - m > bound then None else | |
let fill_bound = min m bound in | |
let previous = Array.make (m + 1) max_int in | |
for j = 0 to fill_bound do previous.(j) <- j done; | |
let current = Array.make (m + 1) max_int in | |
for i = 0 to n - 1 do | |
current.(0) <- i + 1; | |
(* Compute stripe indices *) | |
let stripe_start = max 0 (i - fill_bound) | |
and stripe_end = min m (i + fill_bound) - 1 in | |
(* Ignore entry left of leftmost *) | |
if stripe_start > 0 then current.(stripe_start) <- max_int; | |
for j = stripe_start to stripe_end do | |
current.(j + 1) <- | |
if a.[i] = b.[j] then | |
previous.(j) | |
else | |
let deletion = previous.(j + 1) | |
and insertion = current.(j) | |
and substitution = previous.(j) in | |
let smallest = min deletion (min insertion substitution) in | |
if smallest = max_int then max_int else smallest + 1; | |
done; | |
for j = 0 to m do previous.(j) <- current.(j) done; | |
done; | |
let distance = previous.(m) in | |
if distance > bound then None else Some distance | |
(** Bounded Levenshtein distance | |
Based on https://www.baeldung.com/cs/levenshtein-distance-computation | |
let m = min(length(a), length(b)) | |
Time: O(m * min(m, bound)) | |
Space: O(m) | |
By default, bound is max_int (unbounded). | |
*) | |
let distance ?(bound=max_int) a b = | |
let n = String.length a and m = String.length b in | |
if n > m then distance' ~bound a b else distance' ~bound b a | |
let test = | |
assert (distance ~bound:max_int "asdfghjkl" "lkjhgfdsa" = Some 8); | |
assert (distance ~bound:100 "asdfghjkl" "lkjhxfdsa" = Some 9); | |
assert (distance ~bound:2 "sydny meyer" "sydney meier" = Some 2); | |
assert (distance ~bound:2 "sydny meyer" "sydney meiex" = None); | |
assert (distance "<helljlkjlkjlkjlkjlkjlklkjlkjlo>" | |
"--hellosdfsdfsdfsdfsdfsdfsdfsdfsdfsdfsdf" = Some 36) | |
let (let*) = Option.bind | |
let test2 = | |
let contenders = ["pull"; "push"; "clone"; "init"; "filter-branch"] in | |
let typo = "puls" in | |
let contenders = List.filter_map (fun s -> | |
let* d = distance ~bound:2 typo s in | |
Some (d, s)) contenders in | |
assert (List.sort compare contenders = [1, "pull"; 2, "push"]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment