Last active
March 27, 2019 18:16
-
-
Save lojic/6745960 to your computer and use it in GitHub Desktop.
Simple Haskell example to compute the "edit distance" between two strings. I also translated the Haskell program into Ruby as a comparison. The Ruby version is about 21x slower (when transforming "mountain lion" to "mavericks"). The algorithm is naive & inefficient, but it's the same algorithm in both languages.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- From Simon Thompson's excellent "The Craft of Functional Programming" | |
-- | |
-- Find the "edit distance" between two strings using five basic | |
-- editing operations: | |
-- | |
-- 1) Change one character into another | |
-- 2) Copy a character | |
-- 3) Delete a character | |
-- 4) Insert a character | |
-- 5) Delete (kill) to the end of the string | |
data Edit = Change Char | | |
Copy | | |
Delete | | |
Insert Char | | |
Kill | |
deriving (Eq,Show) | |
transform :: String -> String -> [ Edit ] | |
transform [] [] = [] | |
transform xs [] = [ Kill ] | |
transform [] ys = map Insert ys | |
transform (x:xs) (y:ys) | |
| x==y = Copy : transform xs ys | |
| otherwise = best [ Delete : transform xs (y:ys), | |
Insert y : transform (x:xs) ys, | |
Change y : transform xs ys ] | |
best :: [[Edit]] -> [Edit] | |
best [x] = x | |
best (x:xs) | |
| cost x <= cost b = x | |
| otherwise = b | |
where b = best xs | |
cost :: [Edit] -> Int | |
cost = length . filter (/=Copy) | |
-- Examples | |
-- transform "fish" "chips" => [Insert 'c',Change 'h',Copy,Insert 'p',Copy,Kill] | |
-- transform "mountain lion" "mavericks" => | |
-- [Copy,Delete,Change 'a',Change 'v',Change 'e',Change 'r',Copy,Insert 'c',Insert 'k',Insert 's',Kill] | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A translation of the Haskell program into Ruby | |
require 'pp' | |
def transform s1, s2 | |
return [] if s1.empty? && s2.empty? | |
return [ :kill ] if !s1.empty? && s2.empty? | |
return s2.chars.map {|c| [ :insert, c ] } if s1.empty? && !s2.empty? | |
return [ :copy ] + transform(s1[1..-1],s2[1..-1]) if s1[0] == s2[0] | |
best [ [ :delete ] + transform(s1[1..-1], s2), | |
[[ :insert, s2[0]]] + transform(s1, s2[1..-1]), | |
[[ :change, s2[0]]] + transform(s1[1..-1], s2[1..-1])] | |
end | |
def best lst | |
x = lst.first | |
return x if lst.length == 1 | |
b = best(lst[1..-1]) | |
return x if cost(x) <= cost(b) | |
b | |
end | |
def cost edits | |
edits.select {|e| e != :copy }.length | |
end | |
pp transform("mountain lion", "mavericks") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Without comments and type declarations | |
data Edit = Change Char | Copy | Delete | Insert Char | Kill deriving (Eq,Show) | |
transform [] [] = [] | |
transform xs [] = [ Kill ] | |
transform [] ys = map Insert ys | |
transform (x:xs) (y:ys) | x==y = Copy : transform xs ys | |
| otherwise = best [ Delete : transform xs (y:ys), | |
Insert y : transform (x:xs) ys, | |
Change y : transform xs ys ] | |
best [x] = x | |
best (x:xs) | cost x <= cost b = x | |
| otherwise = b | |
where b = best xs | |
cost = length . filter (/=Copy) | |
main = do | |
putStr $ show $ transform "mountain lion" "mavericks" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment