Skip to content

Instantly share code, notes, and snippets.

@rampion
Forked from jberryman/med.hs
Created March 22, 2012 13:44
Show Gist options
  • Save rampion/2158429 to your computer and use it in GitHub Desktop.
Save rampion/2158429 to your computer and use it in GitHub Desktop.
an optimized haskell Levenshtein distance, using the vector library
module Main where
import qualified Data.List as L
import qualified Data.Vector.Unboxed.Safe as V
import Control.Monad (replicateM)
import System.Random (randomRIO)
import Criterion.Main
med :: String -> String -> Int
med s1 = V.last . V.ifoldl' scanS1 costs_i . V.fromList
where vs1 = V.fromList s1
costs_i = V.enumFromN 1 $ V.length vs1 -- [0.. length s1]
scanS1 costs_W costSW_i c2 =
let v = V.zip vs1 costs_W
v' = V.postscanl' scanVec (costSW_i, costSW_i + 1) v
scanVec (costSW, costS) (c1, costW) =
(costW, min (min costS costW + 1)
(costSW + if c1 == c2 then 0 else 2))
in snd $ V.unzip v'
med' :: String -> String -> Int
med' s1 = V.last . L.foldl' (uncurry . scanS1) costs_i . zip [0..]
where vs1 = V.fromList s1
costs_i = V.enumFromN 1 $ V.length vs1 -- [0.. length s1]
scanS1 costs_W costSW_i c2 =
let v = V.zip vs1 costs_W
v' = V.postscanl' scanVec (costSW_i, costSW_i + 1) v
scanVec (costSW, costS) (c1, costW) =
(costW, min (min costS costW + 1)
(costSW + if c1 == c2 then 0 else 2))
in snd $ V.unzip v'
main :: IO ()
main = do
string1 <- replicateM 200 $ randomRIO ('a','z')
string2 <- replicateM 200 $ randomRIO ('a','z')
defaultMain [ bench "vectors" $ whnf (uncurry med) (string1, string2)
, bench "one list" $ whnf (uncurry med') (string1, string2)
]
{-
benchmarking vectors
mean: 642.2392 us, lb 639.2511 us, ub 645.0969 us, ci 0.950
std dev: 14.95784 us, lb 12.68023 us, ub 18.27670 us, ci 0.950
benchmarking one list
mean: 719.9067 us, lb 717.2804 us, ub 722.7214 us, ci 0.950
std dev: 13.96940 us, lb 12.11219 us, ub 16.36222 us, ci 0.950
-}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment