Last active
December 16, 2015 06:59
-
-
Save phadej/5395560 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Control.Monad | |
import Control.Monad.ST | |
import Data.Array.ST | |
import Data.Array.Unboxed | |
import Criterion.Main (defaultMain, bgroup, bench, whnf) | |
import Debug.Trace | |
simple :: String -> String -> Int | |
simple [] b = length b | |
simple a [] = length a | |
{- Same from definition | |
simple a b | min (length a) (length b) == 0 | |
= max (length a) (length b) | |
-} | |
-- both are not null | |
simple a@(x:xs) b@(y:ys) = minimum [ simple xs b + 1 | |
, simple a ys + 1 | |
, simple xs ys + if x == y then 0 else 1 | |
] | |
-- Polynomial, using lazyness | |
polynomial :: String -> String -> Int | |
polynomial s t | |
| s == t = 0 | |
polynomial s [] = length s | |
polynomial [] t = length t | |
polynomial s t = a !! length s !! length t | |
where a = [[f i j | j <- [0..length t]] | i <- [0..length s]] | |
f 0 j = j | |
f i 0 = i | |
f i j = minimum [ (a !! i !! (j - 1)) + 1 | |
, (a !! (i - 1) !! j) + 1 | |
, (a !! (i - 1) !! (j - 1)) + if (s !! (i - 1)) == (t !! (j - 1)) then 0 else 1 | |
] | |
-- Loosely following the iterative C version | |
imperative :: String -> String -> Int | |
imperative a b | a == b = 0 | |
imperative [] b = length b | |
imperative a [] = length a | |
imperative a b = runST $ do | |
-- create two work vectors of integer distances | |
v0 <- newArray_ (0, length b) :: ST s (STUArray s Int Int) | |
v1 <- newArray_ (0, length b) :: ST s (STUArray s Int Int) | |
-- initialize v0 (the previous row of distances) | |
-- this row is A[0][i]: edit distance for an empty s | |
-- the distance is just the number of characters to delete from t | |
forM_ (range (0, length b)) $ \i -> do | |
writeArray v0 i i | |
forM_ (range (0, length a - 1)) $ \i -> do | |
-- calculate v1 (current row distances) from the previous row v0 | |
{- Debug | |
list <- getElems v0 | |
traceShow list (return ()) | |
--} | |
-- first element of v1 is A[i+1][0] | |
-- edit distance is delete (i+1) chars from s to match empty t | |
writeArray v1 0 (i + 1) | |
-- use formula to fill in the rest of the row | |
forM_ (range (0, length b - 1)) $ \j -> do | |
v1j <- readArray v1 j | |
v0j <- readArray v0 j | |
v0j1 <- readArray v0 $ j + 1 | |
writeArray v1 (j + 1) $ minimum [v1j + 1, v0j1 + 1, v0j + if (aArray ! i == bArray ! j) then 0 else 1] | |
-- copy v1 (current row) to v0 (previous row) for next interation | |
forM_ (range (0, length b)) $ \j -> do | |
t <- readArray v1 j | |
writeArray v0 j t | |
-- return | |
readArray v1 (length b) | |
where aArray = listArray (0, length a - 1) a :: UArray Int Char | |
bArray = listArray (0, length b - 1) b :: UArray Int Char | |
longword :: Int -> String | |
longword n = take n $ concat $ repeat "qwerty" | |
test :: String -> String -> Bool | |
test a b = s == p && s == i | |
where s = simple a b | |
p = polynomial a b | |
i = imperative a b | |
main :: IO () | |
main = defaultMain [ | |
bgroup "levenshtein" [ | |
bench "simple" $ whnf (simple a2) b2 | |
, bench "polynomial" $ whnf (polynomial a2) b2 | |
], | |
bgroup "polynomials" [ | |
bench "polynomial" $ whnf (polynomial a10) b10 | |
, bench "imperative" $ whnf (imperative a10) b10 | |
] | |
] | |
where a2 = longword2 ++ "kitten" ++ longword2 | |
b2 = longword2 ++ "sitting" ++ longword2 | |
a10 = longword10 ++ "kitten" ++ longword10 | |
b10 = longword10 ++ "sitting" ++ longword10 | |
longword2 = longword 2 | |
longword10 = longword 10 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
warming up | |
estimating clock resolution... | |
mean is 3.218045 us (160001 iterations) | |
found 1039 outliers among 159999 samples (0.6%) | |
748 (0.5%) high severe | |
estimating cost of a clock call... | |
mean is 94.39777 ns (22 iterations) | |
found 2 outliers among 22 samples (9.1%) | |
1 (4.5%) high mild | |
1 (4.5%) high severe | |
benchmarking levenshtein/simple | |
collecting 100 samples, 1 iterations each, in estimated 61.04770 s | |
mean: 632.8908 ms, lb 619.3514 ms, ub 649.5024 ms, ci 0.950 | |
std dev: 76.98222 ms, lb 65.24029 ms, ub 95.02371 ms, ci 0.950 | |
found 3 outliers among 100 samples (3.0%) | |
3 (3.0%) high mild | |
variance introduced by outliers: 85.219% | |
variance is severely inflated by outliers | |
benchmarking levenshtein/polynomial | |
mean: 29.04588 us, lb 28.44555 us, ub 29.80340 us, ci 0.950 | |
std dev: 3.446563 us, lb 2.864958 us, ub 4.431689 us, ci 0.950 | |
found 4 outliers among 100 samples (4.0%) | |
3 (3.0%) high mild | |
1 (1.0%) high severe | |
variance introduced by outliers: 84.198% | |
variance is severely inflated by outliers | |
benchmarking polynomials/polynomial | |
mean: 500.2735 us, lb 482.4497 us, ub 530.7803 us, ci 0.950 | |
std dev: 117.2728 us, lb 78.34104 us, ub 186.7606 us, ci 0.950 | |
found 6 outliers among 100 samples (6.0%) | |
2 (2.0%) high mild | |
4 (4.0%) high severe | |
variance introduced by outliers: 95.738% | |
variance is severely inflated by outliers | |
benchmarking polynomials/imperative | |
mean: 58.20797 us, lb 56.37413 us, ub 60.21839 us, ci 0.950 | |
std dev: 9.846183 us, lb 8.873440 us, ub 11.31322 us, ci 0.950 | |
variance introduced by outliers: 91.538% | |
variance is severely inflated by outliers |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment