Created
February 15, 2019 14:48
-
-
Save StephenWakely/95eb54b9e0780ab62311e6f292edd88c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | Code for calculating the levenstein distance between two strings | |
module Data.Levenstein (distance) where | |
import Prelude | |
import Data.Array as Array | |
import Data.Array ((!!)) | |
import Data.Function.Memoize (memoize2) | |
import Data.Maybe as Maybe | |
import Data.Newtype (unwrap) | |
import Data.String (CodePoint) | |
import Data.String as String | |
import Data.Ord.Min (Min(..)) | |
import Partial.Unsafe (unsafePartial) | |
min3 ∷ ∀ a. Ord a ⇒ a → a → a → a | |
min3 x y z = | |
unwrap $ Min x <> Min y <> Min z | |
unsafeIndex ∷ ∀ a. Array a → Int → a | |
unsafeIndex arr idx = | |
unsafePartial $ Maybe.fromJust $ arr !! idx | |
infixl 8 unsafeIndex as !!! | |
-- | Calculates the distance from the code points | |
distanceCodePoints ∷ Array CodePoint → Array CodePoint → Int | |
distanceCodePoints x y = | |
d (Array.length x) (Array.length y) | |
where | |
d = memoize2 $ \m n → dist m n | |
dist i 0 = i | |
dist 0 j = j | |
dist i j = | |
let | |
a = (d (i - 1) j) + 1 | |
b = (d i (j - 1)) + 1 | |
c = (d (i - 1) (j - 1)) + if x !!! (i - 1) == y !!! (j - 1) then 0 else 1 | |
in | |
min3 a b c | |
-- | Calculates the levenshtein distance between two strings - this is the number of changes required to get from one string to the other.. | |
distance ∷ String → String → Int | |
distance x y = | |
distanceCodePoints (String.toCodePointArray x) (String.toCodePointArray y) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment