Skip to content

Instantly share code, notes, and snippets.

@lovasoa
Created June 13, 2018 13:44
Show Gist options
  • Save lovasoa/cfa308bbd5799930f8450d571d22edc1 to your computer and use it in GitHub Desktop.
Save lovasoa/cfa308bbd5799930f8450d571d22edc1 to your computer and use it in GitHub Desktop.
import Data.Maybe
import Data.List
import Data.Ord
data DicTrie = DicTrie [(Char, DicTrie)] deriving Show
emptyTrie = DicTrie []
mainDict = DicTrie [
('e', DicTrie[
('n', DicTrie[
('t', emptyTrie)
])
]),
('t', DicTrie[
('r', DicTrie[
('e', emptyTrie)
])
])
]
lookupTrie :: Char -> DicTrie -> Maybe DicTrie
lookupTrie chr (DicTrie dct) = lookup chr dct
type Tokens = [String]
pathCost :: Tokens -> (Int, Int)
pathCost xs = (length xs, -foldl max 0 (map length xs))
tokenize :: DicTrie -> String -> Tokens
tokenize _ "" = []
tokenize dict str = minimumBy (comparing pathCost) (map (\(prefix,rest) -> prefix : tokenize dict rest) $ prefixes dict str)
prefixes :: DicTrie -> String -> [(String,String)]
prefixes dct str =
let
(pfx, sstr) = splitPrefixAlways dct str
pfxs = drop 1 $ zip (inits pfx) (tails pfx)
in map (\(init,tail) -> (init, tail ++ sstr)) pfxs
splitPrefixAlways :: DicTrie -> String -> (String, String)
splitPrefixAlways dct str =
let (pfx,sstr) = splitPrefix dct str
in if null pfx then splitAt 1 str else (pfx,sstr)
splitPrefix :: DicTrie -> String -> (String, String)
splitPrefix dict "" = ("", "")
splitPrefix dict (chr:str) =
case lookupTrie chr dict of
Just subdict ->
let (nextPfx, sstr) = splitPrefix subdict str
in (chr:nextPfx, sstr)
Nothing -> ("", chr:str)
main = print $ tokenize mainDict "entraineraient tres tres bien entre les trognons qui rentrent"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment