Last active
November 7, 2015 08:19
-
-
Save rainyear/94b5d9a865601f075719 to your computer and use it in GitHub Desktop.
Spell corrector
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char (toLower) | |
import qualified Data.Map as Map -- (insertWith, empty, member) | |
import qualified Data.Set as Set -- (fromList, toList, union, unions) | |
-- | Train a word-frequency model from @big.txt@, read one word from standard
-- input, and print the most frequent known word among the closest candidates.
--
-- Candidates are tried in order of increasing edit distance: the word itself,
-- then everything produced by 'editOne', then everything produced by
-- 'editTwo'. The first tier that contains at least one known word wins; if no
-- tier does, the input word itself is printed.
main :: IO ()
main = do
  fileStr <- readFile "big.txt"
  -- Tokenise on anything that is not an ASCII letter, so that "the," and
  -- "the" are counted as the same word and every key uses the same
  -- 'a'..'z' alphabet that the edit functions generate.
  let training = words (normalise <$> fileStr)
      -- fromListWith sums the counts without the deferred chain of inserts
      -- that a lazy foldl would accumulate.
      knowledge = Map.fromListWith (+) [(w, 1 :: Int) | w <- training]
  -- Every key in the model is lowercase, so fold the query the same way.
  inputWord <- fmap toLower <$> getLine
  let known = Set.filter (`Map.member` knowledge)
      -- The list is lazy: editTwo is only evaluated when the cheaper tiers
      -- contain no known word.
      candidates = known <$> [Set.singleton inputWord, editOne inputWord, editTwo inputWord]
      realCand = case filter (not . Set.null) candidates of
        (best : _) -> best
        [] -> Set.singleton inputWord
  -- print [(Map.findWithDefault 1 x knowledge, x) | x <- Set.toList realCand]
  -- realCand is never empty, so maximum is safe here. Ties on frequency are
  -- broken by the lexicographically greatest word.
  print $ snd $ maximum [(Map.findWithDefault 1 x knowledge, x) | x <- Set.toList realCand]
  where
    -- Lowercase ASCII letters are kept; every other character becomes a
    -- word separator.
    normalise c
      | lower >= 'a' && lower <= 'z' = lower
      | otherwise = ' '
      where
        lower = toLower c
-- | All prefixes of a list, shortest first: the empty prefix, then the first
-- element alone, and so on up to the whole list.
wordSplit :: [a] -> [[a]]
wordSplit xs = [take n xs | n <- [0 .. length xs]]
-- | Every way of cutting a list into a @(prefix, suffix)@ pair, ordered by
-- increasing prefix length. A list of length @n@ yields @n + 1@ pairs, from
-- @([], word)@ to @(word, [])@.
--
-- 'splitAt' produces each pair directly, so no reversing is needed to
-- recover the suffixes.
splits :: [a] -> [([a], [a])]
splits word = [splitAt n word | n <- [0 .. length word]]
-- | Every list obtained by dropping exactly one element from the input, in
-- order of the dropped position.
editDel :: [a] -> [[a]]
-- A split whose suffix is empty fails the pattern and is skipped.
editDel word = [before ++ rest | (before, _ : rest) <- splits word]
-- | Every list obtained by swapping one pair of adjacent elements of the
-- input, in order of the position of the swapped pair.
editTrans :: [a] -> [[a]]
-- Only splits whose suffix has at least two elements match the pattern.
editTrans word = [before ++ (second : first : rest) | (before, first : second : rest) <- splits word]
-- | Every string obtained by replacing one character of the word with a
-- lowercase ASCII letter (@'a'@ to @'z'@). The result has the same length as
-- the input. Replacing a character with itself is included, so the original
-- word appears among the results.
editReplace :: String -> [String]
-- The first character of the suffix is discarded and @c@ takes its place;
-- the split with an empty suffix has nothing to replace and is skipped.
editReplace word = [before ++ (c : rest) | (before, _ : rest) <- splits word, c <- ['a' .. 'z']]

-- | Every string obtained by inserting one lowercase ASCII letter (@'a'@ to
-- @'z'@) at any position of the word, including before the first character
-- and after the last. The result is one character longer than the input.
editInsert :: String -> [String]
editInsert word = [before ++ (c : after) | (before, after) <- splits word, c <- ['a' .. 'z']]
-- | The set of all strings produced from the word by a single application of
-- 'editDel', 'editTrans', 'editReplace' or 'editInsert'. Duplicates produced
-- by different edits collapse in the set.
editOne :: String -> Set.Set String
editOne word = Set.fromList (concatMap ($ word) [editDel, editTrans, editReplace, editInsert])
-- | The set of all strings produced by applying 'editOne' to every member of
-- @editOne word@, i.e. everything reachable through two successive edits.
editTwo :: String -> Set.Set String
editTwo word = Set.unions [editOne once | once <- Set.toList (editOne word)]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment