BinRoot · January 1, 2016 14:19 · atondwal · Dec 28, 2013 · BinRoot · Dec 28, 2013
diff --git a/download.sh b/download.sh
 #!/bin/bash

 # download the code
 wget https://gist.github.com/BinRoot/8157165/raw/9109a51173b76c4e233fd00165fb25e62b249aa5/spell.hs

 # download big.txt
 wget http://norvig.com/big.txt

 # run the code
 runhaskell spell
diff --git a/spell.hs b/spell.hs
 import Data.Char (isAlpha, isSpace, toLower)
 import Data.List (group, sort, sortBy)
 import Data.Map (fromListWith, Map, member, (!))

 main = do
  rawText <- readFile "big.txt" -- http://norvig.com/big.txt
  let m = train $ getWords rawText
  let sentence = "such codez many haskell very spellz so korrect"
  print $ autofix m sentence
  
 autofix m sentence = unwords $ map (correct m) (words sentence)
  
 getWords str = words $ filter (\x -> isAlpha x || isSpace x) lower
  where lower = map toLower str
        
 train :: Ord a => [a] -> Map a Int
 train = fromListWith (+) . (`zip` repeat 1)

 edits1 word = unique $ deletes ++ transposes ++ replaces ++ inserts
  where splits     = [ (take i word', drop i word') | 
                       i <- [0..length word']]
        deletes    = [ a ++ (tail b) | 
                       (a,b) <- splits, (not.null) b]
        transposes = [ a ++ [b !! 1] ++ [head b] ++ (drop 2 b) | 
                       (a,b) <- splits, length b > 1 ]
        replaces   = [ a ++ [c] ++ (drop 1 b) | 
                       (a,b) <- splits, c <- alphabet, (not.null) b ]
        inserts    = [a ++ [c] ++ b | 
                      (a,b) <- splits, c <- alphabet ]
        alphabet   = ['a'..'z']
        word'      = map toLower word
        
 editsN word n = last $ take n $ iterate edits1' $ edits1 word
  where edits1' ls = unique $ concat $ map edits1 ls

 unique = map head.group.sort

 known [] _ = []
 known (w:ws) m
  | w `member` m = w : known ws m
  | otherwise    = known ws m
                   
 correct m word = head $ sortBy (\a b -> compare (m!b) (m!a)) candidates
  where candidates = head $ filter (not.null) 
                     [ known [word] m 
                     , known (edits1 word) m
                     , known (editsN word 2) m
                     , [word] ]
	#!/bin/bash

	# download the code
	wget https://gist.github.com/BinRoot/8157165/raw/9109a51173b76c4e233fd00165fb25e62b249aa5/spell.hs

	# download big.txt
	wget http://norvig.com/big.txt

	# run the code
	runhaskell spell
	import Data.Char (isAlpha, isSpace, toLower)
	import Data.List (group, sort, sortBy)
	import Data.Map (fromListWith, Map, member, (!))

	main = do
	rawText <- readFile "big.txt" -- http://norvig.com/big.txt
	let m = train $ getWords rawText
	let sentence = "such codez many haskell very spellz so korrect"
	print $ autofix m sentence

	autofix m sentence = unwords $ map (correct m) (words sentence)

	getWords str = words $ filter (\x -> isAlpha x \|\| isSpace x) lower
	where lower = map toLower str

	train :: Ord a => [a] -> Map a Int
	train = fromListWith (+) . (`zip` repeat 1)

	edits1 word = unique $ deletes ++ transposes ++ replaces ++ inserts
	where splits = [ (take i word', drop i word') \|
	i <- [0..length word']]
	deletes = [ a ++ (tail b) \|
	(a,b) <- splits, (not.null) b]
	transposes = [ a ++ [b !! 1] ++ [head b] ++ (drop 2 b) \|
	(a,b) <- splits, length b > 1 ]
	replaces = [ a ++ [c] ++ (drop 1 b) \|
	(a,b) <- splits, c <- alphabet, (not.null) b ]
	inserts = [a ++ [c] ++ b \|
	(a,b) <- splits, c <- alphabet ]
	alphabet = ['a'..'z']
	word' = map toLower word

	editsN word n = last $ take n $ iterate edits1' $ edits1 word
	where edits1' ls = unique $ concat $ map edits1 ls

	unique = map head.group.sort

	known [] _ = []
	known (w:ws) m
	\| w `member` m = w : known ws m
	\| otherwise = known ws m

	correct m word = head $ sortBy (\a b -> compare (m!b) (m!a)) candidates
	where candidates = head $ filter (not.null)
	[ known [word] m
	, known (edits1 word) m
	, known (editsN word 2) m
	, [word] ]