Created
March 19, 2012 10:36
-
-
Save martiell/2106993 to your computer and use it in GitHub Desktop.
Textograms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char (ord, toLower, intToDigit) | |
import Data.Function (on) | |
import Data.List | |
import System.Environment (getArgs) | |
key char = case c of | |
c | c >= 'a' && c < 'p' -> 2 + (ord(c) - a) `div` 3 | |
c | c >= 'p' && c <= 's' -> 7 | |
c | c >= 't' && c <= 'v' -> 8 | |
c | c >= 'w' && c <= 'z' -> 9 | |
c | c `elem` "1'" -> 1 | |
_ -> 0 | |
where | |
a = ord('a') | |
c = toLower char | |
keys = map (intToDigit . key) | |
lower = map toLower | |
suffixes suffix str = drop (length str - length suffix) str == suffix | |
filterOut p = filter ( not . p ) | |
findMatches = concat . filterOut singleton . collate | |
where | |
singleton xs = tail xs == [] | |
collate = groupBy ((==) `on` fst) | |
keyPairs a = flip zip a $ map keys a -- [(numbers, word)] | |
preprocess = filterOut (suffixes "'s") . words | |
process = findMatches . sort . keyPairs | |
doFile file = do | |
ws <- readFile file | |
let xs = process $ preprocess ws | |
mapM_ (\y -> putStrLn $ fst y ++ " " ++ snd y) xs | |
main = mapM_ doFile =<< getArgs |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment