Created
June 20, 2011 16:52
-
-
Save shangaslammi/1035979 to your computer and use it in GitHub Desktop.
KataBankOCR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.List | |
import Data.Maybe | |
import Control.Monad | |
import Control.Arrow | |
import System.Environment | |
newtype Account = Account { toList :: [Maybe Int] } deriving (Eq) | |
data Status = OK | Illegible | Error | Amb [Account] deriving (Eq) | |
instance Show Status where | |
show OK = "" | |
show Illegible = "ILL" | |
show Error = "ERR" | |
show (Amb acs) = "AMB " ++ show acs | |
instance Show Account where | |
show = concatMap (maybe "?" show) . toList | |
linesPerAccount = 4 | |
columnsPerDigit = 3 | |
charsPerLine = 27 | |
charsPerDigit = 9 | |
template = [ | |
" _ _ _ _ _ _ _ _ ", | |
"| | | _| _||_||_ |_ ||_||_|", | |
"|_| ||_ _| | _||_| ||_| _|", | |
" " ] | |
{-- | |
Usage: ocr <filename> | |
Reads account numbers from given file and outputs | |
parsed results to stdout. | |
--} | |
main = getFileName >>= readFile >>= processFile >>= putStrLn | |
where getFileName = liftM head getArgs | |
processFile = return . formatAccounts . parseAccounts | |
formatAccounts = unlines . map formatAccount | |
formatAccount = (show *** show) >>> (\(a,b)-> a ++ " " ++ b) | |
-- | Parse a list of accounts from input string | |
parseAccounts :: String -> [(Account, Status)] | |
parseAccounts = map parseAccount . splitLines | |
where splitLines = groupN linesPerAccount . map normalize . lines | |
normalize = take charsPerLine . (++ repeat ' ') | |
-- | Parse a single account from a list of lines presentation | |
parseAccount :: [String] -> (Account, Status) | |
parseAccount = uncurry pickValid . (Account . map ocr &&& amb) . packChars | |
-- | Take the lines of an account presentation and return a list that contains | |
-- the cells for individual digits. | |
packChars :: [String] -> [String] | |
packChars = map concat . transpose . map (groupN columnsPerDigit) . init | |
-- | OCR the packed presentation of a single digit to an int or Nothing if the | |
-- digit was illegible. | |
ocr :: String -> Maybe Int | |
ocr = flip lookup digits | |
digits = zip (packChars template) [0..9] | |
-- | Generate all possible account numbers assuming that one character in the | |
-- input is corrupted. | |
amb :: [String] -> [Account] | |
amb acc = map Account $ do | |
(i,d) <- zip [0..] acc | |
(j,c) <- zip [0..] d | |
c' <- if c == ' ' then "|_" else " " | |
let d' = replace j c' d | |
let o' = ocr d' | |
guard $ isJust o' | |
return $ replace i o' ocrAcc | |
where ocrAcc = map ocr acc | |
-- | Determine the status of the OCR result. | |
pickValid :: Account -> [Account] -> (Account, Status) | |
pickValid acc alts | |
| status == OK = (acc, OK) | |
| otherwise = case valids of | |
[] -> (acc, status) | |
[x] -> (x, OK) | |
_ -> (acc, Amb valids) | |
where status = validate acc | |
valids = filterValid alts | |
filterValid = filter ((OK==).validate) | |
-- | Validate an account number via checksum | |
validate :: Account -> Status | |
validate = maybe Illegible status . calcCheckSum . toList | |
where status s = if (mod s 11) == 0 then OK else Error | |
calcCheckSum = maybeSum . zipWith (liftM2 (*)) (map Just [9,8..1]) | |
maybeSum = foldr (liftM2 (+)) (Just 0) | |
-- | Group a list into sublists of length n | |
groupN :: Int -> [a] -> [[a]] | |
groupN n = takeWhile (not.null) . unfoldr (Just . splitAt n) | |
-- | Replace the nth member of a list with a new item | |
replace :: Int -> a -> [a] -> [a] | |
replace i x = (second tail >>> second (x:) >>> uncurry (++)) . splitAt i |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment