Created
July 10, 2014 11:29
-
-
Save icecrime/caf83a5c9116b4144d91 to your computer and use it in GitHub Desktop.
Haskell Digits-Recognizer Dojo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char | |
import Data.List.Split | |
import qualified Data.Vector as V | |
import Data.Ord | |
import qualified Data.Text as T | |
import qualified Data.Text.IO as T.IO | |
import qualified Data.Text.Read as T.Read | |
-- | Pixel values of a single sample, held in a boxed vector of 'Int'.
type Pixels = V.Vector Int

-- | One sample: an integer label together with its pixel values.
data Digit = Digit
  { label  :: Int     -- ^ Integer label attached to the sample.
  , pixels :: Pixels  -- ^ Pixel values belonging to the sample.
  }
-- | Euclidean distance between two pixel vectors.
--
-- Elements are paired positionally; if the vectors differ in length, the
-- surplus elements of the longer one are ignored (this is how 'V.zipWith'
-- behaves, and matches the previous 'V.zip'-based code).
--
-- The squared differences are summed as 'Int' and converted to 'Float' only
-- once, right before the square root. Summing in 'Float' would round at every
-- step once the running total no longer fits in its mantissa.
distance :: Pixels -> Pixels -> Float
distance d1 d2 = sqrt . fromIntegral . V.sum $ V.zipWith squaredDiff d1 d2
  where
    -- Squared difference of one pair of pixel values.
    squaredDiff :: Int -> Int -> Int
    squaredDiff a b = let delta = a - b in delta * delta
-- | Parse one comma-separated row into a 'Digit'.
--
-- The first field becomes the label and every following field becomes a
-- pixel value. A field that does not begin with a decimal number is read
-- as 0 (best-effort parsing; no error is reported).
parseDigit :: T.Text -> Digit
parseDigit row =
  case map toInt fields of
    (lbl : pxs) -> Digit lbl (V.fromList pxs)
    -- Splitting is expected to always yield at least one field, so this
    -- branch should be unreachable; it exists to keep the match total.
    []          -> Digit 0 V.empty
  where
    fields = T.splitOn (T.pack ",") row

    -- Leading decimal number of a field, or 0 when there is none.
    toInt :: T.Text -> Int
    toInt = either (const 0) fst . T.Read.decimal
-- | Find the training sample closest to the given digit.
--
-- Every element of @training@ is paired with its 'distance' to @digit@, and
-- the pair with the smallest distance is returned.
--
-- NOTE(review): an empty @training@ vector is not handled here; the result
-- is whatever 'V.minimumBy' does for an empty vector.
identify :: Digit -> V.Vector Digit -> (Digit, Float)
identify digit training = V.minimumBy (comparing snd) (V.map scored training)
  where
    target = pixels digit
    -- Pair a candidate with its distance to the digit being identified.
    scored candidate = (candidate, distance target (pixels candidate))
-- | Read a file of comma-separated rows and parse it into digits.
--
-- The first line is discarded (presumably a header row -- confirm against the
-- data files) and every remaining line is passed to 'parseDigit'. A file with
-- no lines yields an empty vector.
readDigits :: String -> IO (V.Vector Digit)
readDigits filename = do
  fileContent <- T.IO.readFile filename
  -- 'drop 1' instead of 'tail': 'tail' throws when the file has no lines.
  let rows = drop 1 (T.lines fileContent)
  return $ V.fromList (map parseDigit rows)
-- | Load the training and validation samples, find the closest training
-- sample for every validation sample, and print one line per validation
-- sample with the matched label, the sample's own label and the distance.
main :: IO ()
main = do
  trainingSample <- readDigits "trainingsample.csv"
  validationSample <- readDigits "validationsample.csv"
  V.mapM_ (putStrLn . describe trainingSample) validationSample
  where
    -- Render the report line for one validation sample.
    describe training d =
      let (ref, dist) = identify d training
       in concat
            [ "Found "
            , show (label ref)
            , " for "
            , show (label d)
            , " (distance="
            , show dist
            , ")"
            ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment