Created
October 1, 2018 11:34
-
-
Save emiflake/4e4b1353df6c66611facbdef0d425364 to your computer and use it in GitHub Desktop.
JSON Parser in Haskell, more or less compliant
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
module Main where | |
import Network.HTTP | |
import Data.Attoparsec.ByteString.Char8 | |
import Data.Attoparsec.Combinator | |
import qualified Data.ByteString as BS | |
import qualified Data.ByteString.Char8 as C8 | |
import Data.Word | |
import Control.Applicative | |
import Text.Pretty.Simple | |
data Value = Number Double | |
| Boolean Bool | |
| String String | |
| Array [Value] | |
| Object [(Value, Value)] | |
| Null | |
deriving (Show, Eq) | |
whitespace :: Parser () | |
whitespace = try (many (satisfy (inClass " \n\r\t"))) >> pure () | |
parseNumber :: Parser Value | |
parseNumber = do | |
n <- scientific | |
pure . Number $ fromRational (toRational n) | |
parseBoolean :: Parser Value | |
parseBoolean = Boolean . (=="true") <$> (string "true" <|> string "false") | |
escape :: Parser String | |
escape = do | |
d <- char '\\' | |
c <- choice (map char "\\\"0nrvtbf") -- all the characters which can be escaped | |
return [d, c] | |
nonEscape :: Parser Char | |
nonEscape = satisfy (notInClass "\\\"\0\n\r\v\t\b\f") | |
character :: Parser String | |
character = fmap pure nonEscape <|> escape | |
parseString :: Parser Value | |
parseString = do | |
char '"' | |
chars <- many character | |
char '"' | |
pure . String $ concat chars | |
commaSep :: Parser a -> Parser [a] | |
commaSep p = p `sepBy` (whitespace >> char ',' >> whitespace) | |
parseNull :: Parser Value | |
parseNull = string "null" >> pure Null | |
parseArray :: Parser Value | |
parseArray = do | |
whitespace | |
char '[' | |
whitespace | |
values <- commaSep parseValue | |
whitespace | |
char ']' | |
whitespace | |
pure . Array $ values | |
parseObject :: Parser Value | |
parseObject = do | |
whitespace | |
char '{' | |
whitespace | |
values <- commaSep keyValPair | |
whitespace | |
char '}' | |
whitespace | |
pure . Object $ values | |
keyValPair :: Parser (Value, Value) | |
keyValPair = do | |
whitespace | |
key <- parseString | |
whitespace | |
char ':' | |
whitespace | |
value <- parseValue | |
whitespace | |
pure (key, value) | |
parseValue :: Parser Value | |
parseValue = parseNumber <|> parseNull <|> parseBoolean <|> parseString <|> parseArray <|> parseObject | |
numberExample :: BS.ByteString | |
numberExample = "1.0" | |
stringExample :: BS.ByteString | |
stringExample = "\"Hello, world!\"" | |
boolExample :: BS.ByteString | |
boolExample = "false" | |
arrayExample :: BS.ByteString | |
arrayExample = "[1, 2, \"I'm a string\"]" | |
objectExample :: BS.ByteString | |
objectExample = " { \"foo\" \n : true \n} " | |
whitespaceExample :: BS.ByteString | |
whitespaceExample = " \n\t\r[\n\"\\\"Cool\"\n]\n" | |
shouldParse :: BS.ByteString -> IO () | |
shouldParse s = case parseOnly parseValue s of | |
Left e -> error $ "Error at string " ++ show s ++ ": " ++ e | |
_ -> pure () | |
test = do | |
mapM_ shouldParse [ numberExample | |
, stringExample | |
, boolExample | |
, arrayExample | |
, objectExample | |
, whitespaceExample] | |
print "All tests passed" | |
main :: IO () | |
main = do | |
-- BS.getLine >>= print . show . parseOnly parseValue >> main | |
-- BS.readFile "crosssell.json" >>= putStrLn . Prelude.take 100 . C8.unpack | |
-- BS.readFile "crosssell.json" >>= pPrint . C8.take 1000 | |
BS.readFile "tumblr.json" >>= putStrLn . Prelude.take 2000 . show . parseOnly parseValue . C8.concat . C8.lines |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment