Created
June 18, 2020 18:28
-
-
Save oxalica/49660aa6cc99c46dfcadb2aff4f6c29a to your computer and use it in GitHub Desktop.
A toy parser for json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- https://www.json.org/json-en.html | |
| {-# LANGUAGE OverloadedStrings #-} | |
| import Data.Map.Strict (Map, fromList) | |
| import qualified Data.Text as T | |
| import Data.Char (isDigit, chr, isHexDigit) | |
| import Control.Monad (ap) | |
| import Control.Applicative (Alternative(..), optional) | |
| newtype Parser a = Parser { runParser :: T.Text -> Either String (T.Text, a) } | |
| instance Functor Parser where | |
| fmap f (Parser p) = Parser $ \inp -> fmap f <$> p inp | |
| instance Applicative Parser where | |
| pure v = Parser $ \inp -> Right (inp, v) | |
| (<*>) = ap | |
| instance Alternative Parser where | |
| empty = fail "All parse branch failed" | |
| Parser p <|> Parser q = Parser $ \inp -> case p inp of | |
| Left _ -> q inp | |
| Right ret -> Right ret | |
| instance Monad Parser where | |
| return = pure | |
| Parser p >>= f = Parser $ \inp -> p inp >>= \(rest, v) -> runParser (f v) rest | |
| instance MonadFail Parser where | |
| fail = Parser . const . Left | |
| eat :: Int -> Parser T.Text | |
| eat n = Parser $ \inp -> if T.length inp >= n | |
| then let (v, rest) = T.splitAt n inp in Right (rest, v) | |
| else Left $ "Expecting " ++ show (n - T.length inp) ++ " more chars" | |
| eatWhile :: (Char -> Bool) -> Parser T.Text | |
| eatWhile p = Parser $ \inp -> | |
| let (v, rest) = T.break (not . p) inp in | |
| Right (rest, v) | |
| charIf :: (Char -> Bool) -> Parser Char | |
| charIf p = Parser $ \inp -> case T.uncons inp of | |
| Nothing -> Left "Expecting one more char" | |
| Just (c, rest) -> if p c | |
| then Right (rest, c) | |
| else Left $ "Unexpected char: `" ++ c : "`" | |
| eof :: Parser () | |
| eof = Parser $ \inp -> if T.null inp | |
| then Right (inp, ()) | |
| else Left $ "Extra chars: `" ++ T.unpack inp ++ "`" | |
| -- Fail if empty | |
| eatWhile' :: (Char -> Bool) -> Parser T.Text | |
| eatWhile' p = eatWhile p >>= \s -> if T.null s | |
| then fail "Nothing consumed" | |
| else pure s | |
| string :: T.Text -> Parser T.Text | |
| string s = do | |
| s' <- eat (T.length s) | |
| if s == s' | |
| then return s' | |
| else fail $ "Expecting `" ++ T.unpack s ++ "`" | |
| delimited :: Parser b -> Parser c -> Parser a -> Parser a | |
| delimited beg end inner = beg *> inner <* end | |
| separated :: Parser b -> Parser a -> Parser [a] | |
| separated sep p = (:) <$> p <*> many (sep *> p) | |
| <|> pure [] | |
| ws :: Parser () | |
| ws = () <$ eatWhile (`elem` (" \n\r\t" :: String)) | |
| data JsonValue | |
| = JsonNull | |
| | JsonBool Bool | |
| | JsonString T.Text | |
| | JsonNumber Double | |
| | JsonArray [JsonValue] | |
| | JsonObject (Map T.Text JsonValue) | |
| deriving (Show, Eq) | |
| jsonValue :: Parser JsonValue | |
| jsonValue = JsonNull <$ string "null" | |
| <|> JsonBool True <$ string "true" | |
| <|> JsonBool False <$ string "false" | |
| <|> JsonString <$> jsonString | |
| <|> JsonNumber <$> jsonNumber | |
| <|> JsonArray <$> delimited (string "[" >> ws) (ws >> string "]") | |
| (separated (ws >> string "," >> ws) jsonValue) | |
| <|> JsonObject . fromList <$> delimited (string "{" >> ws) (ws >> string "}") | |
| (separated (ws >> string "," >> ws) kvPair) | |
| where | |
| kvPair :: Parser (T.Text, JsonValue) | |
| kvPair = (,) <$> jsonString <* ws <* string ":" <* ws <*> jsonValue | |
| jsonString :: Parser T.Text | |
| jsonString = T.concat <$> delimited (string "\"") (string "\"") (many segment) | |
| where | |
| segment :: Parser T.Text | |
| segment = eatWhile' (\c -> c /= '\"' && c /= '\\') | |
| <|> "\"" <$ string "\\\"" | |
| <|> "\\" <$ string "\\\\" | |
| <|> "/" <$ string "\\/" | |
| <|> "\b" <$ string "\\b" | |
| <|> "\f" <$ string "\\f" | |
| <|> "\n" <$ string "\\n" | |
| <|> "\r" <$ string "\\r" | |
| <|> "\t" <$ string "\\t" | |
| <|> unicodeEscape | |
| unicodeEscape :: Parser T.Text | |
| unicodeEscape = do | |
| _ <- string "\\u" | |
| s <- T.unpack <$> eat 4 | |
| if all isHexDigit s | |
| then pure $ T.singleton $ chr $ read $ "0x" ++ s | |
| else fail $ "Invalid unicode escape: `" ++ s ++ "`" | |
| jsonNumber :: Parser Double | |
| jsonNumber = do | |
| signPart <- optional $ string "-" | |
| intPart <- string "0" | |
| <|> T.cons <$> charIf (`elem` ("123456789" :: String)) | |
| <*> eatWhile isDigit | |
| decPart <- optional $ T.append <$> string "." <*> eatWhile' isDigit | |
| expPart <- optional $ T.concat <$> sequence | |
| [ (string "E" <|> string "e") | |
| , (string "+" <|> string "-" <|> pure "") | |
| , eatWhile' isDigit ] | |
| return $ read $ concat | |
| [ maybe "" T.unpack signPart | |
| , T.unpack intPart | |
| , maybe "" T.unpack decPart | |
| , maybe "" T.unpack expPart | |
| ] | |
| parseJson :: T.Text -> Either String JsonValue | |
| parseJson = fmap snd . runParser (jsonValue <* eof) | |
| main :: IO () | |
| main = mapM_ (\(s, expect) -> do | |
| let got = parseJson (T.pack s) | |
| if expect == got | |
| then putStrLn "Passed" | |
| else do | |
| putStrLn s | |
| putStrLn $ "Expect: " ++ show expect | |
| putStrLn $ "Got: " ++ show got | |
| error "Failed") | |
| [ ("", Left ("Expecting 1 more chars" :: String)) | |
| , ("null", Right JsonNull) | |
| , ("0", Right (JsonNumber 0)) | |
| , ("123", Right (JsonNumber 123)) | |
| , ("0123", Left "Extra chars: `123`") | |
| , ("12.34e-5", Right (JsonNumber 1.234e-4)) | |
| , ("\"he llo\"", Right (JsonString "he llo")) | |
| , ("\"with\\nspecial\\u0000chars\"", Right (JsonString "with\nspecial\0chars")) | |
| , ("[ ]", Right (JsonArray [])) | |
| , ("[ 1 ]", Right (JsonArray [JsonNumber 1])) | |
| , ("[ 1 , 2 ]", Right (JsonArray [JsonNumber 1, JsonNumber 2])) | |
| , ("[ null , 1 , \"2\" , [ 3 ] ]", Right (JsonArray [JsonNull, JsonNumber 1, JsonString "2", JsonArray [JsonNumber 3]])) | |
| , ("{ }", Right (JsonObject (fromList []))) | |
| , ("{ \"foo\" : \"bar\" }", Right (JsonObject (fromList [("foo", JsonString "bar")]))) | |
| , ( | |
| "{ \"1\" : null , \"2\" : 2 , \"3\\n\" : { \"inner\" : [ { } ] } }", | |
| Right (JsonObject (fromList [ | |
| ("1", JsonNull), | |
| ("2", JsonNumber 2.0), | |
| ("3\n", JsonObject (fromList [ | |
| ("inner", JsonArray [JsonObject (fromList [])]) | |
| ])) | |
| ])) | |
| ) | |
| ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment