Created
December 25, 2013 05:21
-
-
Save kazu-yamamoto/8120392 to your computer and use it in GitHub Desktop.
Simple JSON parser in Haskell
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | This JSON package retains the order of array elements. | |
-- JSON: http://www.ietf.org/rfc/rfc4627.txt | |
module JSON ( | |
JSON(..) | |
, parseJSON | |
) where | |
import Control.Applicative ((<*),(*>),(<$>),(<$)) | |
import Control.Monad (void) | |
import Data.Char | |
import Data.List (foldl') | |
import Text.Parsec | |
import Text.Parsec.String | |
---------------------------------------------------------------- | |
parseJSON :: String -> Either ParseError JSON | |
parseJSON xs = parse json "json" xs | |
---------------------------------------------------------------- | |
data JSON = JSNull | |
| JSBool Bool | |
| JSNumber Int | |
| JSString String | |
| JSArray [JSON] | |
| JSObject [(String,JSON)] | |
deriving (Show, Eq) | |
---------------------------------------------------------------- | |
json :: Parser JSON | |
json = ws *> jsValue | |
jsValue :: Parser JSON | |
jsValue = choice [jsNull,jsBool,jsObject,jsArray,jsNumber,jsString] | |
---------------------------------------------------------------- | |
-- | | |
-- | |
-- >>> parseJSON " null " | |
-- Right JSNull | |
jsNull :: Parser JSON | |
jsNull = jsAtom "null" JSNull | |
-- | | |
-- | |
-- >>> parseJSON " false " | |
-- Right (JSBool False) | |
-- >>> parseJSON "true" | |
-- Right (JSBool True) | |
jsBool :: Parser JSON | |
jsBool = jsAtom "false" (JSBool False) | |
<|> jsAtom "true" (JSBool True) | |
---------------------------------------------------------------- | |
-- | | |
-- | |
-- >>> parseJSON " { \"key1\": 2 , \"key2\" : false } " | |
-- Right (JSObject [("key1",JSNumber 2),("key2",JSBool False)]) | |
jsObject :: Parser JSON | |
jsObject = JSObject <$> betweenWs '{' kvs '}' | |
where | |
kvs = kv `sepBy` charWs ',' | |
kv = do | |
JSString key <- jsString | |
charWs ':' | |
val <- jsValue | |
return (key, val) | |
---------------------------------------------------------------- | |
-- | | |
-- | |
-- >>> parseJSON " [ 1 , \"foo\" , true ] " | |
-- Right (JSArray [JSNumber 1,JSString "foo",JSBool True]) | |
jsArray :: Parser JSON | |
jsArray = JSArray <$> betweenWs '[' vals ']' | |
where | |
vals = jsValue `sepBy` charWs ',' | |
---------------------------------------------------------------- | |
-- | Integer only. | |
-- | |
-- >>> parseJSON " 123 " | |
-- Right (JSNumber 123) | |
-- >>> parseJSON " -456 " | |
-- Right (JSNumber (-456)) | |
jsNumber :: Parser JSON | |
jsNumber = JSNumber <$> do | |
sign <- option id (negate <$ char '-') | |
ns <- many1 $ oneOf ['0'..'9'] | |
ws | |
return $ sign $ fromInts ns | |
where | |
fromInts = foldl' (\x y -> x*10 + toInt y) 0 | |
toInt n = ord n - ord '0' | |
---------------------------------------------------------------- | |
-- | Non Unicode only. | |
-- | |
-- >>> parseJSON " \"foo bar baz\" " | |
-- Right (JSString "foo bar baz") | |
jsString :: Parser JSON | |
jsString = JSString <$> (between (char '"') (char '"') (many jsChar) <* ws) | |
where | |
jsChar = unescaped <|> escaped | |
unescaped = noneOf "\"\\" | |
escaped = char '\\' *> escapedChar | |
escapedChar :: Parser Char | |
escapedChar = choice $ map ch alist | |
where | |
ch (x,y) = y <$ char x | |
alist = [ | |
('b', '\b') | |
, ('f', '\f') | |
, ('n', '\n') | |
, ('r', '\r') | |
, ('t', '\t') | |
, ('\\','\\') | |
, ('\"','\"') | |
] | |
---------------------------------------------------------------- | |
ws :: Parser () | |
ws = void $ many $ oneOf " \t\r\n" | |
jsAtom :: String -> JSON -> Parser JSON | |
jsAtom str val = val <$ (string str <* ws) | |
charWs :: Char -> Parser () | |
charWs c = char c *> ws | |
betweenWs :: Char -> Parser a -> Char -> Parser a | |
betweenWs o vals c = charWs o *> vals <* charWs c |
I'm not sure which part you don't understand but '\n' <$ char 'n'
is equivalent to do { _ <- char 'n'; return '\n' }
. So, if escape
finds \
and b
in order, it returns a newline character.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great stuff, super helpful and informative!
When using
ghc
8.8.3, I had to add{-# LANGUAGE FlexibleContexts #-}
to get it compile, otherwise it complained aboutch (x,y) = y <$ char x
. If I understand your example corretly, this can be fixed by definingch
at the toplevel context:Also, there are some conventions in here that seem a little verbose, probably stemming from my lack of understanding of how parsec works. It would be especially helpful knowing why
escpaed
is defined the way it is defined.