Created
November 12, 2023 16:26
-
-
Save Profpatsch/1869cbc90c2cead03022caf84223fd34 to your computer and use it in GitHub Desktop.
Parser for a json dialect which supports tagged values/sums of the syntax: `< "key": value >`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Abc (jsonWith') where | |
import Data.Aeson hiding (Value (..)) | |
import Data.Aeson.Key qualified as Key | |
import Data.Aeson.KeyMap qualified as KM | |
import Data.Aeson.Parser.Internal hiding (jsonWith') | |
import Data.Attoparsec.ByteString qualified as A | |
import Data.Attoparsec.ByteString.Char8 (Parser, char, string) | |
import Data.Function (fix) | |
import Data.Functor (($>)) | |
import Data.Scientific (Scientific) | |
import Data.Text (Text) | |
import Data.Vector (Vector) | |
import Data.Vector qualified as Vector (empty, fromListN, reverse) | |
import Data.Word (Word8) | |
data Value | |
= Object (KM.KeyMap Value) | |
| Array (Vector Value) | |
| Tag (Key, Value) | |
| String Text | |
| Number Scientific | |
| Bool Bool | |
| Null | |
deriving stock (Show) | |
-- | Strict version of 'jsonWith'. | |
jsonWith' :: ([(Key, Value)] -> Either String (KM.KeyMap Value)) -> Parser Value | |
jsonWith' mkObject = fix $ \value_ -> do | |
skipSpace | |
w <- A.peekWord8' | |
case w of | |
W8_DOUBLE_QUOTE -> do | |
!s <- A.anyWord8 *> jstring_ | |
pure (String s) | |
W8_OPEN_CURLY -> A.anyWord8 *> object_' mkObject value_ | |
W8_OPEN_SQUARE -> A.anyWord8 *> array_' value_ | |
W8_OPEN_ANGLE -> A.anyWord8 *> tagged_' value_ | |
W8_f -> string "false" $> Bool False | |
W8_t -> string "true" $> Bool True | |
W8_n -> string "null" $> Null | |
_ | |
| w >= W8_0 && w <= W8_9 || w == W8_MINUS -> | |
do | |
!n <- scientific | |
pure (Number n) | |
| otherwise -> fail "not a valid json value" | |
{-# INLINE jsonWith' #-} | |
-- | The only valid whitespace in a JSON document is space, newline, | |
-- carriage return, and tab. | |
skipSpace :: Parser () | |
skipSpace = A.skipWhile $ \w -> w == W8_SPACE || w == W8_NL || w == W8_CR || w == W8_TAB | |
{-# INLINE skipSpace #-} | |
-- | Parse a JSON Key | |
key :: Parser Key | |
key = Key.fromText <$> jstring | |
object_' :: ([(Key, Value)] -> Either String (KM.KeyMap Value)) -> Parser Value -> Parser Value | |
object_' mkObject val' = do | |
!vals <- objectValues mkObject key' val' | |
pure (Object vals) | |
where | |
key' = do | |
!s <- key | |
pure s | |
{-# INLINE object_' #-} | |
objectValues :: | |
([(Key, Value)] -> Either String (KM.KeyMap Value)) -> | |
Parser Key -> | |
Parser Value -> | |
Parser (KM.KeyMap Value) | |
objectValues mkObject str val = do | |
skipSpace | |
w <- A.peekWord8' | |
if w == W8_CLOSE_CURLY | |
then A.anyWord8 >> pure KM.empty | |
else loop [] | |
where | |
-- Why use acc pattern here, you may ask? because then the underlying 'KM.fromList' | |
-- implementation can make use of mutation when constructing a map. For example, | |
-- 'HashMap` uses 'unsafeInsert' and it's much faster because it's doing in place | |
-- update to the 'HashMap'! | |
loop :: [(Key, Value)] -> Parser (KM.KeyMap Value) | |
loop acc = do | |
k <- (str A.<?> "object key") <* skipSpace <* (char ':' A.<?> "':'") | |
v <- (val A.<?> "object value") <* skipSpace | |
ch <- A.satisfy (\w -> w == W8_COMMA || w == W8_CLOSE_CURLY) A.<?> "',' or '}'" | |
let acc' = (k, v) : acc | |
if ch == W8_COMMA | |
then skipSpace >> loop acc' | |
else case mkObject acc' of | |
Left err -> fail err | |
Right obj -> pure obj | |
{-# INLINE objectValues #-} | |
array_' :: Parser Value -> Parser Value | |
array_' val = do | |
!vals <- arrayValues val | |
pure (Array vals) | |
{-# INLINE array_' #-} | |
arrayValues :: Parser Value -> Parser (Vector Value) | |
arrayValues val = do | |
skipSpace | |
w <- A.peekWord8' | |
if w == W8_CLOSE_SQUARE | |
then A.anyWord8 >> pure Vector.empty | |
else loop [] 1 | |
where | |
loop acc !len = do | |
v <- (val A.<?> "json list value") <* skipSpace | |
ch <- A.satisfy (\w -> w == W8_COMMA || w == W8_CLOSE_SQUARE) A.<?> "',' or ']'" | |
if ch == W8_COMMA | |
then skipSpace >> loop (v : acc) (len + 1) | |
else pure (Vector.reverse (Vector.fromListN len (v : acc))) | |
{-# INLINE arrayValues #-} | |
tagged_' :: | |
Parser Value -> | |
Parser Value | |
tagged_' val = do | |
!vals <- taggedValues key' val | |
pure (Tag vals) | |
where | |
key' = do | |
!s <- key | |
pure s | |
{-# INLINE tagged_' #-} | |
taggedValues :: | |
Parser Key -> | |
Parser Value -> | |
Parser (Key, Value) | |
taggedValues str val = do | |
skipSpace | |
k <- (str A.<?> "json tag key") <* skipSpace <* (char ':' A.<?> "':'") | |
v <- (val A.<?> "Json tag value") <* skipSpace | |
_ch <- A.satisfy (== W8_CLOSE_ANGLE) A.<?> "'>'" | |
pure (k, v) | |
{-# INLINE taggedValues #-} | |
pattern W8_SPACE :: Word8 | |
pattern W8_NL :: Word8 | |
pattern W8_CR :: Word8 | |
pattern W8_TAB :: Word8 | |
pattern W8_SPACE = 0x20 | |
pattern W8_NL = 0x0a | |
pattern W8_CR = 0x0d | |
pattern W8_TAB = 0x09 | |
-- punctuation | |
pattern W8_DOUBLE_QUOTE :: Word8 | |
pattern W8_COMMA :: Word8 | |
pattern W8_COMMA = 44 | |
pattern W8_DOUBLE_QUOTE = 34 | |
-- parentheses | |
pattern W8_CLOSE_CURLY :: Word8 | |
pattern W8_CLOSE_SQUARE :: Word8 | |
pattern W8_OPEN_SQUARE :: Word8 | |
pattern W8_OPEN_CURLY :: Word8 | |
pattern W8_OPEN_CURLY = 123 | |
pattern W8_OPEN_SQUARE = 91 | |
pattern W8_CLOSE_CURLY = 125 | |
pattern W8_CLOSE_SQUARE = 93 | |
pattern W8_OPEN_ANGLE :: Word8 | |
pattern W8_CLOSE_ANGLE :: Word8 | |
pattern W8_OPEN_ANGLE = 60 | |
pattern W8_CLOSE_ANGLE = 62 | |
-- operators | |
pattern W8_MINUS :: Word8 | |
pattern W8_MINUS = 45 | |
-- digits | |
pattern W8_0 :: Word8 | |
pattern W8_9 :: Word8 | |
pattern W8_0 = 48 | |
pattern W8_9 = 57 | |
-- lower case | |
pattern W8_f :: Word8 | |
pattern W8_n :: Word8 | |
pattern W8_t :: Word8 | |
pattern W8_f = 102 | |
pattern W8_n = 110 | |
pattern W8_t = 116 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment