Skip to content

Instantly share code, notes, and snippets.

@msmorgan
Created October 24, 2017 19:00
Show Gist options
  • Save msmorgan/a8fe0570fe4a86b68e7fa81c60de8b1f to your computer and use it in GitHub Desktop.
Save msmorgan/a8fe0570fe4a86b68e7fa81c60de8b1f to your computer and use it in GitHub Desktop.
||| Recognises JSON string literals.
module Language.JSON.String
import Data.String.Extra
import Text.Lexer
import Text.Lexer.Util
import Text.Parser
%default total
%access private
data JSONStringToken
= JSTChar Char
| JSTSimpleEscape Char
| JSTUnicodeEscape Int
||| Recognise a Unicode control character.
control : Lexer
control = range '\x0000' '\x001f'
<|> range '\x007f' '\x009f'
||| Recognise a backslash followed by a sub-lexer.
esc : (l : Lexer) -> Lexer
esc = escape '\\'
simpleEscape : Lexer
simpleEscape = esc (oneOf "\"\\/bfnrt")
simpleEscapeValue : String -> Char
simpleEscapeValue str with (unpack str)
simpleEscapeValue str | ('\\' :: x :: []) = x
simpleEscapeValue str | _ = '\NUL'
unicodeEscape : Lexer
unicodeEscape = esc (is 'u' <+> exactly 4 hexDigit)
unicodeEscapeValue : String -> Int
unicodeEscapeValue str = cast ("0x" ++ drop 2 str)
charValue : String -> Char
charValue str = maybe '\NUL' id (index 0 str)
jsonStringTokenMap : TokenMap JSONStringToken
jsonStringTokenMap
= [ (simpleEscape, JSTSimpleEscape . simpleEscapeValue)
, (unicodeEscape, JSTUnicodeEscape . unicodeEscapeValue)
, (non control, JSTChar . charValue)
]
simpleEscapeTerm : JSONStringToken -> Maybe Char
simpleEscapeTerm (JSTSimpleEscape c)
= Just $
case c of
'b' => '\b'
'n' => '\n'
'f' => '\f'
'r' => '\r'
't' => '\t'
_ => c -- `"`, `\`, or `/`
simpleEscapeTerm _ = Nothing
unicodeEscapeTerm : JSONStringToken -> Maybe Char
unicodeEscapeTerm (JSTUnicodeEscape x) = Just (chr x)
unicodeEscapeTerm _ = Nothing
charTerm : JSONStringToken -> Maybe Char
charTerm (JSTChar c) = Just c
charTerm _ = Nothing
parseStringChar : Grammar JSONStringToken True Char
parseStringChar = terminal simpleEscapeTerm
<|> terminal unicodeEscapeTerm
<|> terminal charTerm
||| Parse a string from string tokens.
parseString : Grammar JSONStringToken False String
parseString
= do chars <- many parseStringChar
eof
pure (pack chars)
stringLit : Lexer
stringLit = quote (is '"') (simpleEscape <|> unicodeEscape <|> non control)
||| Convert a string that was recognised as a `stringLit` to its string
||| value. Passing an invalid string results in undefined behavior.
stringLitValue : String -> String
-- stringLitValue str
-- = let str = shrink 1 str -- remove quotes
-- lexResult = lex jsonStringTokenMap str
-- tokens = map tok (fst lexResult)
-- parseResult = parse tokens parseString in
-- case parseResult of
-- (Right (x, _)) => x
-- _ => "__INVALID_STRING__" -- errors can't happen
export
record StringError where
constructor MkStringError
export
string : Literal String
-- string : Literal (Either StringError String)
-- string = (stringLit, parseString)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment