Created
October 24, 2017 19:00
-
-
Save msmorgan/a8fe0570fe4a86b68e7fa81c60de8b1f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
||| Recognises JSON string literals. | |
module Language.JSON.String | |
import Data.String.Extra | |
import Text.Lexer | |
import Text.Lexer.Util | |
import Text.Parser | |
%default total | |
%access private | |
data JSONStringToken | |
= JSTChar Char | |
| JSTSimpleEscape Char | |
| JSTUnicodeEscape Int | |
||| Recognise a Unicode control character. | |
control : Lexer | |
control = range '\x0000' '\x001f' | |
<|> range '\x007f' '\x009f' | |
||| Recognise a backslash followed by a sub-lexer. | |
esc : (l : Lexer) -> Lexer | |
esc = escape '\\' | |
simpleEscape : Lexer | |
simpleEscape = esc (oneOf "\"\\/bfnrt") | |
simpleEscapeValue : String -> Char | |
simpleEscapeValue str with (unpack str) | |
simpleEscapeValue str | ('\\' :: x :: []) = x | |
simpleEscapeValue str | _ = '\NUL' | |
unicodeEscape : Lexer | |
unicodeEscape = esc (is 'u' <+> exactly 4 hexDigit) | |
unicodeEscapeValue : String -> Int | |
unicodeEscapeValue str = cast ("0x" ++ drop 2 str) | |
charValue : String -> Char | |
charValue str = maybe '\NUL' id (index 0 str) | |
jsonStringTokenMap : TokenMap JSONStringToken | |
jsonStringTokenMap | |
= [ (simpleEscape, JSTSimpleEscape . simpleEscapeValue) | |
, (unicodeEscape, JSTUnicodeEscape . unicodeEscapeValue) | |
, (non control, JSTChar . charValue) | |
] | |
simpleEscapeTerm : JSONStringToken -> Maybe Char | |
simpleEscapeTerm (JSTSimpleEscape c) | |
= Just $ | |
case c of | |
'b' => '\b' | |
'n' => '\n' | |
'f' => '\f' | |
'r' => '\r' | |
't' => '\t' | |
_ => c -- `"`, `\`, or `/` | |
simpleEscapeTerm _ = Nothing | |
unicodeEscapeTerm : JSONStringToken -> Maybe Char | |
unicodeEscapeTerm (JSTUnicodeEscape x) = Just (chr x) | |
unicodeEscapeTerm _ = Nothing | |
charTerm : JSONStringToken -> Maybe Char | |
charTerm (JSTChar c) = Just c | |
charTerm _ = Nothing | |
parseStringChar : Grammar JSONStringToken True Char | |
parseStringChar = terminal simpleEscapeTerm | |
<|> terminal unicodeEscapeTerm | |
<|> terminal charTerm | |
||| Parse a string from string tokens. | |
parseString : Grammar JSONStringToken False String | |
parseString | |
= do chars <- many parseStringChar | |
eof | |
pure (pack chars) | |
stringLit : Lexer | |
stringLit = quote (is '"') (simpleEscape <|> unicodeEscape <|> non control) | |
||| Convert a string that was recognised as a `stringLit` to its string | |
||| value. Passing an invalid string results in undefined behavior. | |
stringLitValue : String -> String | |
-- stringLitValue str | |
-- = let str = shrink 1 str -- remove quotes | |
-- lexResult = lex jsonStringTokenMap str | |
-- tokens = map tok (fst lexResult) | |
-- parseResult = parse tokens parseString in | |
-- case parseResult of | |
-- (Right (x, _)) => x | |
-- _ => "__INVALID_STRING__" -- errors can't happen | |
export | |
record StringError where | |
constructor MkStringError | |
export | |
string : Literal String | |
-- string : Literal (Either StringError String) | |
-- string = (stringLit, parseString) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment