Created
April 11, 2020 21:18
-
-
Save pbgc/83b1902dfecd2a0b993216e22247973e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-| | |
the test example we are parsing (show [rec1,rec2]): | |
[ | |
MkPersonRecord { | |
name = "Wim Vanderbauwhede", | |
address = MkAddress { | |
line1 = "School of Computing Science", | |
number = 17, | |
street = "Lilybank Gdns", | |
town = "Glasgow", | |
postcode = "G12 8QQ" | |
}, | |
id = 557188, | |
labels = [Green,Red] | |
}, | |
MkPersonRecord { | |
name = "Jeremy Singer", | |
address = MkAddress { | |
line1 = "School of Computing Science", | |
number = 17, | |
street = "Lilybank Gdns", | |
town = "Glasgow", | |
postcode = "G12 8QQ" | |
}, | |
id = 42, | |
labels = [Blue,Yellow] | |
} | |
] | |
parsed output: | |
<?xml version="1.0" encoding="UTF-8"?> | |
<list> | |
<list-elt> | |
<record name="MkPersonRecord"> | |
<elt key="name">"Wim Vanderbauwhede"</elt> | |
<elt key="address"> | |
<record name="MkAddress"> | |
<elt key="line1">"School of Computing Science"</elt> | |
<elt key="number">17</elt> | |
<elt key="street">"Lilybank Gdns"</elt> | |
<elt key="town">"Glasgow"</elt> | |
<elt key="postcode">"G12 8QQ"</elt> | |
</record> | |
</elt> | |
<elt key="id">557188</elt> | |
<elt key="labels"> | |
<list> | |
<list-elt> | |
<adt>Green</adt> | |
</list-elt> | |
<list-elt> | |
<adt>Red</adt> | |
</list-elt> | |
</list> | |
</elt> | |
</record> | |
</list-elt> | |
<list-elt> | |
<record name="MkPersonRecord"> | |
<elt key="name">"Jeremy Singer"</elt> | |
<elt key="address"> | |
<record name="MkAddress"> | |
<elt key="line1">"School of Computing Science"</elt> | |
<elt key="number">17</elt> | |
<elt key="street">"Lilybank Gdns"</elt> | |
<elt key="town">"Glasgow"</elt> | |
<elt key="postcode">"G12 8QQ"</elt> | |
</record> | |
</elt> | |
<elt key="id">42</elt> | |
<elt key="labels"> | |
<list> | |
<list-elt> | |
<adt>Blue</adt> | |
</list-elt> | |
<list-elt> | |
<adt>Yellow</adt> | |
</list-elt> | |
</list> | |
</elt> | |
</record> | |
</list-elt> | |
</list> | |
-} | |
module ShowParser ( parseShow ) where | |
-- renamed original run_parser to runParser (hlint allways advice to use camelCase) | |
-- had do hide Text.ParserCombinators.Parsec.runParser because it was clashing and GHC refused to compile | |
-- could just use another name .. but .. learned about hiding :) | |
import Text.ParserCombinators.Parsec hiding (runParser) | |
import qualified Text.ParserCombinators.Parsec.Token as P | |
import Text.ParserCombinators.Parsec.Language | |
import Data.List ( intercalate ) | |
xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" | |
parseShow :: String -> String | |
parseShow str = xmlHeader ++ runParser showParser str | |
-- used :type parse on GHCI and got: | |
-- parse :: Text.Parsec.Prim.Stream s Data.Functor.Identity.Identity t => Text.Parsec.Prim.Parsec s () a -> SourceName -> s -> Either ParseError a | |
-- The Either type represents values with two possibilities: a value of type Either a b is either Left a or Right b. | |
-- The Either type is sometimes used to represent a value which is either correct or an error; | |
-- by convention, the Left constructor is used to hold an error value | |
-- and the Right constructor is used to hold a correct value (mnemonic: "right" also means "correct"). | |
-- | |
-- We are applying parser p to the string str | |
-- If we get an error (Left err) we throw an exception message (with error) | |
-- otherwise we return the parsed value | |
runParser :: Parser a -> String -> a | |
runParser p str = case parse p "" str of | |
Left err -> error $ "parse error at " ++ show err | |
Right val -> val | |
-- --------------------------------------------------------------- | |
-- helper functions to create XML tags with and without attributes | |
-- --------------------------------------------------------------- | |
openTag tag = "<" ++ tag ++ ">" | |
closeTag tag = "</" ++ tag ++ ">" | |
-- XML tags: <tag>val</tag> | |
-- example: tag "list" 1 -> <list>1</list> | |
-- concat :: Foldable t => t [a] -> [a] (The concatenation of all the elements of a container of lists) | |
tag tag val = concat [openTag tag, val, closeTag tag] | |
-- unwords :: [String] -> String | |
-- unwords is an inverse operation to words. It joins words with separating spaces. | |
-- given a tag, a list of tuples of attributes and a value: example: tagAttrs "test" [("at1", 1), ("at2", 2)] "yupi" | |
-- we obtain: <test at1="1" at2="2">yupi</test> | |
-- we are concateneting: | |
-- openTag (unwords $ [tag] ++ (map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs)) -> <test at1="1" at2="2"> | |
-- val -> yupi | |
-- closeTag tag -> </test> | |
tagAttrs tag attrs val = concat [ | |
openTag $ unwords $ tag : map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs, -- changed .. think it's more readable like this | |
val, | |
closeTag tag | |
] | |
-- he intercalate function takes a ByteString and a list of ByteStrings and concatenates the list | |
-- after interspersing the first argument between each element of the list. | |
-- joinNL could be substituted with unlines from the prelude | |
-- this was just to illustrate the use of intercalate and the Data.List module | |
joinNL ls = intercalate "\n" ls | |
-- ------------------- | |
-- Parsers Definitions | |
-- ------------------- | |
-- ---------------------------------------------------- | |
-- Complete parser | |
-- Combine all parsers using the choice combinator <|> | |
-- ---------------------------------------------------- | |
-- Parsing alternatives | |
-- Often we want to try one parser; if that fails, then try another one instead. | |
-- The choice combinator <|> provides this functionality. | |
showParser :: Parser String | |
showParser = | |
listParser <|> -- [ ... ] | |
tupleParser <|> -- ( ... ) | |
try recordParser <|> -- MkRec { ... } | |
adtParser <|> -- MkADT ... | |
number <|> -- signed integer | |
quotedString <?> "Parse error" | |
-- ----------------------------------- | |
-- Parsers for the derived Show format | |
-- ----------------------------------- | |
-- "\"" means we are 'escaping' the " using \" so we output the string ". """ would be an error | |
-- return :: a -> m a Source | |
-- Inject a value into the monadic type. | |
quotedString = do | |
s <- stringLiteral | |
return $ "\"" ++ s ++ "\"" | |
number = do | |
n <- integer | |
return $ show n | |
listParser = do | |
-- parse brackets and comma separated values | |
ls <- brackets $ commaSep showParser | |
-- a list [1, 2, 3] will be outputed as: | |
-- <list> | |
-- <list-elt>1</list-elt> | |
-- <list-elt>2</list-elt> | |
-- <list-elt>3</list-elt> | |
-- </list> | |
return $ tag "list" $ joinNL $ map (tag "list-elt") ls | |
-- tupleParser is not used in the test example | |
tupleParser = do | |
-- parse parens and comma separated values | |
ls <- parens $ commaSep showParser | |
-- a tuple (1, 2) will be outputed as: | |
-- <tuple> | |
-- <tuple-elt>1</tuple-elt> | |
-- <tuple-elt>2</tuple-elt> | |
-- </tuple> | |
return $ tag "tuple" $ unwords $ map (tag "tuple-elt") ls | |
{-| | |
MkPersonRecord { | |
name = "Jeremy Singer", | |
address = MkAddress { | |
line1 = "School of Computing Science", | |
number = 17, | |
street = "Lilybank Gdns", | |
town = "Glasgow", | |
postcode = "G12 8QQ" | |
}, | |
id = 42, | |
labels = [Blue,Yellow] | |
} | |
turns into: | |
<record name="MkPersonRecord"> | |
<elt key="name">"Jeremy Singer"</elt> | |
<elt key="address"> | |
<record name="MkAddress"> | |
<elt key="line1">"School of Computing Science"</elt> | |
<elt key="number">17</elt> | |
<elt key="street">"Lilybank Gdns"</elt> | |
<elt key="town">"Glasgow"</elt> | |
<elt key="postcode">"G12 8QQ"</elt> | |
</record> | |
</elt> | |
<elt key="id">42</elt> | |
<elt key="labels"> | |
<list> | |
<list-elt><adt>Blue</adt></list-elt> | |
<list-elt><adt>Yellow</adt></list-elt> | |
</list> | |
</elt> | |
</record> | |
using: | |
- recordParser for: MkPersonRecord { .... } ; inside using kvParser, stringLiteral and number | |
- kvParser for: address = | |
- recordParser for: MkAddress { .... } | |
- kvParser for: labels = | |
- adtParser for: Blue and Yellow | |
-} | |
recordParser = do | |
-- a Record has a Type Identifier, ex: MkPersonRecord | |
ti <- typeIdentifier | |
-- braces and comma separated values parsed by kvParser | |
ls <- braces $ commaSep kvParser | |
return $ tagAttrs "record" [("name", ti)] (joinNL ls) | |
{-| | |
labels = [Blue,Yellow] | |
turns into | |
<elt key="labels"> | |
<list> | |
<list-elt><adt>Blue</adt></list-elt> | |
<list-elt><adt>Yellow</adt></list-elt> | |
</list> | |
</elt> | |
using: | |
- kvParser for: labels = | |
- listParser for: [ .., ..] | |
- adtParser for: Blue and Yellow | |
-} | |
adtParser = do | |
ti <- typeIdentifier | |
return $ tag "adt" ti | |
{-| | |
name = "Jeremy Singer", | |
turns into: | |
<elt key="name">"Jeremy Singer"</elt> | |
-} | |
kvParser = do | |
k <- identifier | |
symbol "=" | |
t <- showParser | |
return $ tagAttrs "elt" [("key", k)] t | |
typeIdentifier = do | |
-- a Type identifier begins with a Capital Letter | |
fst <- oneOf ['A' .. 'Z'] | |
-- read the rest ... (many alphanumerics) | |
rest <- many alphaNum | |
whiteSpace | |
return $ fst:rest | |
-- The Parsec.Token module provides a number of basic parsers. | |
-- Each of these takes as argument a lexer, generated by makeTokenParser using a language definition. | |
-- Here we use emptyDef from the Language module. | |
lexer = P.makeTokenParser emptyDef | |
-- shorter name for the predefined parsers | |
parens = P.parens lexer | |
brackets = P.brackets lexer | |
braces = P.braces lexer | |
commaSep = P.commaSep lexer | |
whiteSpace = P.whiteSpace lexer | |
symbol = P.symbol lexer | |
identifier = P.identifier lexer | |
integer = P.integer lexer | |
stringLiteral = P.stringLiteral lexer |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment