pbgc · April 11, 2020 21:18
diff --git a/ShowParser.hs b/ShowParser.hs
 {-|

 the test example we are parsing (show [rec1,rec2]):

 [
  MkPersonRecord {
    name = "Wim Vanderbauwhede",
    address = MkAddress {
        line1 = "School of Computing Science",
        number = 17,
        street = "Lilybank Gdns",
        town = "Glasgow",
        postcode = "G12 8QQ"
    },
    id = 557188,
    labels = [Green,Red]
  },
  MkPersonRecord {
    name = "Jeremy Singer",
    address = MkAddress {
        line1 = "School of Computing Science",
        number = 17,
        street = "Lilybank Gdns",
        town = "Glasgow",
        postcode = "G12 8QQ"
    },
    id = 42,
    labels = [Blue,Yellow]
  }
 ]

 parsed output:

 <?xml version="1.0" encoding="UTF-8"?>
 <list>
    <list-elt>
        <record name="MkPersonRecord">
           <elt key="name">"Wim Vanderbauwhede"</elt>
           <elt key="address">
               <record name="MkAddress">
                   <elt key="line1">"School of Computing Science"</elt>
                   <elt key="number">17</elt>
                   <elt key="street">"Lilybank Gdns"</elt>
                   <elt key="town">"Glasgow"</elt>
                   <elt key="postcode">"G12 8QQ"</elt>
               </record>
           </elt>
           <elt key="id">557188</elt>
           <elt key="labels">
               <list>
                   <list-elt>
                       <adt>Green</adt>
                   </list-elt>
                   <list-elt>
                       <adt>Red</adt>
                   </list-elt>
               </list>
           </elt>
        </record>
    </list-elt>
    <list-elt>
        <record name="MkPersonRecord">
            <elt key="name">"Jeremy Singer"</elt>
            <elt key="address">
                <record name="MkAddress">
                    <elt key="line1">"School of Computing Science"</elt>
                    <elt key="number">17</elt>
                    <elt key="street">"Lilybank Gdns"</elt>
                    <elt key="town">"Glasgow"</elt>
                    <elt key="postcode">"G12 8QQ"</elt>
                </record>
            </elt>
            <elt key="id">42</elt>
            <elt key="labels">
                <list>
                    <list-elt>
                        <adt>Blue</adt>
                    </list-elt>
                    <list-elt>
                        <adt>Yellow</adt>
                    </list-elt>
                </list>
            </elt>
        </record>
    </list-elt>
 </list>

 -} 

 module ShowParser ( parseShow ) where
 -- renamed original run_parser to runParser (hlint allways advice to use camelCase)
 -- had do hide Text.ParserCombinators.Parsec.runParser because it was clashing and GHC refused to compile
 -- could just use another name .. but .. learned about hiding :)
 import Text.ParserCombinators.Parsec hiding (runParser)
 import qualified Text.ParserCombinators.Parsec.Token as P
 import Text.ParserCombinators.Parsec.Language
 import Data.List ( intercalate )


 xmlHeader =  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"

 parseShow :: String -> String
 parseShow str = xmlHeader ++ runParser showParser str

 -- used :type parse on GHCI and got:
 -- parse :: Text.Parsec.Prim.Stream s Data.Functor.Identity.Identity t => Text.Parsec.Prim.Parsec s () a -> SourceName -> s -> Either ParseError a
 -- The Either type represents values with two possibilities: a value of type Either a b is either Left a or Right b.
 -- The Either type is sometimes used to represent a value which is either correct or an error;
 -- by convention, the Left constructor is used to hold an error value
 -- and the Right constructor is used to hold a correct value (mnemonic: "right" also means "correct").
 --
 -- We are applying parser p to the string str
 -- If we get an error (Left err) we throw an exception message (with error)
 -- otherwise we return the parsed value
 runParser :: Parser a -> String -> a
 runParser p str =  case parse p "" str of
    Left err -> error $ "parse error at " ++ show err
    Right val  -> val  

 -- ---------------------------------------------------------------
 -- helper functions to create XML tags with and without attributes
 -- ---------------------------------------------------------------

 openTag tag = "<" ++ tag ++ ">"
 closeTag tag = "</" ++ tag ++ ">"
 -- XML tags: <tag>val</tag>
 -- example: tag "list" 1 -> <list>1</list>
 -- concat :: Foldable t => t [a] -> [a] (The concatenation of all the elements of a container of lists)
 tag tag val = concat [openTag tag, val, closeTag tag]

 -- unwords :: [String] -> String
 -- unwords is an inverse operation to words. It joins words with separating spaces.
 -- given a tag, a list of tuples of attributes and a value: example: tagAttrs "test" [("at1", 1), ("at2", 2)] "yupi"
 -- we obtain: <test at1="1" at2="2">yupi</test>
 -- we are concateneting:
 --     openTag (unwords $ [tag] ++ (map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs))    ->   <test at1="1" at2="2">
 --     val                                                                                 ->   yupi
 --     closeTag tag                                                                        ->   </test>
 tagAttrs tag attrs val = concat [
  openTag $ unwords $ tag : map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs,  -- changed .. think it's more readable like this
  val,
  closeTag tag
  ]

 -- he intercalate function takes a ByteString and a list of ByteStrings and concatenates the list
 -- after interspersing the first argument between each element of the list.
 -- joinNL could be substituted with unlines from the prelude
 -- this was just to illustrate the use of intercalate and the Data.List module
 joinNL ls = intercalate "\n" ls



 -- -------------------
 -- Parsers Definitions
 -- -------------------


 -- ----------------------------------------------------
 -- Complete parser
 -- Combine all parsers using the choice combinator <|>
 -- ----------------------------------------------------

 -- Parsing alternatives
 -- Often we want to try one parser; if that fails, then try another one instead.
 -- The choice combinator <|> provides this functionality.
 showParser :: Parser String 
 showParser =
    listParser <|> -- [ ... ]
    tupleParser <|> -- ( ... )
    try recordParser <|> -- MkRec { ... }
    adtParser <|> -- MkADT ...
    number <|>    -- signed integer
    quotedString <?> "Parse error"


 -- -----------------------------------
 -- Parsers for the derived Show format
 -- -----------------------------------


 -- "\"" means we are 'escaping' the " using \" so we output the string ". """ would be an error
 -- return :: a -> m a Source
 --    Inject a value into the monadic type.
 quotedString = do
    s <- stringLiteral
    return $ "\"" ++ s ++ "\""


 number = do
    n <- integer
    return $ show n


 listParser = do
    -- parse brackets and comma separated values
    ls <- brackets $ commaSep showParser
    -- a list [1, 2, 3] will be outputed as:
    --  <list>
    --      <list-elt>1</list-elt>
    --      <list-elt>2</list-elt>
    --      <list-elt>3</list-elt>
    --  </list>
    return $ tag "list" $ joinNL $ map (tag "list-elt") ls


 -- tupleParser is not used in the test example
 tupleParser = do
    -- parse parens and comma separated values
    ls <- parens $ commaSep showParser
    -- a tuple (1, 2) will be outputed as:
    --  <tuple>
    --      <tuple-elt>1</tuple-elt>
    --      <tuple-elt>2</tuple-elt>
    --  </tuple>
    return $ tag "tuple" $ unwords $ map (tag "tuple-elt") ls


 {-|
  MkPersonRecord {
    name = "Jeremy Singer",
    address = MkAddress {
        line1 = "School of Computing Science",
        number = 17,
        street = "Lilybank Gdns",
        town = "Glasgow",
        postcode = "G12 8QQ"
    },
    id = 42,
    labels = [Blue,Yellow]
  }

 turns into:

 <record name="MkPersonRecord">
    <elt key="name">"Jeremy Singer"</elt>
    <elt key="address">
        <record name="MkAddress">
           <elt key="line1">"School of Computing Science"</elt>
           <elt key="number">17</elt>
           <elt key="street">"Lilybank Gdns"</elt>
           <elt key="town">"Glasgow"</elt>
           <elt key="postcode">"G12 8QQ"</elt>
        </record>
    </elt>
    <elt key="id">42</elt>
    <elt key="labels">
        <list>
           <list-elt><adt>Blue</adt></list-elt>
           <list-elt><adt>Yellow</adt></list-elt>
        </list>
    </elt>
 </record>

 using:
  - recordParser for: MkPersonRecord { .... } ; inside using kvParser, stringLiteral and number
  - kvParser for: address =
  - recordParser for: MkAddress { .... }
  - kvParser for: labels =
  - adtParser for: Blue and Yellow
 -}
 recordParser = do
    -- a Record has a Type Identifier, ex: MkPersonRecord
    ti <- typeIdentifier
    -- braces and comma separated values parsed by kvParser
    ls <- braces $ commaSep kvParser
    return $ tagAttrs "record" [("name", ti)] (joinNL ls)


 {-|
 labels = [Blue,Yellow]

 turns into

 <elt key="labels">
    <list>
        <list-elt><adt>Blue</adt></list-elt>
        <list-elt><adt>Yellow</adt></list-elt>
    </list>
 </elt>

 using:
  - kvParser for: labels =
  - listParser for: [ .., ..]
  - adtParser for: Blue and Yellow
 -}
 adtParser = do
    ti <- typeIdentifier 
    return $ tag "adt" ti


 {-|
 name = "Jeremy Singer",

 turns into:

 <elt key="name">"Jeremy Singer"</elt>
 -}
 kvParser = do
    k <- identifier
    symbol "="
    t <- showParser
    return $ tagAttrs "elt" [("key", k)] t


 typeIdentifier = do
    -- a Type identifier begins with a Capital Letter
    fst <- oneOf ['A' .. 'Z']
    -- read the rest ... (many alphanumerics)
    rest <- many alphaNum
    whiteSpace
    return $ fst:rest



 -- The Parsec.Token module provides a number of basic parsers.
 -- Each of these takes as argument a lexer, generated by makeTokenParser using a language definition.
 -- Here we use emptyDef from the Language module.
 lexer           = P.makeTokenParser emptyDef

 -- shorter name for the predefined parsers

 parens          = P.parens lexer    
 brackets        = P.brackets lexer    
 braces          = P.braces lexer    
 commaSep        = P.commaSep lexer
 whiteSpace      = P.whiteSpace lexer    
 symbol          = P.symbol lexer    
 identifier      = P.identifier lexer
 integer         = P.integer lexer    
 stringLiteral   = P.stringLiteral lexer
	{-\|

	the test example we are parsing (show [rec1,rec2]):

	[
	MkPersonRecord {
	name = "Wim Vanderbauwhede",
	address = MkAddress {
	line1 = "School of Computing Science",
	number = 17,
	street = "Lilybank Gdns",
	town = "Glasgow",
	postcode = "G12 8QQ"
	},
	id = 557188,
	labels = [Green,Red]
	},
	MkPersonRecord {
	name = "Jeremy Singer",
	address = MkAddress {
	line1 = "School of Computing Science",
	number = 17,
	street = "Lilybank Gdns",
	town = "Glasgow",
	postcode = "G12 8QQ"
	},
	id = 42,
	labels = [Blue,Yellow]
	}
	]

	parsed output:

	<?xml version="1.0" encoding="UTF-8"?>
	<list>
	<list-elt>
	<record name="MkPersonRecord">
	<elt key="name">"Wim Vanderbauwhede"</elt>
	<elt key="address">
	<record name="MkAddress">
	<elt key="line1">"School of Computing Science"</elt>
	<elt key="number">17</elt>
	<elt key="street">"Lilybank Gdns"</elt>
	<elt key="town">"Glasgow"</elt>
	<elt key="postcode">"G12 8QQ"</elt>
	</record>
	</elt>
	<elt key="id">557188</elt>
	<elt key="labels">
	<list>
	<list-elt>
	<adt>Green</adt>
	</list-elt>
	<list-elt>
	<adt>Red</adt>
	</list-elt>
	</list>
	</elt>
	</record>
	</list-elt>
	<list-elt>
	<record name="MkPersonRecord">
	<elt key="name">"Jeremy Singer"</elt>
	<elt key="address">
	<record name="MkAddress">
	<elt key="line1">"School of Computing Science"</elt>
	<elt key="number">17</elt>
	<elt key="street">"Lilybank Gdns"</elt>
	<elt key="town">"Glasgow"</elt>
	<elt key="postcode">"G12 8QQ"</elt>
	</record>
	</elt>
	<elt key="id">42</elt>
	<elt key="labels">
	<list>
	<list-elt>
	<adt>Blue</adt>
	</list-elt>
	<list-elt>
	<adt>Yellow</adt>
	</list-elt>
	</list>
	</elt>
	</record>
	</list-elt>
	</list>

	-}

	module ShowParser ( parseShow ) where
	-- renamed original run_parser to runParser (hlint allways advice to use camelCase)
	-- had do hide Text.ParserCombinators.Parsec.runParser because it was clashing and GHC refused to compile
	-- could just use another name .. but .. learned about hiding :)
	import Text.ParserCombinators.Parsec hiding (runParser)
	import qualified Text.ParserCombinators.Parsec.Token as P
	import Text.ParserCombinators.Parsec.Language
	import Data.List ( intercalate )


	xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"

	parseShow :: String -> String
	parseShow str = xmlHeader ++ runParser showParser str

	-- used :type parse on GHCI and got:
	-- parse :: Text.Parsec.Prim.Stream s Data.Functor.Identity.Identity t => Text.Parsec.Prim.Parsec s () a -> SourceName -> s -> Either ParseError a
	-- The Either type represents values with two possibilities: a value of type Either a b is either Left a or Right b.
	-- The Either type is sometimes used to represent a value which is either correct or an error;
	-- by convention, the Left constructor is used to hold an error value
	-- and the Right constructor is used to hold a correct value (mnemonic: "right" also means "correct").
	--
	-- We are applying parser p to the string str
	-- If we get an error (Left err) we throw an exception message (with error)
	-- otherwise we return the parsed value
	runParser :: Parser a -> String -> a
	runParser p str = case parse p "" str of
	Left err -> error $ "parse error at " ++ show err
	Right val -> val

	-- ---------------------------------------------------------------
	-- helper functions to create XML tags with and without attributes
	-- ---------------------------------------------------------------

	openTag tag = "<" ++ tag ++ ">"
	closeTag tag = "</" ++ tag ++ ">"
	-- XML tags: <tag>val</tag>
	-- example: tag "list" 1 -> <list>1</list>
	-- concat :: Foldable t => t [a] -> [a] (The concatenation of all the elements of a container of lists)
	tag tag val = concat [openTag tag, val, closeTag tag]

	-- unwords :: [String] -> String
	-- unwords is an inverse operation to words. It joins words with separating spaces.
	-- given a tag, a list of tuples of attributes and a value: example: tagAttrs "test" [("at1", 1), ("at2", 2)] "yupi"
	-- we obtain: <test at1="1" at2="2">yupi</test>
	-- we are concateneting:
	-- openTag (unwords $ [tag] ++ (map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs)) -> <test at1="1" at2="2">
	-- val -> yupi
	-- closeTag tag -> </test>
	tagAttrs tag attrs val = concat [
	openTag $ unwords $ tag : map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs, -- changed .. think it's more readable like this
	val,
	closeTag tag
	]

	-- he intercalate function takes a ByteString and a list of ByteStrings and concatenates the list
	-- after interspersing the first argument between each element of the list.
	-- joinNL could be substituted with unlines from the prelude
	-- this was just to illustrate the use of intercalate and the Data.List module
	joinNL ls = intercalate "\n" ls



	-- -------------------
	-- Parsers Definitions
	-- -------------------


	-- ----------------------------------------------------
	-- Complete parser
	-- Combine all parsers using the choice combinator <\|>
	-- ----------------------------------------------------

	-- Parsing alternatives
	-- Often we want to try one parser; if that fails, then try another one instead.
	-- The choice combinator <\|> provides this functionality.
	showParser :: Parser String
	showParser =
	listParser <\|> -- [ ... ]
	tupleParser <\|> -- ( ... )
	try recordParser <\|> -- MkRec { ... }
	adtParser <\|> -- MkADT ...
	number <\|> -- signed integer
	quotedString <?> "Parse error"


	-- -----------------------------------
	-- Parsers for the derived Show format
	-- -----------------------------------


	-- "\"" means we are 'escaping' the " using \" so we output the string ". """ would be an error
	-- return :: a -> m a Source
	-- Inject a value into the monadic type.
	quotedString = do
	s <- stringLiteral
	return $ "\"" ++ s ++ "\""


	number = do
	n <- integer
	return $ show n


	listParser = do
	-- parse brackets and comma separated values
	ls <- brackets $ commaSep showParser
	-- a list [1, 2, 3] will be outputed as:
	-- <list>
	-- <list-elt>1</list-elt>
	-- <list-elt>2</list-elt>
	-- <list-elt>3</list-elt>
	-- </list>
	return $ tag "list" $ joinNL $ map (tag "list-elt") ls


	-- tupleParser is not used in the test example
	tupleParser = do
	-- parse parens and comma separated values
	ls <- parens $ commaSep showParser
	-- a tuple (1, 2) will be outputed as:
	-- <tuple>
	-- <tuple-elt>1</tuple-elt>
	-- <tuple-elt>2</tuple-elt>
	-- </tuple>
	return $ tag "tuple" $ unwords $ map (tag "tuple-elt") ls


	{-\|
	MkPersonRecord {
	name = "Jeremy Singer",
	address = MkAddress {
	line1 = "School of Computing Science",
	number = 17,
	street = "Lilybank Gdns",
	town = "Glasgow",
	postcode = "G12 8QQ"
	},
	id = 42,
	labels = [Blue,Yellow]
	}

	turns into:

	<record name="MkPersonRecord">
	<elt key="name">"Jeremy Singer"</elt>
	<elt key="address">
	<record name="MkAddress">
	<elt key="line1">"School of Computing Science"</elt>
	<elt key="number">17</elt>
	<elt key="street">"Lilybank Gdns"</elt>
	<elt key="town">"Glasgow"</elt>
	<elt key="postcode">"G12 8QQ"</elt>
	</record>
	</elt>
	<elt key="id">42</elt>
	<elt key="labels">
	<list>
	<list-elt><adt>Blue</adt></list-elt>
	<list-elt><adt>Yellow</adt></list-elt>
	</list>
	</elt>
	</record>

	using:
	- recordParser for: MkPersonRecord { .... } ; inside using kvParser, stringLiteral and number
	- kvParser for: address =
	- recordParser for: MkAddress { .... }
	- kvParser for: labels =
	- adtParser for: Blue and Yellow
	-}
	recordParser = do
	-- a Record has a Type Identifier, ex: MkPersonRecord
	ti <- typeIdentifier
	-- braces and comma separated values parsed by kvParser
	ls <- braces $ commaSep kvParser
	return $ tagAttrs "record" [("name", ti)] (joinNL ls)


	{-\|
	labels = [Blue,Yellow]

	turns into

	<elt key="labels">
	<list>
	<list-elt><adt>Blue</adt></list-elt>
	<list-elt><adt>Yellow</adt></list-elt>
	</list>
	</elt>

	using:
	- kvParser for: labels =
	- listParser for: [ .., ..]
	- adtParser for: Blue and Yellow
	-}
	adtParser = do
	ti <- typeIdentifier
	return $ tag "adt" ti


	{-\|
	name = "Jeremy Singer",

	turns into:

	<elt key="name">"Jeremy Singer"</elt>
	-}
	kvParser = do
	k <- identifier
	symbol "="
	t <- showParser
	return $ tagAttrs "elt" [("key", k)] t


	typeIdentifier = do
	-- a Type identifier begins with a Capital Letter
	fst <- oneOf ['A' .. 'Z']
	-- read the rest ... (many alphanumerics)
	rest <- many alphaNum
	whiteSpace
	return $ fst:rest



	-- The Parsec.Token module provides a number of basic parsers.
	-- Each of these takes as argument a lexer, generated by makeTokenParser using a language definition.
	-- Here we use emptyDef from the Language module.
	lexer = P.makeTokenParser emptyDef

	-- shorter name for the predefined parsers

	parens = P.parens lexer
	brackets = P.brackets lexer
	braces = P.braces lexer
	commaSep = P.commaSep lexer
	whiteSpace = P.whiteSpace lexer
	symbol = P.symbol lexer
	identifier = P.identifier lexer
	integer = P.integer lexer
	stringLiteral = P.stringLiteral lexer