Last active
June 16, 2020 08:12
-
-
Save kuribas/8abd511d03b60f3113a4c81e0e01308f to your computer and use it in GitHub Desktop.
xml parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE GeneralizedNewtypeDeriving #-} | |
{-# LANGUAGE LambdaCase #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE ScopedTypeVariables#-} | |
{-# LANGUAGE FlexibleContexts #-} | |
module Text.XML.Expat.Tree.Parser where | |
import Data.Bifunctor | |
import Control.Monad.Trans (lift) | |
import Control.Monad.State hiding (fail, lift) | |
import Control.Applicative hiding (many) | |
import Control.Monad.Except hiding (fail, lift) | |
import Control.Monad.Fail | |
import Control.Monad.Reader hiding (fail, lift) | |
import qualified Data.List.Class as List | |
import Data.List.Class (List, ItemM) | |
import Text.XML.Expat.SAX | |
import qualified Data.Text as Text | |
import Data.Text (Text) | |
import Control.Monad.Combinators | |
type EventLoc = (SAXEvent Text Text, XMLParseLocation) | |
type Attrs = [(Text, Text)] | |
data SAXStream l = | |
-- list for unordered parsers | |
UnOrdered [EventLoc] | | |
-- stream for ordered parsers | |
Ordered (l EventLoc) | |
newtype EventParser l m a = | |
EventParser (ReaderT Attrs (StateT (SAXStream l) (ExceptT Text m)) a) | |
deriving (Functor, Applicative, Monad, MonadError Text) | |
newtype AttrParser a = AttrParser (StateT Attrs (Either Text) a) | |
deriving (Functor, Applicative, Monad, MonadError Text) | |
class ParserAttr a where | |
parseAttr :: Text -> Either Text a | |
lookupRemove :: (Eq k) => k -> [(k, v)] -> Maybe (v, [(k, v)]) | |
lookupRemove _ [] = Nothing | |
lookupRemove k1 ((k2, v):rest) | |
| k1 == k2 = Just (v,rest) | |
| otherwise = second ((k2, v):) <$> lookupRemove k1 rest | |
-- | an attribute parser which returns the value for that attribute. | |
getAttr :: ParserAttr a => Text -> AttrParser a | |
getAttr attr = AttrParser $ do | |
attrs <- get | |
case lookupRemove attr attrs of | |
Nothing -> throwError $ "Attribute " <> attr <> " required." | |
Just (v, st) -> | |
do put st; | |
either throwError pure $ parseAttr v | |
-- | run an attribute parser, which must consume all attributes. | |
parseAll :: Monad (ItemM l) => AttrParser a -> EventParser l (ItemM l) a | |
parseAll (AttrParser attrP) = EventParser $ do | |
attrs <- ask | |
case runStateT attrP attrs of | |
Left err -> throwError err | |
Right (a, []) -> pure a | |
Right (a, otherAttrs) -> | |
throwError $ | |
"Unknown attributes: " <> Text.unwords (map fst otherAttrs) | |
-- | run an attribute parser without consuming any attributes. | |
parseSome :: Monad (ItemM l) => AttrParser a -> EventParser l (ItemM l) a | |
parseSome (AttrParser attrP) = EventParser $ do | |
attrs <- ask | |
case runStateT attrP attrs of | |
Left err -> throwError err | |
Right (a, _) -> pure a | |
instance Monad m => MonadFail (EventParser l m) where | |
fail s = throwError $ Text.pack s | |
instance Monad m => Alternative (EventParser l m) where | |
p <|> q = catchError p $ const q | |
empty = throwError "Parse failure: empty" | |
instance Monad m => MonadPlus (EventParser l m) where | |
mplus = (<|>) | |
mzero = empty | |
liftListT :: Monad (ItemM l) => ItemM l a -> EventParser l (ItemM l) a | |
liftListT = EventParser . lift . lift . lift | |
-- | Annotate the parser with a better error message. | |
(<?>) :: Monad m => EventParser l m a -> Text -> EventParser l m a | |
parser <?> msg = parser <|> throwError msg | |
-- | Parse a tag. Parser the children in the order or the inner parser. | |
someTag :: Monad (ItemM l) | |
=> (Text -> EventParser l (ItemM l) a) | |
-- ^ a function which takes the tagname, and produces a parser | |
-- which parses the children of the tag. The children tags | |
-- are parsed in order. | |
-> EventParser l (ItemM l) a | |
someTag inner = EventParser $ do | |
elems <- get | |
case elems of | |
UnOrdered [] -> throwError "Unexpected end of input." | |
UnOrdered lst -> _ | |
Ordered lst -> _ | |
-- | | |
someUnorderedTag :: (Text -> EventParser l (ItemM l) a) | |
-> EventParser l (ItemM l) a | |
someUnorderedTag inner = _ | |
-- | Skip next tag | |
skipTag :: Monad (ItemM l) => EventParser l (ItemM l) () | |
skipTag = someTag $ const skipTags | |
-- | Skip remaining tags, if any. | |
skipTags :: Monad (ItemM l) => EventParser l (ItemM l) () | |
skipTags = void $ many skipTag | |
-- | Skip zero or more tags until the given parser succeeds | |
skipTagsTill :: Monad (ItemM l) => EventParser l (ItemM l) a | |
-> EventParser l (ItemM l) a | |
skipTagsTill = skipManyTill skipTag | |
expectName :: Monad m => EventParser l m a -> Text -> Text | |
-> EventParser l m a | |
expectName inner name1 name2 = | |
(guard (name1 == name2) <?> ("Expected <" <> name1 <> ">")) *> inner | |
-- | Parse a tag with the given name, using the inner parser for the | |
-- children tags. The children tags are parsed in the order of the | |
-- inner parser. | |
tag :: Monad (ItemM l) | |
=> Text -> EventParser l (ItemM l) a | |
-> EventParser l (ItemM l) a | |
tag name inner = someTag $ expectName inner name | |
-- | Parse a tag with the given name, using the inner parser for the | |
-- children tags. The children tags can be in any order. Note that | |
-- this is less efficient than an orderedTag, since it has to keep | |
-- track of all unmatched tags. | |
unorderedTag :: Monad (ItemM l) | |
=> Text -> EventParser l (ItemM l) a | |
-> EventParser l (ItemM l) a | |
unorderedTag name inner = someUnorderedTag $ expectName inner name | |
-- | Parse a tag which can have no children. Pass the tagname to the | |
-- inner parser. If the tag has children, an error is raised. | |
someEmptyTag :: Monad (ItemM l) | |
=> (Text -> EventParser l (ItemM l) a) | |
-> EventParser l (ItemM l) a | |
someEmptyTag inner = someTag $ \name -> do | |
hasChildren <- (True <$ skipTag) <|> pure False | |
when hasChildren $ throwError "Unexpected children." | |
inner name | |
-- | Parser a tag with the given name which can have no children. If | |
-- the tag has children, an error is raised. | |
emptyTag :: Monad (ItemM l) => Text -> EventParser l (ItemM l) a | |
-> EventParser l (ItemM l) a | |
emptyTag name inner = someEmptyTag $ expectName inner name |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment