Skip to content

Instantly share code, notes, and snippets.

@kuribas
Last active June 16, 2020 08:12
Show Gist options
  • Save kuribas/8abd511d03b60f3113a4c81e0e01308f to your computer and use it in GitHub Desktop.
Save kuribas/8abd511d03b60f3113a4c81e0e01308f to your computer and use it in GitHub Desktop.
xml parser
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables#-}
{-# LANGUAGE FlexibleContexts #-}
module Text.XML.Expat.Tree.Parser where
import Data.Bifunctor
import Control.Monad.Trans (lift)
import Control.Monad.State hiding (fail, lift)
import Control.Applicative hiding (many)
import Control.Monad.Except hiding (fail, lift)
import Control.Monad.Fail
import Control.Monad.Reader hiding (fail, lift)
import qualified Data.List.Class as List
import Data.List.Class (List, ItemM)
import Text.XML.Expat.SAX
import qualified Data.Text as Text
import Data.Text (Text)
import Control.Monad.Combinators
type EventLoc = (SAXEvent Text Text, XMLParseLocation)
type Attrs = [(Text, Text)]
data SAXStream l =
-- list for unordered parsers
UnOrdered [EventLoc] |
-- stream for ordered parsers
Ordered (l EventLoc)
newtype EventParser l m a =
EventParser (ReaderT Attrs (StateT (SAXStream l) (ExceptT Text m)) a)
deriving (Functor, Applicative, Monad, MonadError Text)
newtype AttrParser a = AttrParser (StateT Attrs (Either Text) a)
deriving (Functor, Applicative, Monad, MonadError Text)
class ParserAttr a where
parseAttr :: Text -> Either Text a
lookupRemove :: (Eq k) => k -> [(k, v)] -> Maybe (v, [(k, v)])
lookupRemove _ [] = Nothing
lookupRemove k1 ((k2, v):rest)
| k1 == k2 = Just (v,rest)
| otherwise = second ((k2, v):) <$> lookupRemove k1 rest
-- | an attribute parser which returns the value for that attribute.
getAttr :: ParserAttr a => Text -> AttrParser a
getAttr attr = AttrParser $ do
attrs <- get
case lookupRemove attr attrs of
Nothing -> throwError $ "Attribute " <> attr <> " required."
Just (v, st) ->
do put st;
either throwError pure $ parseAttr v
-- | run an attribute parser, which must consume all attributes.
parseAll :: Monad (ItemM l) => AttrParser a -> EventParser l (ItemM l) a
parseAll (AttrParser attrP) = EventParser $ do
attrs <- ask
case runStateT attrP attrs of
Left err -> throwError err
Right (a, []) -> pure a
Right (a, otherAttrs) ->
throwError $
"Unknown attributes: " <> Text.unwords (map fst otherAttrs)
-- | run an attribute parser without consuming any attributes.
parseSome :: Monad (ItemM l) => AttrParser a -> EventParser l (ItemM l) a
parseSome (AttrParser attrP) = EventParser $ do
attrs <- ask
case runStateT attrP attrs of
Left err -> throwError err
Right (a, _) -> pure a
instance Monad m => MonadFail (EventParser l m) where
fail s = throwError $ Text.pack s
instance Monad m => Alternative (EventParser l m) where
p <|> q = catchError p $ const q
empty = throwError "Parse failure: empty"
instance Monad m => MonadPlus (EventParser l m) where
mplus = (<|>)
mzero = empty
liftListT :: Monad (ItemM l) => ItemM l a -> EventParser l (ItemM l) a
liftListT = EventParser . lift . lift . lift
-- | Annotate the parser with a better error message.
(<?>) :: Monad m => EventParser l m a -> Text -> EventParser l m a
parser <?> msg = parser <|> throwError msg
-- | Parse a tag. Parser the children in the order or the inner parser.
someTag :: Monad (ItemM l)
=> (Text -> EventParser l (ItemM l) a)
-- ^ a function which takes the tagname, and produces a parser
-- which parses the children of the tag. The children tags
-- are parsed in order.
-> EventParser l (ItemM l) a
someTag inner = EventParser $ do
elems <- get
case elems of
UnOrdered [] -> throwError "Unexpected end of input."
UnOrdered lst -> _
Ordered lst -> _
-- |
someUnorderedTag :: (Text -> EventParser l (ItemM l) a)
-> EventParser l (ItemM l) a
someUnorderedTag inner = _
-- | Skip next tag
skipTag :: Monad (ItemM l) => EventParser l (ItemM l) ()
skipTag = someTag $ const skipTags
-- | Skip remaining tags, if any.
skipTags :: Monad (ItemM l) => EventParser l (ItemM l) ()
skipTags = void $ many skipTag
-- | Skip zero or more tags until the given parser succeeds
skipTagsTill :: Monad (ItemM l) => EventParser l (ItemM l) a
-> EventParser l (ItemM l) a
skipTagsTill = skipManyTill skipTag
expectName :: Monad m => EventParser l m a -> Text -> Text
-> EventParser l m a
expectName inner name1 name2 =
(guard (name1 == name2) <?> ("Expected <" <> name1 <> ">")) *> inner
-- | Parse a tag with the given name, using the inner parser for the
-- children tags. The children tags are parsed in the order of the
-- inner parser.
tag :: Monad (ItemM l)
=> Text -> EventParser l (ItemM l) a
-> EventParser l (ItemM l) a
tag name inner = someTag $ expectName inner name
-- | Parse a tag with the given name, using the inner parser for the
-- children tags. The children tags can be in any order. Note that
-- this is less efficient than an orderedTag, since it has to keep
-- track of all unmatched tags.
unorderedTag :: Monad (ItemM l)
=> Text -> EventParser l (ItemM l) a
-> EventParser l (ItemM l) a
unorderedTag name inner = someUnorderedTag $ expectName inner name
-- | Parse a tag which can have no children. Pass the tagname to the
-- inner parser. If the tag has children, an error is raised.
someEmptyTag :: Monad (ItemM l)
=> (Text -> EventParser l (ItemM l) a)
-> EventParser l (ItemM l) a
someEmptyTag inner = someTag $ \name -> do
hasChildren <- (True <$ skipTag) <|> pure False
when hasChildren $ throwError "Unexpected children."
inner name
-- | Parser a tag with the given name which can have no children. If
-- the tag has children, an error is raised.
emptyTag :: Monad (ItemM l) => Text -> EventParser l (ItemM l) a
-> EventParser l (ItemM l) a
emptyTag name inner = someEmptyTag $ expectName inner name
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment