Last active
March 6, 2018 17:03
-
-
Save gabriel-fallen/2ad1af4f1d06b8fe9e59f8d1472114b8 to your computer and use it in GitHub Desktop.
Pseudohaskell for continuation-based XML SAX-parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Parser where | |
import Control.Monad.Cont | |
import Control.Monad.Except | |
import Data.ByteString | |
import Streaming | |
import qualified Streaming.Prelude as S | |
import Xeno.Streaming | |
-- | A record we want to parse from beneath unknown layers of XML | |
data Greeting = Greeting | |
{ gHello :: ByteString | |
, gWorld :: ByteString | |
} deriving (Eq, Show) | |
testXML = "<?xml version=\"1.1\"?>\n<foo><greeting><hello>Привет</hello><world>мир</world></greeting></foo>" | |
parseGreeting = do | |
skipUntilOpeningTag "greeting" | |
-- greeting <- Greeting <$> parseHello <*> parseWorld | |
greeting <- do | |
[h, w] <- allOf ["hello", "world"] [parseHello, parseWorld] | |
pure $ Greeting h w | |
skipUntilClosingTag "greeting" | |
pure greeting | |
where | |
parseHello = do | |
hello <- extractText -- надо, конечно, уточнить семантику экстракта - с тегами там или без | |
skipUntilClosingTag "hello" | |
pure hello | |
parseWorld = do | |
world <- extractText | |
skipUntilClosingTag "world" | |
pure world | |
-- Теперь осталось соорудить на продолжениях монаду, в которой это будет работать. :D | |
skipUntilOpeningTag tag = do | |
callCC skipUntilOpeningTag' | |
where | |
skipUntilOpeningTag' k = do | |
s <- get -- for simplicity, actually it's Parser monad not necessarily State monad | |
res <- S.next s | |
case res of | |
Left _ -> throwError $ ParserError "EOF" | |
Right (event, s') -> do | |
case event of | |
OpenTag t -> if t == tag then k else put s' >> skipUntilOpeningTag' k | |
_ -> put s' >> skipUntilOpeningTag' k | |
allOf tags parsers = do | |
callCC $ allOf' tags parsers [] | |
where | |
allOf' [] _ results k = k results -- we've seen all tags and supposedly parsed them all | |
allOf' waitlist ps rs k = do | |
s <- get -- for simplicity, actually it's Parser monad not necessarily State monad | |
res <- S.next s | |
case res of | |
Left _ -> throwError $ ParserError "EOF" | |
Right (event, s') -> do | |
case event of | |
OpenTag t -> if t `elem` waitlist | |
then do | |
let i = index t waitlist | |
p = ps !! i | |
res <- p | |
put s' | |
allOf' (remove i waitlist) (remove i ps) (res : rs) k -- не тот порядок результатов, надо его исправить с помощью продолжений | |
else put s' >> allOf' waitlist ps rs k | |
_ -> put s' >> allOf' waitlist ps rs k | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment