Last active
May 19, 2023 19:23
-
-
Save Superstar64/da7ac23c7f7795d5bd93025002eab686 to your computer and use it in GitHub Desktop.
Lazy parser combinators in Haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Combinator where | |
{- | |
mnemonics: | |
pos : position | |
str : string | |
err : error | |
-} | |
import Control.Applicative (Alternative, empty, many, (<|>)) | |
import Control.Monad (ap, liftM) | |
import Data.List (uncons) | |
class Token t where | |
step :: t -> Position -> Position | |
class Monoid e => Error e where | |
errorAt :: Position -> e -> a | |
data Position = Position | |
{ file :: String, | |
line :: Int, | |
column :: Int | |
} | |
deriving (Show) | |
data Status s e | |
= Valid | |
| Error e s | |
deriving (Show) | |
data Consumption s e | |
= Fresh (Status s e) | |
| Consumed (Status s e) | |
deriving (Show) | |
-- If consumption has an error, then location, remaining, and probably value are bottom. | |
data Result e s a = Result | |
{ consumption :: Consumption s e, | |
value :: a, | |
location :: Position, | |
remaining :: s | |
} | |
deriving (Show) | |
-- e is error | |
-- s is string | |
newtype Parser e s a = Parser {runParser :: Position -> s -> Result e s a} | |
errorResult err pos str = Result (Fresh (Error err str)) fail fail fail | |
where | |
fail = errorAt pos err | |
instance Token Char where | |
step '\n' (Position file line _) = Position file (line + 1) 1 | |
step _ (Position file line column) = Position file line (column + 1) | |
instance Error () where | |
errorAt (Position file line column) () = error $ "Parser error at " ++ file ++ ":" ++ show line ++ ":" ++ show column | |
instance Functor (Parser e s) where | |
fmap = liftM | |
instance Applicative (Parser e s) where | |
pure = return | |
(<*>) = ap | |
instance Monad (Parser e s) where | |
return x = Parser $ \pos str -> Result (Fresh Valid) x pos str | |
Parser p >>= f = Parser $ \pos str -> case p pos str of | |
~(Result con1 a pos str) -> case runParser (f a) pos str of | |
~(Result con2 a pos str) -> Result (merge con1 con2) a pos str | |
where | |
merge (Consumed Valid) err = Consumed $ case err of | |
Fresh err -> err | |
Consumed err -> err | |
merge (Consumed (Error err str)) _ = Consumed (Error err str) | |
merge (Fresh (Error err str)) _ = Fresh (Error err str) | |
merge (Fresh Valid) err = err | |
instance Error e => Alternative (Parser e s) where | |
empty = Parser $ \pos str -> errorResult mempty pos str | |
Parser p <|> Parser q = Parser $ \pos str -> case p pos str of | |
result | |
| Result (Consumed _) _ _ _ <- result -> result | |
| Result (Fresh Valid) _ _ _ <- result -> result | |
| Result (Fresh (Error err _)) _ _ _ <- result -> case q pos str of | |
result | |
| Result (Consumed _) _ _ _ <- result -> result | |
| Result (Fresh Valid) _ _ _ <- result -> result | |
| Result (Fresh (Error err' _)) _ _ _ <- result -> errorResult (err <> err') pos str | |
position :: Parser e s Position | |
position = Parser $ \pos str -> Result (Fresh Valid) pos pos str | |
-- no monad plus definition because right identity can't be satified | |
-- like parsec, <?> only changes the error if the parser doesn't consume input | |
infix 0 <?> | |
(<?>) :: Error e => e -> Parser e s a -> Parser e s a | |
err <?> Parser p = Parser $ \pos str -> case p pos str of | |
result | |
| Result (Consumed _) _ _ _ <- result -> result | |
| Result (Fresh Valid) _ _ _ <- result -> result | |
| Result (Fresh (Error _ _)) _ _ _ <- result -> errorResult err pos str | |
try :: Error e => Parser e s a -> Parser e s a | |
try (Parser p) = Parser $ \pos str -> case p pos str of | |
Result (Consumed (Error _ _)) _ _ _ -> errorResult mempty pos str | |
result -> result | |
satifyFromBind :: (Token t, Error e) => (s -> Maybe (t, s)) -> (t -> Maybe a) -> Parser e s a | |
satifyFromBind uncons f = Parser $ \pos str -> case uncons str of | |
Just (c, str) | Just v <- f c -> Result (Consumed Valid) v (step c pos) str | |
_ -> errorResult mempty pos str | |
satifyFrom :: (Token t, Error e) => (s -> Maybe (t, s)) -> (t -> Bool) -> Parser e s t | |
satifyFrom uncons f = satifyFromBind uncons $ \c -> if f c then Just c else Nothing | |
satifyBind :: (Token t, Error e) => (t -> Maybe a) -> Parser e [t] a | |
satifyBind = satifyFromBind uncons | |
satify :: (Token t, Error e) => (t -> Bool) -> Parser e [t] t | |
satify = satifyFrom uncons | |
exact :: (Token t, Error e, Eq t) => t -> Parser e [t] t | |
exact c = satify (== c) | |
char :: (Token t, Error e) => Parser e [t] t | |
char = satify (const True) | |
stream file = Position file 1 1 | |
stdin = stream "stdin" | |
testLazyness :: Char | |
testLazyness = head (value result) | |
where | |
result :: Result () String String | |
result = runParser ((++) <$> many (exact 'a') <*> many (exact 'b')) stdin $ 'a' : undefined |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment