Last active
June 26, 2024 04:05
-
-
Save Revolucent/802c9abcb51c66a841b5b967a7c92807 to your computer and use it in GitHub Desktop.
Haskell parser combinators written from scratch as an exercise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE FlexibleContexts, FlexibleInstances, FunctionalDependencies #-} | |
module Parsy where | |
import Control.Monad.Error.Class | |
import Data.List (foldr1, singleton, (!!)) | |
import GHC.Base (empty) | |
import RIO hiding (optional) | |
import RIO.Char | |
-- | Wraps a collection. | |
-- | |
-- Parsy is a collection parser, not a stream | |
-- parser. Thus, backtracking is built in, but | |
-- of course it's not suitable for parsing | |
-- huge amounts of data. This is a (fully functional) | |
-- toy built as an exercise in learning. | |
class Collection e c | c -> e where | |
elemAtIndex :: Word -> c -> Maybe e | |
instance Collection Char String where | |
elemAtIndex i s | |
| n < length s = Just $ s !! n | |
| otherwise = Nothing | |
where | |
n = fromIntegral i | |
data ParseError = NoMatch | EOD deriving (Eq, Ord, Show) | |
newtype Parser c a = Parser { parse :: Word -> c -> Either (Word, ParseError) (Word, a) } | |
runParser :: c -> Parser c a -> Either (Word, ParseError) a | |
runParser c parser = snd <$> parse parser 0 c | |
instance Functor (Parser c) where | |
fmap f (Parser parse) = Parser $ \i c -> second f <$> parse i c | |
instance Applicative (Parser c) where | |
pure a = Parser $ \i -> const $ return (i, a) | |
(Parser fparse) <*> (Parser aparse) = Parser $ \i c -> do | |
(n, f) <- fparse i c | |
(x, a) <- aparse n c | |
return (x, f a) | |
instance Monad (Parser c) where | |
return = pure | |
(Parser aparser) >>= f = Parser $ \i c -> do | |
(n, a) <- aparser i c | |
parse (f a) n c | |
instance Alternative (Parser c) where | |
empty = throwParseError NoMatch | |
a <|> b = catchError a (const b) | |
throwParseError :: ParseError -> Parser c a | |
throwParseError e = Parser $ \i -> const $ throwError (i, e) | |
instance MonadError (Word, ParseError) (Parser c) where | |
throwError error = Parser $ const $ const $ throwError error | |
catchError (Parser aparser) handler = Parser $ \i c -> | |
catchError (aparser i c) (\e -> parse (handler e) i c) | |
collectionWithCurrentIndex :: Parser c (Word, c) | |
collectionWithCurrentIndex = Parser $ \i c -> return (i, (i, c)) | |
current :: Collection e c => Parser c e | |
current = do | |
(i, c) <- collectionWithCurrentIndex | |
case elemAtIndex i c of | |
Just e -> return e | |
Nothing -> throwParseError EOD | |
getIndex :: Parser c Word | |
getIndex = Parser $ \i -> const $ return (i, i) | |
setIndex :: Word -> Parser c () | |
setIndex i = Parser $ const $ const $ return (i, ()) | |
advanceBy :: Word -> Parser c () | |
advanceBy delta = Parser $ \i -> const $ return (i + delta, ()) | |
advance :: Parser c () | |
advance = advanceBy 1 | |
eod :: Collection e c => Parser c () | |
eod = do | |
(i, c) <- collectionWithCurrentIndex | |
case elemAtIndex i c of | |
Nothing -> return () | |
Just _ -> throwParseError NoMatch | |
atEOD :: Collection e c => Parser c Bool | |
atEOD = collectionWithCurrentIndex <&> isNothing . uncurry elemAtIndex | |
not :: Parser c a -> Parser c () | |
not (Parser parser) = Parser $ \i c -> do | |
case parser i c of | |
Left _ -> Right (i, ()) | |
Right _ -> Left (i, NoMatch) | |
match :: Collection e c => (e -> Bool) -> Parser c e | |
match predicate = do | |
e <- current | |
if predicate e | |
then advance >> return e | |
else throwParseError NoMatch | |
eq :: (Eq e, Collection e c) => e -> Parser c e | |
eq model = match (== model) | |
string :: Collection Char c => String -> Parser c String | |
string = mapM eq | |
ws :: Collection Char c => Parser c Char | |
ws = match isSpace | |
oneOf :: (Eq e, Collection e c) => [e] -> Parser c e | |
oneOf = foldr1 (<|>) . map eq | |
many1 :: Parser c a -> Parser c [a] | |
many1 parser = liftA2 (:) parser (many parser) | |
snoc :: [a] -> a -> [a] | |
snoc xs x = xs ++ [x] | |
optional :: Parser c a -> Parser c [a] | |
optional parser = catchError (singleton <$> parser) $ const $ return [] | |
manySep :: Parser c a -> Parser c s -> Parser c [a] | |
manySep p s = liftA2 (++) (many (p <* s)) (optional p) | |
many1Sep :: Parser c a -> Parser c s -> Parser c [a] | |
many1Sep p s = liftA2 snoc (many (p <* s)) p |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment