Created
July 22, 2022 05:32
-
-
Save srijan-paul/87abfeacb84f0d862d093b3ae899cf67 to your computer and use it in GitHub Desktop.
Code for the blog at https://deepsource.io/blog/monadic-parser-combinators
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Combinators | |
( result, | |
zero, | |
item, | |
sat, | |
char, | |
digit, | |
upper, | |
lower, | |
letter, | |
alphanum, | |
string, | |
many', | |
many1, | |
then', | |
thenList, | |
sepBy, | |
bracket, | |
eval | |
) | |
where | |
import Control.Applicative (Alternative (empty, (<|>))) | |
import Control.Monad (MonadPlus (..), void) | |
import qualified Data.Bifunctor as Bifunctor | |
import Data.Char (isDigit, isLower, isSpace, isUpper) | |
import Data.Text.Internal.Read (digitToInt) | |
newtype Parser a = Parser {parse :: String -> [(a, String)]} | |
result :: a -> Parser a | |
result val = Parser $ \inp -> [(val, inp)] | |
zero :: Parser a | |
zero = Parser $ const [] | |
item :: Parser Char | |
item = Parser parseItem | |
where | |
parseItem [] = [] | |
parseItem (x : xs) = [(x, xs)] | |
-- sat :: (Char -> Bool) -> Parser Char | |
-- sat p = Parser parseIfSat | |
-- where | |
-- parseIfSat (x : xs) = if p x then [(x, xs)] else [] | |
-- parseIfSat [] = [] | |
instance Functor Parser where | |
fmap f p = Parser (fmap (Bifunctor.first f) . parse p) | |
instance Applicative Parser where | |
pure = result | |
p1 <*> p2 = Parser $ \inp -> do | |
(f, inp') <- parse p1 inp | |
(a, inp'') <- parse p2 inp' | |
return (f a, inp'') | |
instance Monad Parser where | |
-- Parser a -> (a -> Parser b) -> Parser b | |
p >>= f = Parser $ \inp -> | |
concat [parse (f v) inp' | (v, inp') <- parse p inp] | |
-- a -> Parser a | |
return = result | |
sat :: (Char -> Bool) -> Parser Char | |
sat p = | |
-- Apply `item`, if it fails on an empty string, we simply short circuit and get `[]`. | |
item >>= \x -> | |
if p x | |
then result x | |
else zero | |
char :: Char -> Parser Char | |
char x = sat (== x) | |
digit :: Parser Char | |
digit = sat isDigit | |
lower :: Parser Char | |
lower = sat isLower | |
upper :: Parser Char | |
upper = sat isUpper | |
-- Applies two parsers to the same input, then returns a list | |
-- containing results returned by both of them. | |
plus :: Parser a -> Parser a -> Parser a | |
p `plus` q = Parser $ \inp -> parse p inp ++ parse q inp | |
or' :: Parser a -> Parser a -> Parser a | |
p `or'` q = Parser $ \inp -> case parse (p `plus` q) inp of | |
[] -> [] | |
(x : xs) -> [x] | |
instance Alternative Parser where | |
empty = zero | |
(<|>) = or' | |
instance MonadPlus Parser where | |
mzero = zero | |
mplus = plus | |
letter :: Parser Char | |
letter = lower <|> upper | |
alphanum :: Parser Char | |
alphanum = letter <|> digit | |
string :: String -> Parser String | |
string "" = result "" | |
string (x : xs) = | |
char x >> string xs >> result (x : xs) | |
many' :: Parser a -> Parser [a] | |
many' p = | |
do | |
x <- p -- apply p once | |
xs <- many' p -- recursively apply `p` as many times as possible | |
return (x : xs) | |
<|> return [] | |
many1 :: Parser a -> Parser [a] | |
many1 p = do | |
x <- p | |
xs <- many' p | |
return (x : xs) | |
then' :: (a -> b -> c) -> Parser a -> Parser b -> Parser c | |
then' combine p q = | |
p >>= \x -> | |
q >>= \xs -> | |
result $ combine x xs | |
thenList :: Parser a -> Parser [a] -> Parser [a] | |
thenList = then' (:) | |
ident :: Parser String | |
ident = alpha_ `thenList` many' (alpha_ <|> digit) | |
where | |
alpha_ = letter <|> char '_' | |
-- Accept a list of sequences forming an `a`, separated by sequences forming a `b`. | |
sepBy :: Parser a -> Parser b -> Parser [a] | |
p `sepBy` sep = do | |
x <- p | |
xs <- many' (sep >> p) | |
return (x : xs) | |
bracket :: Parser a -> Parser b -> Parser c -> Parser b | |
bracket open p close = open >> p <* close | |
idList :: Parser [String] | |
idList = bracket (char '[') ids (char ']') | |
where | |
ids = ident `sepBy` char ',' | |
nat :: Parser Int | |
nat = read <$> many1 digit | |
spaces :: Parser () | |
spaces = void $ many' $ sat isSpace | |
token :: Parser a -> Parser a | |
token p = p <* spaces | |
parse' :: Parser a -> Parser a | |
parse' p = spaces >> p | |
identifier :: Parser String | |
identifier = token ident | |
-- consume a character and discard all trailing whitespace | |
charToken :: Char -> Parser Char | |
charToken = token <$> char | |
-- an ADT representing a parse tree for expressions | |
data Expr | |
= Add Expr Expr | |
| Sub Expr Expr | |
| Par Expr | |
| Lit Int | |
deriving (Show) | |
eval' :: Expr -> Int | |
eval' (Add a b) = eval' a + eval' b | |
eval' (Sub a b) = eval' a - eval' b | |
eval' (Par a) = eval' a | |
eval' (Lit a) = a | |
eval :: String -> Int | |
eval = fst . Bifunctor.first eval' . head <$> parse expr | |
chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a | |
p `chainl1` op = do | |
first <- p | |
rest <- many' $ do | |
f <- op | |
term <- p | |
return (f, term) | |
return $ foldl (\x (f, y) -> f x y) first rest | |
-- Our expression parser expects a string of the following grammar: | |
-- expr ::= term (op term)* | |
-- op ::= '+' | '-' | |
-- term ::= int | '(' expr ')' | |
-- int ::= [0-9]* | |
-- The `expr` parser first consumes an atomic term - <X>, then it | |
-- consumes a series of "<op> <operand>"s and packs them into tuples like ((+), 2) | |
-- We then fold the list of tuples using <X> as the initial value to produce the result. | |
expr :: Parser Expr | |
expr = term `chainl1` op | |
-- term := int | parens | |
term :: Parser Expr | |
term = int <|> parens | |
-- parens := '(' expr ')' | |
parens :: Parser Expr | |
parens = bracket (char '(') expr (char ')') | |
-- int := [0-9]* | |
int :: Parser Expr | |
int = Lit <$> token nat | |
-- op := '+' | '-' | |
op :: Parser (Expr -> Expr -> Expr) | |
op = makeOp '+' Add <|> makeOp '-' Sub | |
where | |
makeOp x f = charToken x >> return f |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment