Last active
April 30, 2024 11:32
-
-
Save mihassan/948846f5578ce0dadc5f54e3a6ee481f to your computer and use it in GitHub Desktop.
This Haskell script demonstrates a simple expression parser with monadic combinators.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env cabal | |
{- cabal: | |
build-depends: base | |
-} | |
{-# LANGUAGE DeriveFunctor #-} | |
import Control.Applicative | |
import Control.Monad | |
import Data.Char | |
-- | This Haskell script demonstrates a simple expression parser with monadic combinators. | |
-- | It can be broken into five parts: | |
-- | 1. A mini parser combinator library with monadic combinators. | |
-- | 2. A tokenizer that converts a string into a list of tokens. | |
-- | 3. A function that converts a list of tokens into a postfix notation. | |
-- | 4. A function that converts a list of tokens into an expression tree. | |
-- | 5. A function that evaluates an expression tree. | |
-- | Parser implementation with monadic combinators | |
data Parser a = Parser | |
{ runParser :: String -> Maybe (String, a) | |
} | |
deriving (Functor) | |
parse :: Parser a -> String -> Maybe a | |
parse p i = case runParser p i of | |
Just (_, x) -> Just x | |
_ -> Nothing | |
instance Applicative Parser where | |
pure x = Parser $ \i -> Just (i, x) | |
(<*>) = ap | |
instance Monad Parser where | |
return = pure | |
p >>= f = Parser $ \i -> do | |
(i', x) <- runParser p i | |
runParser (f x) i' | |
instance Alternative Parser where | |
empty = Parser $ const Nothing | |
p1 <|> p2 = Parser $ \i -> runParser p1 i <|> runParser p2 i | |
-- | Basic parsers | |
anyChar :: Parser Char | |
anyChar = Parser $ \i -> case i of | |
c : cs -> Just (cs, c) | |
_ -> Nothing | |
satisfy :: (Char -> Bool) -> Parser Char | |
satisfy p = anyChar >>= \c -> if p c then return c else empty | |
eof :: Parser () | |
eof = anyChar *> empty <|> return () | |
-- | Parser combinators | |
between :: Parser a -> Parser b -> Parser c -> Parser b | |
between p1 p2 p3 = p1 *> p2 <* p3 | |
surroundedBy :: Parser a -> Parser b -> Parser b | |
surroundedBy p1 p2 = between p1 p2 p1 | |
trim :: Parser a -> Parser a | |
trim = surroundedBy (many space) | |
-- | Few more parsers | |
space :: Parser Char | |
space = satisfy isSpace | |
char :: Char -> Parser Char | |
char c = satisfy (== c) | |
string :: String -> Parser String | |
string = mapM char | |
symbol :: String -> a -> Parser a | |
symbol s x = string s *> return x | |
digit :: Parser Char | |
digit = satisfy isDigit | |
int :: Parser Int | |
int = read <$> some digit | |
double :: Parser Double | |
double = do | |
n <- int | |
_ <- char '.' | |
m <- int | |
return $ read $ show n ++ "." ++ show m | |
num :: Parser Double | |
num = double <|> (fromIntegral <$> int) | |
-- | Tokenizer | |
data Operator | |
= OperatorPlus | |
| OperatorMinus | |
| OperatorMult | |
| OperatorDiv | |
| OperatorPow | |
| OperatorLParen | |
| OperatorRParen | |
deriving (Show, Eq) | |
operator :: Parser Operator | |
operator = | |
symbol "+" OperatorPlus | |
<|> symbol "-" OperatorMinus | |
<|> symbol "**" OperatorPow | |
<|> symbol "*" OperatorMult | |
<|> symbol "/" OperatorDiv | |
<|> symbol "(" OperatorLParen | |
<|> symbol ")" OperatorRParen | |
precedence :: Operator -> Int | |
precedence OperatorPlus = 1 | |
precedence OperatorMinus = 1 | |
precedence OperatorMult = 2 | |
precedence OperatorDiv = 2 | |
precedence OperatorPow = 3 | |
precedence OperatorLParen = error "Left parentheses is handled separately" | |
precedence OperatorRParen = error "Right parentheses is handled separately" | |
data Token | |
= TokenNum Double | |
| TokenOperator Operator | |
deriving (Show, Eq) | |
token :: Parser Token | |
token = | |
TokenNum <$> num | |
<|> TokenOperator <$> operator | |
tokens :: Parser [Token] | |
tokens = many (trim token) | |
-- Try to tokenize the whole input string or return Nothing if it fails. | |
tokenize :: String -> Maybe [Token] | |
tokenize = parse $ tokens <* eof | |
-- Prefix to Postfix conversion using the Shunting-yard algorithm. | |
toPostfix :: [Token] -> Maybe [Token] | |
toPostfix ts = go [] ts | |
where | |
go :: [Operator] -> [Token] -> Maybe [Token] | |
-- Base case, all tokens have been processed | |
go [] [] = Just [] | |
go (op : ops) [] = case op of | |
OperatorLParen -> Nothing | |
OperatorRParen -> error "Right parentheses should not be in the operator stack" | |
_ -> (TokenOperator op :) <$> go ops [] | |
go ops (t : ts) = case t of | |
-- Put operands to the output. | |
TokenNum n -> (TokenNum n :) <$> go ops ts | |
TokenOperator op -> case op of | |
-- Push left parentheses to the operator stack. | |
OperatorLParen -> go (op : ops) ts | |
-- Pop operators from the operator stack to the output until a left parentheses is encountered. | |
OperatorRParen -> case ops of | |
-- If there is no left parentheses in the operator stack, then the expression is invalid. | |
[] -> Nothing | |
-- Discard the left parentheses. | |
(OperatorLParen : ops') -> go ops' ts | |
-- Pop operators to the output. | |
(op' : ops') -> (TokenOperator op' :) <$> go ops' (t : ts) | |
-- Handle other operators. | |
_ -> case ops of | |
-- Push the operator to the operator stack. | |
[] -> go [op] ts | |
-- Pop operators from the operator stack to the output until an operator with higher precedence is encountered. | |
op' : ops' -> | |
if op' /= OperatorLParen && precedence op' >= precedence op | |
then (TokenOperator op' :) <$> go ops' (t : ts) | |
else go (op : ops) ts | |
-- | Expression parser | |
data Expr | |
= ExprNum Double | |
| ExprOperator Operator Expr Expr | |
deriving (Show) | |
parseExpr :: [Token] -> Maybe Expr | |
parseExpr ts = go [] ts | |
where | |
go :: [Expr] -> [Token] -> Maybe Expr | |
go [e] [] = Just e | |
go (e2 : e1 : es) (TokenOperator op : ts) = go (ExprOperator op e1 e2 : es) ts | |
go es (TokenNum n : ts) = go (ExprNum n : es) ts | |
evalExpr :: Expr -> Double | |
evalExpr (ExprNum n) = n | |
evalExpr (ExprOperator o e1 e2) = case o of | |
OperatorPlus -> evalExpr e1 + evalExpr e2 | |
OperatorMinus -> evalExpr e1 - evalExpr e2 | |
OperatorMult -> evalExpr e1 * evalExpr e2 | |
OperatorDiv -> evalExpr e1 / evalExpr e2 | |
OperatorPow -> evalExpr e1 ** evalExpr e2 | |
-- | Main | |
main :: IO () | |
main = do | |
let Just t = tokenize "1.2 ** 3.4 + (5 + 6 * 7 - 8.9) / (10 + 11) * 12 / 13 / 14" | |
let r = 1.2 ** 3.4 + (5 + 6 * 7 - 8.9) / (10 + 11) * 12 / 13 / 14 | |
print t | |
let Just p = toPostfix t | |
print p | |
let Just e = parseExpr p | |
print e | |
print $ evalExpr e | |
print $ r |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment