Created
November 4, 2023 12:16
-
-
Save nerodono/c78a012c009331e8ab8b40a0f3845f8d to your computer and use it in GitHub Desktop.
Resulting code of the https://nerodono.github.io/generalizing-recursive-descent/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char ( isDigit ) | |
import qualified Data.Map as M | |
import qualified Data.Bifunctor as Bi | |
import Prelude hiding ( scope, lookup ) | |
data BType = Open | Close | |
deriving Show | |
newtype Operator = Operator String | |
deriving(Show, Eq, Ord) -- Ord will be needed later | |
data Token = TOperator Operator | |
| TNumber Integer | |
| TBracket BType | |
deriving Show | |
compoundOperatorChars :: String | |
compoundOperatorChars = "+-*<>/!?" | |
tokenize :: String -> [Token] | |
-- Skipping the whitespaces | |
tokenize (h:t) | h `elem` " \t" = | |
tokenize t | |
-- Brackets | |
tokenize ('(':t) = TBracket Open : tokenize t | |
tokenize (')':t) = TBracket Close : tokenize t | |
tokenize (h:t) | h `elem` compoundOperatorChars = | |
TOperator compound : tokenize tail' | |
where (rest, tail') = span (`elem` compoundOperatorChars) t | |
compound = Operator $ h:rest | |
-- Parsing digits | |
tokenize (h:t) | isDigit h = | |
token : tokenize tail' | |
where (rest, tail') = span isDigit t | |
token = TNumber $ read (h:rest) | |
tokenize (h:t) = error $ "Unexpected char " ++ [h] | |
tokenize [] = [] | |
-- Precedence store | |
type Precedence = Integer | |
data Store = Store { operators :: M.Map Operator Precedence | |
, scope :: M.Map Precedence Integer | |
, root :: M.Map Precedence Integer | |
} | |
deriving Show | |
splitMin :: Store -> Maybe (Precedence, Store) | |
splitMin (Store operators scope root) = | |
-- M.keys returns keys in the ascending order | |
-- so the first returned key is the least | |
case M.keys scope of | |
[] -> Nothing | |
key:_ -> | |
-- Here' we remove the least precedence from scope | |
-- if number of operators with that precedence is 1 | |
-- or just subtract one, if there's more\ | |
let scope' = M.updateWithKey f key scope | |
f _ v = if v == 1 then Nothing else Just (v - 1) | |
in Just (key, Store operators scope' root) | |
restoreRoot :: Store -> Store | |
restoreRoot (Store operators _ root) = | |
Store operators root root | |
lookup :: Operator -> Store -> Precedence | |
lookup op (Store operators _ _) = | |
case M.lookup op operators of | |
Just r -> r | |
Nothing -> error $ "No such operator" ++ show op | |
fromList :: [( Operator, Precedence )] -> Store | |
fromList list' = | |
let -- Construct map of operator:precedence | |
operators' = M.fromList list' | |
-- Create so-called "scope" | |
-- it's just map of precedence:number-of-operators | |
scope' = foldl f M.empty list' | |
f map' (op, prec') = | |
-- insert or add 1 to existing entry in the scope map | |
M.insertWith (+) prec' 1 map' | |
in Store operators' scope' scope' | |
-- AST | |
data Expr = EBinary Operator Expr Expr | |
| ENumber Integer | |
deriving Show | |
-- The function that will reduce two expressions to one | |
-- (perform binary operation) | |
type BinEvalFn = Operator -> Integer -> Integer -> Integer | |
eval :: BinEvalFn -> Expr -> Integer | |
eval reduce_binary expr = | |
case expr of | |
EBinary operator lhs rhs -> | |
reduce_binary operator (eval' lhs) (eval' rhs) | |
ENumber number -> | |
number | |
where eval' = eval reduce_binary | |
-- Parser | |
type Tailed a = Maybe (a, [Token]) | |
factor :: Store -> [Token] -> Tailed Expr | |
factor store (h:t) = | |
case h of | |
TNumber number -> Just ( ENumber number | |
, t | |
) | |
TBracket Open -> do | |
(expr, t') <- expression store t | |
case t' of | |
-- Expect next token to be a closing bracket | |
TBracket Close:t'' -> | |
Just ( expr | |
, t'' ) | |
-- Fail if it isn't | |
_ -> Nothing | |
-- Unexpected token | |
_ -> Nothing | |
factor _ [] = Nothing | |
type ParseF = [Token] -> Tailed Expr | |
-- Function that consumes left hand side expression | |
-- and remaining tokens and returns pair of resulting expression and the tail | |
type FoldFn = Expr -> [Token] -> Tailed Expr | |
expression :: Store -> [Token] -> Tailed Expr | |
expression store = | |
-- Result of this case would be curried | |
-- So actually we return `[Token] -> Tailed Expr` | |
case splitMin store of | |
Just (precedence, whats_left) -> | |
-- We still have ways go to down | |
binary precedence $ expression whats_left | |
Nothing -> | |
-- Here we on the factor's precedence | |
factor $ restoreRoot store | |
where | |
binary :: Integer -> ParseF -> [Token] -> Tailed Expr | |
binary current_precedence parse tokens = | |
parse tokens >>= \(lhs, t) -> | |
case t of | |
-- Next token should be infix operator | |
TOperator operator:t' -> | |
-- If we're not parsing operator with that precedence | |
-- return only lhs | |
if lookup operator store == current_precedence then | |
-- Before we met first operator | |
foldlExpr lhs (leftFoldFn parse $ expectSameOp operator) t | |
else | |
Just ( lhs | |
, t ) | |
-- Or it's just the end, for example: | |
-- 2: from + to * we got only factor | |
-- but it's still OK | |
_ -> Just ( lhs | |
, t ) | |
leftFoldFn :: ParseF -> ([Token] -> Tailed Operator) -> Expr -> [Token] -> Tailed Expr | |
leftFoldFn parse expectOperator lhs tokens = do | |
(op, t) <- expectOperator tokens | |
(rhs, t') <- parse t | |
Just ( EBinary op lhs rhs | |
, t' ) | |
expectSameOp :: Operator -> [Token] -> Tailed Operator | |
expectSameOp op (TOperator got_op:t) | |
| got_op == op = Just (got_op, t) | |
expectSameOp _ _ = Nothing | |
-- This is just modification of the standard `foldl` | |
-- Folds multiple expressions into one | |
foldlExpr :: Expr -> FoldFn -> [Token] -> Tailed Expr | |
foldlExpr expr fold_fn tokens = | |
case fold_fn expr tokens of | |
Just (expr', tail') -> | |
foldlExpr expr' fold_fn tail' | |
Nothing -> Just (expr, tokens) | |
defaultStore :: Store | |
defaultStore = fromList [(Operator "+", 1), (Operator "-", 1), (Operator "*", 2)] | |
parseText :: String -> Tailed Expr | |
parseText = expression defaultStore . tokenize | |
parseTextAsSExpr :: String -> Tailed String | |
parseTextAsSExpr = | |
fmap (Bi.first toSExpr) . parseText | |
toSExpr :: Expr -> String | |
toSExpr (EBinary (Operator op) lhs rhs) = | |
"(" ++ op ++ " " ++ toSExpr lhs ++ " " ++ toSExpr rhs ++ ")" | |
toSExpr (ENumber number) = | |
show number | |
evalText :: String -> Integer | |
evalText text = | |
case parseText text of | |
Just (tree, []) -> | |
eval evaluateBinary tree | |
Just (tree, t) -> | |
error $ "Failed to parse entire expr (" ++ toSExpr tree ++ "): tail is " ++ show t | |
Nothing -> | |
error "Failed to parse expression" | |
where | |
evaluateBinary op lhs rhs = | |
case op of | |
Operator "+" -> lhs + rhs | |
Operator "-" -> lhs - rhs | |
Operator "*" -> lhs * rhs | |
-- test it via ghci | |
-- :l result | |
-- evalText "2 + 2 * 2" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment