Created
September 16, 2013 11:37
-
-
Save cfchou/6579584 to your computer and use it in GitHub Desktop.
Sample code from Graham Hutton and Erik Meijer's paper "Monadic Parser Combinators" is written in Gofer. Here's a rewrite in Haskell.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char | |
import Control.Monad | |
import Control.Monad.Trans.State.Lazy (StateT(..), runStateT) | |
import Control.Monad.Trans.Reader | |
import Control.Monad.State.Class | |
import Control.Monad.Trans.Class | |
type Pos = (Int, Int) -- (line, column) | |
type PString = (Pos, String) | |
type P a = ReaderT Pos (StateT PString []) a | |
{-- | |
instance MonadState s m => MonadState s (ReaderT r m) where | |
get = lift . get -- get :: P s | |
put = lift . put -- put :: s -> P () | |
--} | |
runP :: P a -> Pos -> PString -> [(a, PString)] | |
runP p r s = runStateT (runReaderT p r) s | |
item :: P Char | |
-- item :: ReaderT Pos (StateT PString []) Char | |
item = ask >>= \dp -> | |
get >>= \(p, s) -> | |
if onside p dp then | |
case s of | |
[] -> mzero | |
(x:_) -> modify newstate >> | |
return x | |
else | |
mzero | |
onside :: Pos -> Pos -> Bool | |
onside (l, c) (dl, dc) = | |
c > dc || l == dl | |
-- row and column begin from 1 | |
newstate :: PString -> PString | |
newstate ((l, c), (x:xs)) = | |
case x of | |
'\n' -> ((l+1, 1), xs) | |
'\t' -> ((l, (c `div` 8 + 1) * 8), xs) | |
_ -> ((l, c + 1), xs) | |
sat :: (Char -> Bool) -> P Char | |
sat p = item >>= \c -> | |
if p c then return c | |
else mzero | |
char :: Char -> P Char | |
char c = sat (== c) | |
lower :: P Char | |
lower = sat isLower | |
upper :: P Char | |
upper = sat isUpper | |
letter :: P Char | |
letter = lower `mplus` upper | |
digit :: P Char | |
digit = sat isDigit | |
alphanum :: P Char | |
alphanum = letter `mplus` digit | |
ident :: P String | |
ident = lower >>= \x -> | |
many alphanum >>= \xs -> | |
return (x:xs) | |
string :: String -> P String | |
string [] = return [] | |
string (x : xs) = char x >> | |
string xs >> | |
return (x : xs) | |
-- (++) :: P a -> P a -> P a | |
-- (++) = mplus | |
(+++) :: P a -> P a -> P a | |
p +++ q = ReaderT $ \r -> | |
StateT $ \s -> | |
case runP (p `mplus` q) r s of | |
[] -> [] | |
(x:_) -> [x] | |
many1 :: P a -> P [a] | |
many1 p = p >>= \a -> | |
many p >>= \as -> | |
return (a : as) | |
many :: P a -> P [a] | |
many p = many1 p `mplus` return [] | |
-- use many1, otherwise recurse forever in cases like "junk"! | |
spaces :: P () | |
spaces = many1 (sat isSpace) >> return () | |
where isSpace c = (c == ' ') || (c == '\t') || (c == '\n') | |
comment :: P () | |
comment = string "--" >> | |
many (sat (/= '\n')) >> | |
return () | |
chainl1 :: P a -> P (a -> a -> a) -> P a | |
chainl1 p op = p >>= \a -> rest a | |
where rest a = (op >>= \f -> | |
p >>= \b -> | |
-- rest (f a b)) +++ (return a) | |
rest (f a b)) `mplus` (return a) | |
chainl :: P a -> P (a -> a -> a) -> a -> P a | |
chainl p op a = (p `chainl1` op) `mplus` return a | |
bracket :: P a -> P b -> P a -> P b | |
bracket open p close = open >> | |
p >>= \x -> | |
close >> | |
return x | |
-- many1 ensures offside rule. many1_offsite sets up the rule. | |
{-- | |
let a = b | |
c = d -- many1 can't detect this | |
{<-- pos of outer def | |
let {<-- pos of inner def | |
a = b | |
c = d | |
} | |
} | |
--} | |
many1_offsite :: P a -> P [a] | |
many1_offsite p = get >>= \(pos, str) -> | |
local (const pos) (many1 (off p)) | |
{-- | |
"off" prevents multi definitions in one line. | |
a new definition begins only if the c equals dc. | |
{<-- pos of outer def | |
let {<-- pos of inner def | |
a = b c = d -- not allowed | |
} | |
} | |
--} | |
off :: P a -> P a | |
off p = ask >>= \(dl, dc) -> | |
get >>= \((l, c), str) -> | |
if dc == c then local (const (l, c)) p | |
else mzero | |
-- note the relationship with spaces, many, many1 | |
junk :: P () | |
junk = local (const (0, -1)) (many (spaces +++ comment)) >> return () | |
token :: P a -> P a | |
token p = p >>= \x -> | |
junk >> return x | |
symbol :: String -> P String | |
symbol s = token (string s) | |
identifier :: [String] -> P String | |
identifier xs = token (ident >>= \s -> | |
if not (s `elem` xs) then return s else mzero) | |
-- ============ | |
data Expr = App Expr Expr | |
| Lam String Expr | |
| Let [(String, Expr)] Expr | |
| Var String | |
deriving (Show) | |
variable :: P String | |
variable = identifier ["let", "in"] | |
atom :: P Expr | |
atom = lam +++ letin +++ var +++ paren | |
expr :: P Expr | |
expr = atom `chainl1` (return App) | |
lam :: P Expr | |
lam = symbol "\\" >> | |
variable >>= \v -> | |
symbol "->" >> | |
expr >>= \e -> | |
return (Lam v e) | |
letin :: P Expr | |
letin = symbol "let" >> | |
many1_offsite defs >>= \ps -> | |
get >>= \((l, _), _) -> | |
local (\d@(dl, dc) -> | |
if (l > dl) then (l, dc) | |
else d) ( | |
symbol "in" >> | |
expr) >>= \e -> | |
return (Let ps e) | |
where defs = variable >>= \v -> | |
symbol "=" >> | |
expr >>= \e -> | |
return (v, e) | |
var :: P Expr | |
var = variable >>= \s -> return (Var s) | |
paren :: P Expr | |
paren = bracket (symbol "(") expr (symbol ")") | |
-- ============ | |
-- tests | |
-- ============ | |
-- runP :: P a -> Pos -> PString -> [(a, PString)] | |
a_lambda = runP lam (1, 1) ((1, 1), "\\a -> b") | |
a_var = runP var (1, 1) ((1, 1), "in") | |
a_char = runP item (1, 1) ((1, 1), "abc") | |
a_variable = runP variable (1, 1) ((1, 1), "abc") | |
a_ident = runP ident (1, 1) ((1, 1), "abc") | |
a_spaces = runP spaces (1, 1) ((1, 1), " \n\ | |
\\n\ | |
\aaa") | |
a_comment = runP comment (1, 1) ((1, 1), "--xxx") | |
a_junk = runP junk (1, 1) ((1, 1), " -- aaa \n\ | |
\ \n\ | |
\ --bbb\n\ | |
\ ccc") | |
a_symbol = runP (symbol "\\" >> | |
variable >> | |
symbol "->" >> | |
expr) (1, 1) ((1, 1), "\\a ->b a") | |
a_expr = runP expr (1, 1) ((1, 1), "a b") | |
a_letin = runP letin (1, 1) ((1, 1), "let a = b \n\ | |
\ c = d\n\ | |
\in a") | |
a_letin2 = runP letin (1, 1) ((1, 1), "let a = b\n\ | |
\ c=d in a") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment