Skip to content

Instantly share code, notes, and snippets.

@kirelagin
Created June 6, 2012 18:18
Show Gist options
  • Save kirelagin/2883710 to your computer and use it in GitHub Desktop.
Save kirelagin/2883710 to your computer and use it in GitHub Desktop.
Parsgen
!<!
module Parse where
!>!
s -> e { #val = !<! #1.val !>! }
e -> t e' { #val = !<! #2.val !>!
#t = !<! #1.val !>!
#tt = !<! #1.val !>! }
e' -> P t e' { #val = !<! #3.val !>!
#tt = !<! ##.t + #2.val !>! }
e' -> _ { #val = !<! ##.tt !>! }
t -> f t' { #val = !<! #2.val !>!
#f = !<! #1.val !>!
#ff = !<! #1.val !>! }
t' -> X f t' { #val = !<! #3.val !>!
#ff = !<! ##.f * #2.val !>! }
t' -> _ { #val = !<! ##.ff !>! }
f -> L e R { #val = !<! #2.val !>! }
f -> N { #val = !<! #1.val !>! }
!<!
data Token = P ()
| X ()
| L ()
| R ()
| N Int
| Tend ()
deriving (Eq, Show)
type AttrType = Int
!>!
import Data.List
import Data.Map (Map)
import qualified Data.Map as M
import Control.Monad.State
type AttrsType = Map String AttrType
curToken :: State [Token] Token
curToken = state $ f where
f s@[] = (Tend (), s)
f s@(t:_) = (t, s)
parse ts = let attrs = fst (runState (parse_s M.empty) (ts ++ repeat (Tend ()))) in attrs
--------
module Main where
import Control.Arrow
import Control.Monad.State
import Data.List
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Language.Haskell.Exts (prettyPrint, parseModuleWithMode, ParseResult (..), preludeFixities, ParseMode (..), Extension (..))
import System.IO
import Text.Regex
import Types
import Parsegram (parsegram, tokenise)
main = do
inp <- getContents
let (before, parsed, after) = parsegram . tokenise $ inp
rules = process $ parsed
text = before ++ "\n" ++ boilerplate ++ (translateGrammar rules) ++ "\n" ++ after
hPutStrLn stderr $ showFirsts rules
hPutStrLn stderr $ "---"
hPutStrLn stderr $ showFollows rules
--putStrLn $ text
--putStrLn $ "----------!!!-------"
putStrLn . codeFormat $ text
process :: [(Nonterminal, Rule)] -> NontermRules
process l = M.fromListWith (++) $ map (second (\x -> [x])) l
calcFirst :: NontermRules -> [RuleItem]-> Set Terminal
calcFirst _ [] = S.singleton "_eps"
calcFirst _ ((Term t):cs) = S.singleton t
calcFirst r ((Nonterm n):cs) = nontermFirst n
where nontermFirst n = foldr S.union S.empty (map (calcFirst r . getItems) $ r M.! n)
calcFollow :: NontermRules -> Nonterminal -> Set Terminal
calcFollow r n = M.foldrWithKey (\k v b -> followNonterm k v `S.union` b) (if n == "s" then S.singleton "Tend" else S.empty) r
where followNonterm :: Nonterminal -> [Rule] -> Set Terminal
followNonterm a rules = foldr (\v b -> followRule v `S.union` b) S.empty rules
where followRule :: Rule -> Set Terminal
followRule (rule,_) = followItems rule
followItems :: [RuleItem] -> Set Terminal
followItems [] = S.empty
followItems ((Term _):cs) = followItems cs
followItems ((Nonterm t):cs) = (if t == n then if "_eps" `S.member` firstG then (S.delete "_eps" firstG) `S.union` (if t == a then S.empty else calcFollow r a) else firstG else S.empty) `S.union` followItems cs
where firstG = calcFirst r cs
-- Translation
say = modify . flip (++)
sayLn s = say s >> say "\n"
saySt s = say s >> say ";" >> say "\n"
magic m = snd $ flip runState "" m
translateGrammar :: NontermRules -> String
translateGrammar r = intercalate "\n" $ M.foldrWithKey (\k v b -> translateNontermRules r k v : b) [] r
translateNontermRules :: NontermRules -> Nonterminal -> [Rule] -> String
translateNontermRules r n rules = magic $ do
sayLn $ "parse_" ++ n ++ " :: AttrsType -> State [Token] AttrsType"
sayLn $ "parse_" ++ n ++ " pAttrs = do {"
saySt $ "t <- curToken"
sayLn $ "case t of {"
saySt $ translateRulesList rules
saySt $ "}"
sayLn $ "}"
where translateRulesList :: [Rule] -> String
translateRulesList [] = "_ -> error $ \"Unexpected token \" ++ show t;"
translateRulesList (rule:rs) = translateRule r n rule ++ (translateRulesList rs)
translateRule :: NontermRules -> Nonterminal -> Rule -> String
translateRule r n rule =
let first = calcFirst r items
follow = calcFollow r n
chklist = if "_eps" `S.member` first then (S.delete "_eps" first) `S.union` follow else first
in
intercalate ";\n" . S.toList $ S.map translType chklist
where translType tp = magic $ do
sayLn $ tp ++ " _ -> do {"
saySt $ "s1 <- get"
sayLn $ "let {"
saySt $ "parentAttrs = (" ++ translateAttrs attrs ++ ") `M.union` pAttrs"
say $ intercalate "\n" $ map (uncurry translateItem) (zip [1..] items)
sayLn $ "};"
saySt $ "put s" ++ show ((length items)+1)
saySt $ "return parentAttrs"
sayLn $ "};"
(items, attrs) = rule
translateItem number (Nonterm n) = magic $ do
let mys = show number
nexts = show (number+1)
saySt $ "(attrs_" ++ mys ++ ", s" ++ nexts ++ ") = runState (parse_" ++ n ++ " parentAttrs) s" ++ mys
translateItem number (Term t) = magic $ do
let mys = show number
nexts = show (number+1)
sayLn $ "(attrs_" ++ mys ++ ", s" ++ nexts ++ ") = case (head s" ++ mys ++ ") of {"
saySt $ "(" ++ t ++ " val) -> (M.singleton \"val\" val, tail s" ++ mys ++ ");"
saySt $ "_ -> error $ \"Unexpected token \" ++ show t ++ \". " ++ t ++ " expected.\""
sayLn $ "};"
translateAttrs :: [(String, Code)] -> String
translateAttrs attrs = magic $ do
say $ "M.fromList ["
say $ intercalate "," $ map trAttr attrs
say $ "]"
where trAttr :: (String, Code) -> String
trAttr (attr, code) = "(\"" ++ attr ++ "\"," ++ preprocessed code ++ ")"
preprocessed cd = let
cd1 = subRegex (mkRegex "#([0-9]+)\\.([a-zA-Z]+)") cd "(attrs_\\1 M.! \"\\2\")"
cd2 = subRegex (mkRegex "##\\.([a-zA-Z]+)") cd1 "(parentAttrs M.! \"\\1\")"
in cd2
codeFormat = check . fmap reformat . (parseModuleWithMode mode) where
mode = ParseMode "" [FlexibleContexts, MultiParamTypeClasses] False True (Just preludeFixities)
reformat = prettyPrint
check r = case r of
ParseOk a -> a
ParseFailed loc err -> error $ show (loc,err)
---- JUNK
showFirsts:: NontermRules -> String
showFirsts r = intercalate "\n" $ M.foldrWithKey (\k v b -> (k ++ " : " ++ show v) : b) [] (firsts $ r)
showFollows r = intercalate "\n" $ M.foldrWithKey (\k v b -> (k ++ " : " ++ show v) : b) [] (follows $ r)
firsts :: NontermRules -> Map Nonterminal (Set Terminal)
firsts r = M.mapWithKey (\k _ -> calcFirst r [Nonterm k]) r
follows r = M.mapWithKey (\k _ -> calcFollow r k) r
----- BE CAREFUL! The following lines are generated by Makefile.
boilerplate = "import Data.List\nimport Data.Map (Map)\nimport qualified Data.Map as M\nimport Control.Monad.State\n\n\ntype AttrsType = Map String AttrType\n\ncurToken :: State [Token] Token\ncurToken = state $ f where\n f s@[] = (Tend (), s)\n f s@(t:_) = (t, s)\n\nparse ts = let attrs = fst (runState (parse_s M.empty) (ts ++ repeat (Tend ()))) in attrs\n\n--------\n"
import Data.Char
import qualified Data.Map as M
import Parse
tokenise :: String -> [Token]
tokenise [] = []
tokenise ('+':cs) = P () : tokenise cs
tokenise ('*':cs) = X () : tokenise cs
tokenise ('(':cs) = L () : tokenise cs
tokenise (')':cs) = R () : tokenise cs
tokenise l@(c:cs)
| isSpace c = tokenise cs
| isDigit c = readInt l
where
readInt l = N (read i :: Int) : tokenise rest
(i, rest) = span isDigit l
main = do
cont <- getContents
--putStrLn . show $ (tokenise cont)
print $ (parse $ tokenise cont) M.! "val"
import Data.Char
import qualified Data.Map as M
import Parse
tokenise :: String -> [Token]
tokenise [] = []
tokenise ('+':cs) = OP '+' : tokenise cs
tokenise ('-':cs) = OP '-' : tokenise cs
tokenise l@(c:cs)
| isSpace c = tokenise cs
| isDigit c = readInt l
where
readInt l = NUM (read i :: Int) : tokenise rest
(i, rest) = span isDigit l
main = do
cont <- getContents
--putStrLn . show $ (tokenise cont)
let res = (parse $ tokenise cont) M.! "val"
putStrLn $ case res of
Val i -> show i
_ -> "fail"
import Data.Char
import qualified Data.Map as M
import Parse
tokenise :: String -> [Token]
tokenise [] = []
tokenise ('(':cs) = LP () : tokenise cs
tokenise (')':cs) = RP () : tokenise cs
tokenise ('{':cs) = LN () : tokenise cs
tokenise ('}':cs) = RN () : tokenise cs
tokenise ('[':cs) = LS () : tokenise cs
tokenise (']':cs) = RS () : tokenise cs
tokenise ('.':cs) = DOT () : tokenise cs
tokenise l@(c:cs)
| isSpace c = tokenise cs
| isAlpha c = readStr l
where readStr l = STR s : tokenise rest
(s, rest) = span isAlpha l
main = do
cont <- getContents
putStrLn $ (parse $ tokenise cont) M.! "val"
parsgen : Main.hs Parsegram.hs Types.hs
ghc $^ -o $@
Parsegram.hs : Parsegram.y Types.hs
happy -gac $< -o $@
Main.hs : boilerplate.hs
if tail -n 1 $@ | grep -q boilerplate; then \
sed -i '$$d' $@;\
fi
echo 'main = interact (("boilerplate = " ++) . show)' > tmp.hs
runhaskell tmp.hs < $< >> $@
rm -f tmp.hs
.PHONY: clean
clean :
rm -rf *.o *.hi parsgen Parsegram.hs
!<!
module Parse where
import Data.Char
import Data.List
!>!
s -> NUM e' { #val = !<! Val $ case #2.func of { (Func f) -> f #1.val; } !>! }
e' -> s a e' { #func = !<! Func $ \x -> case #1.val of { (Val i) -> case #3.func of { (Func f) -> case #2.op of { (Op '+') -> f (x+i); (Op '-') -> f(x-i);};};} !>! }
e' -> _ { #func = !<! Func $ id !>! }
a -> OP { #op = !<! Op #1.val !>! }
!<!
data Token = NUM Int
| OP Char
| Tend ()
deriving (Show)
data AttrType = Val Int | Func (Int -> Int) | Op Char
!>!
{
module Parsegram (
parsegram, tokenise
) where
import Data.Char
import Control.Monad
import Types
}
%name parsegram
%error { parseError }
%tokentype { Token }
%token
TERM { TTerm $$ }
NONTERM { TNonterm $$ }
ATTR { TAttr $$ }
'->' { TArrow }
'!<!' { TCodeOpen }
'!>!' { TCodeClose }
CODE { TCode $$ }
'{' { TLBrace }
'}' { TRBrace }
'=' { TEq }
'_' { TEpsilon }
%%
GrammarFile :: { (String, [(Nonterminal, Rule)], String) }
: '!<!' CODE '!>!' NontermRuleSeq '!<!' CODE '!>!' { ($2, $4, $6) }
NontermRuleSeq :: { [(Nonterminal, Rule)] }
: NontermRule NontermRuleSeq { $1 : $2 }
| NontermRule { [$1] }
NontermRule :: { (Nonterminal, Rule) }
: NONTERM '->' ItemSeq '{' Attrs '}' { ($1, ($3, $5)) }
| NONTERM '->' ItemSeq '{' '}' { ($1, ($3, [])) }
Attrs :: { [(String, Code) ]}
: Attr Attrs { $1 : $2 }
| Attr { [$1] }
Attr :: { (String, Code) }
: ATTR '=' '!<!' CODE '!>!' { ($1, $4) }
ItemSeq :: { [RuleItem] }
: Item ItemSeq { $1 : $2 }
| Item { [$1] }
| '_' { [] }
Item :: { RuleItem }
Item : TERM { Term $1 }
| NONTERM { Nonterm $1 }
{
parseError = error . show
data Token = TTerm Terminal
| TNonterm Nonterminal
| TAttr String
| TArrow
| TCodeOpen
| TCodeClose
| TCode String
| TLBrace
| TRBrace
| TEq
| TEpsilon
deriving (Show)
tokenise [] = []
tokenise ('-':'>':cs) = TArrow : tokenise cs
tokenise ('{':cs) = TLBrace : tokenise cs
tokenise ('}':cs) = TRBrace : tokenise cs
tokenise ('_':cs) = TEpsilon : tokenise cs
tokenise ('#':cs) = readHaskellId
where readHaskellId = TAttr i : tokenise rest
(i, rest) = span (liftM2 (||) isAlphaNum (=='_')) cs
tokenise ('=':cs) = TEq : tokenise cs
tokenise ('!':'<':'!':cs) = TCodeOpen : readCode cs ""
where readCode ('!':'>':'!':cs) l = (TCode l) : TCodeClose : tokenise cs
readCode (c:cs) l = readCode cs (l ++ [c])
readCode [] l = [TCode l]
tokenise l@(c:cs)
| isSpace c = tokenise cs
| isAlpha c = readId l
where readId l@(c:cs) = ((if isAsciiUpper c then TTerm else TNonterm) i) : tokenise rest
(i, rest) = span (liftM2 (||) isAlpha (=='\'')) l
}
!<!
module Parse where
import Data.Map (Map)
import qualified Data.Map as M
!>!
s -> LN c DOT s RN { #val = !<! #2.val !>!
#sv = !<! #4.val !>! }
s -> LP STR RP { #val = !<! #2.val !>! }
c -> LS STR RS { #val = !<! #2.val ++ ##.sv !>! }
!<!
data Token = LN ()
| RN ()
| LP ()
| RP ()
| LS ()
| RS ()
| STR String
| DOT ()
| Tend ()
deriving (Eq, Show)
type AttrType = String
!>!
2 3 + 1 - 2 2 + -
{[a].{[b].(c)}}
module Types where
import Data.Map (Map)
type NontermRules = Map Nonterminal [Rule]
type Rule = ([RuleItem], [(String, Code)])
data RuleItem = Term String | Nonterm String deriving (Show)
type Nonterminal = String
type Terminal = String
type Code = String
getItems = fst
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment