Created
March 20, 2013 04:48
-
-
Save ab9rf/5202374 to your computer and use it in GitHub Desktop.
Grammar mangler. This code is a program I'm working on that I'm using to take the benighted grammar that PHP uses, simplify it, and infer types for nonterminals. I wrote it because I realized that the work I was doing to adapt the grammar was repetitive and tedious, so of course the only logical thing to do was write code to do it. At the moment…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import qualified Data.Text as T | |
import qualified Text.Regex.PCRE.Light as R | |
import qualified Data.ByteString.Char8 as B | |
import qualified Data.Map as Map | |
import Control.Monad.State | |
import Control.Monad (liftM) | |
import Data.Ord (comparing) | |
import Data.List (sortBy, mapAccumL) | |
import Data.Maybe | |
mangle t = "PT" ++ (T.unpack (T.concat (map ucfirst pieces))) | |
where pieces = T.split (=='_') (T.pack t) | |
ucfirst s = T.append (T.toUpper f) (T.toLower b) | |
where (f,b) = T.splitAt 1 s | |
notype "LT_HEREDOC_START" = True | |
notype "LT_HEREDOC_END" = True | |
notype _ = False | |
lextype "LT_VARNAME" = Just "PVVariableName" | |
lextype "LT_IDENT" = Just "PVIdent" | |
lextype "LT_VARNAME_IMBED" = Just "PVVariableNameImbed" | |
lextype "LT_INTEGER" = Just "PVInteger" | |
lextype "LT_DOUBLE" = Just "PVDouble" | |
lextype "LT_STRING" = Just "PVString" | |
lextype "INLINE_HTML" = Just "PVInline" | |
lextype _ = Nothing | |
lextypes = ["PVVariableName", "PVIdent", "PVVariableNameImbed", | |
"PVInteger", "PVDouble", "PVString", "PVInline"] | |
mangleline curr line = mangleline' curr m1 m2 | |
where r1 = R.compile (B.pack "^(\\w+)\\s*::\\s*\\{.*\\}") [] | |
r2 = R.compile (B.pack "^\\s*[:|]\\s*(.*)") [] | |
m1 = liftM (map $ B.unpack) (R.match r1 line []) | |
m2 = liftM (map $ B.unpack) (R.match r2 line []) | |
mangleline' st (Just [_,nonterm]) _ = | |
(nonterm,[]):st | |
mangleline' ((nt,l):rst) Nothing (Just [_,prod]) = | |
(nt,(l ++ [tokens])):rst | |
where tokens = t' prod | |
t' "{- empty -}" = [] | |
t' str = map T.unpack (T.words (T.pack str)) | |
mangleline' st Nothing Nothing = st | |
removeTerminals :: [String] -> [String] | |
removeTerminals l = filter (\s -> (head s) /= '\'') l | |
type TypeMap = Map.Map String String | |
type ProdMap = Map.Map String [[String]] | |
getType :: ProdMap -> String -> State TypeMap String | |
getType m nt = do table <- get | |
t <- case nt `Map.lookup` table of | |
Just v -> return v | |
Nothing -> ty p'' | |
put (Map.insert nt t table) | |
return t | |
where p = fromJust (nt `Map.lookup` m) | |
p' = (map removeTerminals p) | |
p'' = (sortBy (comparing length) p') | |
ty [] = return "()" | |
ty [[a]] = do t <- getType m a; return t | |
ty [[],[nt']] | nt == nt' = do return "Int"; {- self-circular -} | |
ty [[],[a]] = do t <- getType m a; return ("Maybe " ++ t) | |
ty [[],[nt',a]] | nt == nt' = do t <- getType m a; return ("[" ++ t ++ "]") | |
ty _ = return (mangle nt) | |
makeMap :: Ord a => [(a,b)] -> Map.Map a b | |
makeMap l = foldl (\m (k,v) -> Map.insert k v m) Map.empty l | |
main = do m <- f; print m | |
f = do file <- B.getContents | |
prods <- return (foldl mangleline [] (B.lines file)) | |
m <- return (makeMap prods) | |
m' <- return (evalState (mapM (getType m) (Map.keys m)) Map.empty) | |
return m' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment