Created
June 14, 2009 09:45
-
-
Save jkramer/129625 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Mud.Thing.Parse where | |
import Text.ParserCombinators.Parsec | |
import Mud.Thing.Data | |
import Mud.Thing.Eval | |
-- Parse comment (starting with '#' until end of line). | |
comment = do | |
position <- getPosition | |
char '#' | |
comment <- manyTill (noneOf "\n") (char '\n') | |
return $ Comment position comment | |
-- Parse a reserved word (aka keyword). | |
reserved = do | |
position <- getPosition | |
word <- many1 letter | |
if word `elem` reservedWords | |
then return $ Reserved position word | |
else pzero | |
-- Check for a statement separator (semikolon or linebreak). | |
separator = do | |
position <- getPosition | |
character <- try $ oneOf ";\n" | |
return $ Separator position character | |
-- Parse an operator character. | |
operator = do | |
position <- getPosition | |
character <- oneOf "~=" | |
return $ Operator position character | |
-- Parse an identifier, consisting of a sigil and a word of just letters. | |
identifier = do | |
position <- getPosition | |
sigil <- try $ oneOf "%&" | |
name <- many1 letter | |
return $ Identifier position (sigil : name) | |
property = do | |
position <- getPosition | |
try $ char ':' | |
name <- many1 letter | |
return $ Property position name | |
call = do | |
position <- getPosition | |
try $ char '!' | |
name <- many1 letter | |
return $ Call position name | |
-- Parse a normal string enclosed by double quotes. | |
-- TODO: Escapes! | |
doubleQuotedString = char '"' >> manyTill (noneOf "\"") (char '"') | |
-- Parse a text block enclosed by "***". | |
blockString = string "***" >> manyTill anyChar (string "***") | |
-- Just a number. | |
number = many1 digit | |
-- Check for end of file. | |
fileEnd = do | |
position <- getPosition | |
try $ eof | |
return $ EOF position | |
-- Parse a scalar (normal string, text block or number). | |
scalar = do | |
position <- getPosition | |
content <- (try doubleQuotedString <|> try blockString <|> try number) <?> "scalar" | |
return $ Scalar position content | |
-- Parse a list (zero or more scalars or even more lists). | |
list = do | |
position <- getPosition | |
char '[' | |
content <- sepBy (scalar <|> list <|> identifier) (many1 space) | |
char ']' | |
return $ List position content | |
-- Parse a single token. | |
parseToken = (comment <?> "comment") | |
<|> (property <?> "property") | |
<|> (call <?> "function call") | |
<|> (reserved <?> "keyword") | |
<|> (scalar <?> "scalar") | |
<|> (identifier <?> "identifier") | |
<|> (operator <?> "operator") | |
<|> (separator <?> "separator") | |
<|> (fileEnd <?> "end of input") | |
<|> (list <?> "list") | |
-- Parse a single token, or, if the token is the beginning of a block, | |
-- return a block token that contains even more tokens. | |
block = do | |
position <- getPosition | |
token <- parseToken | |
case token of | |
(Reserved _ "do") -> do { blockContent position } | |
_ -> return token | |
where | |
manyTillDone = do | |
skipMany (oneOf " \t\r" <?> "") | |
token <- parseToken | |
case token of | |
(Reserved _ "done") -> return [] | |
_ -> do { rest <- manyTillDone; return $ token : rest } | |
blockContent position = do | |
rest <- manyTillDone | |
return $ Block position rest | |
-- Main tokenizing function. Loops until EOF and returns all tokens. | |
tokenize = do | |
skipMany (oneOf " \t\r" <?> "") | |
token <- block | |
case token of | |
(EOF _) -> return [token] | |
_ -> tokenizeRest token <|> return [token] | |
where | |
tokenizeRest token = do | |
moreTokens <- tokenize | |
return $ token : moreTokens | |
-- Load a file and tokenize its content. | |
tokenizeFile source = do | |
result <- parseFromFile tokenize source | |
case (result) of | |
Left error -> do { putStrLn $ "error at " ++ show error; return [] } | |
Right tokens -> return $ splitExpression tokens | |
-- List of reserved words. | |
reservedWords = [ "inherit", "from", "when", "do", "done", "function", "is" ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment