Last active
August 29, 2015 14:15
-
-
Save knoguchi/c407fb186550aef6d22a to your computer and use it in GitHub Desktop.
Pythonでコンパイラ: PL/0構文木 ref: http://qiita.com/knoguchi/items/6f9b7383b7252a9ebdad
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
['VAR', 'x', ',', 'squ', ';', 'PROCEDURE', 'square', ';', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', ';', 'BEGIN', 'x', ':=', '1', ';', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', ';', 'x', ':=', 'x', '+', '1', ';', 'END', 'END', '.'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add this at the top of the grammar.
# Suppress matches the punctuation but keeps it out of the parse results.
LPAR, RPAR, COMMA, SEMICOLON, DOT = map(Suppress, "(),;.")
# Replace the punctuation string literals with the constants above.
# before
# factor << (ident | number | "(" + expression + ")")
# after
factor << (ident | number | LPAR + expression + RPAR)
# Likewise, replace the "," ";" and "." literals with COMMA, SEMICOLON and DOT.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 4. condition
# Earlier hand-written rule, kept for comparison:
# condition = ODD + expression | expression + oneOf('= # < <= > >=') + expression
# Operator levels are listed from highest to lowest precedence.
condition = infixNotation(
    expression,
    [
        (ODD, UNARY, opAssoc.RIGHT),                 # prefix ODD operator
        (oneOf("< <= > >="), BINARY, opAssoc.LEFT),  # ordering comparisons
        (oneOf("= #"), BINARY, opAssoc.LEFT),        # "=" and "#" comparisons
    ]
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
['VAR', ['x', 'squ'], 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', ['x', '*', 'x'], 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', ['x', '<=', '10'], 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', ['x', '+', '1'], 'END', 'END'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> print expression.parseString('1 + 2 * 3 + 4') | |
[['1', '+', ['2', '*', '3'], '+', '4']] | |
>>> print expression.parseString('1 + 2 / 3 * 4 - -5') | |
[['1', '+', ['2', '/', '3', '*', '4'], '-', ['-', '5']]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python pl0_parser.py ex1.pl0 | |
['VAR', 'x', 'squ', 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', 'x', '+', '1', 'END', 'END'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 11. var
# Group() nests the declared names in their own sub-list, so the result
# starts with ['VAR', ['x', 'squ'], ...] instead of a flat token list.
var = VAR + Group(ident + ZeroOrMore(COMMA + ident)) + SEMICOLON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
['VAR', ['x', 'squ'], 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', 'x', '+', '1', 'END', 'END'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 11. var
def var_list(tokens):
    """Parse action that nests the declared variable names in a sub-list.

    Args:
        tokens: parse results exposing ``asList()``; the first item is the
            VAR keyword and the remaining items are the declared names.

    Returns:
        A two-element list ``[keyword, [name, ...]]``.
    """
    tokens = tokens.asList()
    # Keep the keyword in front and gather everything after it into one list.
    return [tokens[0], tokens[1:]]
# No Group() here: the var_list parse action does the nesting instead.
var = VAR + ident + ZeroOrMore(COMMA + ident) + SEMICOLON
var.setParseAction(var_list)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
['VAR', ['x', 'squ'], 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', 'x', '+', '1', 'END', 'END'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 11. var
class Var(object):
    """Parse-tree node for a VAR declaration.

    Intended to be used as a parse action: it is constructed with the tokens
    matched by the ``var`` rule.

    Attributes:
        variables: the second matched token, i.e. the grouped list of
            declared variable names.
    """

    def __init__(self, tokens):
        tokens = tokens.asList()
        # tokens[0] is the VAR keyword; tokens[1] is the grouped name list.
        self.variables = tokens[1]
var = VAR + Group(ident + ZeroOrMore(COMMA + ident)) + SEMICOLON
# Using the class as the parse action replaces the matched tokens with a
# Var instance in the parse results.
var.setParseAction(Var)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[<__main__.Var object at 0x10d418710>, 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', 'x', '+', '1', 'END', 'END'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Earlier hand-written grammar, kept for comparison:
# term = Forward()
# factor = Forward()
# expression = Optional(oneOf("+ -")) + term + ZeroOrMore(oneOf("+ -") + term)
# term << (factor + ZeroOrMore(oneOf("* /") + factor))
# factor << (ident | number | LPAR + expression + RPAR)
# infixNotation defines the operator precedence.
# Operators of equal precedence must be written together in a single entry.
# factor no longer needs a parenthesised alternative: infixNotation accepts
# parenthesised sub-expressions itself (its lpar/rpar arguments default to
# suppressed "(" and ")").
UNARY, BINARY, TERNARY = 1, 2, 3
factor = ident | number
expression = infixNotation(
    factor,
    [
        (oneOf("+ -"), UNARY, opAssoc.RIGHT),   # the sign has the highest precedence
        (oneOf("* /"), BINARY, opAssoc.LEFT),   # multiplication/division bind tighter than addition/subtraction
        (oneOf("+ -"), BINARY, opAssoc.LEFT),
    ]
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""PL/0 grammar written with pyparsing.

Run as a script with the path of a PL/0 source file to print its parse tree.
"""
from pyparsing import *

# Punctuation is matched but suppressed from the parse results.
LPAR, RPAR, COMMA, SEMICOLON, DOT = map(Suppress, "(),;.")

# 1. reserved keyword
(CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD) = map(
    CaselessKeyword,
    "CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD".replace(",", "").split())
keyword = MatchFirst((CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD))

# 2. identifier
# The negative lookahead stops reserved words from being taken as names.
ident = ~keyword + Word(alphas, alphanums + "_")

# 3. expression
number = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
UNARY, BINARY, TERNARY = 1, 2, 3
factor = ident | number
# Operator levels are listed from highest to lowest precedence.
expression = infixNotation(
    factor,
    [
        (oneOf("+ -"), UNARY, opAssoc.RIGHT),   # the sign has the highest precedence
        (oneOf("* /"), BINARY, opAssoc.LEFT),   # multiplication/division bind tighter than addition/subtraction
        (oneOf("+ -"), BINARY, opAssoc.LEFT),
    ]
)

# 4. condition
# Earlier hand-written rule, kept for comparison:
# condition = ODD + expression | expression + oneOf('= # < <= > >=') + expression
condition = infixNotation(
    expression,
    [
        (ODD, UNARY, opAssoc.RIGHT),
        (oneOf("< <= > >="), BINARY, opAssoc.LEFT),
        (oneOf("= #"), BINARY, opAssoc.LEFT),
    ]
)

# 5. assignment
assign_statement = ident + ":=" + expression

# 6. call
call_statement = CALL + ident

# 7. if-then
# statement is recursive, so it is declared first and filled in below.
statement = Forward()
if_statement = IF + condition + THEN + statement

# 8. while-do
while_statement = WHILE + condition + DO + statement

# 9. statement
# The whole alternative is Optional, so an empty statement is accepted
# (e.g. after a trailing ";" before END).
statement << Optional(assign_statement
                      | call_statement
                      | BEGIN + statement + ZeroOrMore(SEMICOLON + statement) + END
                      | if_statement
                      | while_statement
                      )

# 10. const
# Each "name = value" pair gets its own Group so that every constant has the
# same shape in the parse tree, not just the first one.
const_item = Group(ident + "=" + number)
const = CONST + Group(const_item + ZeroOrMore(COMMA + const_item)) + SEMICOLON

# 11. var
var = VAR + Group(ident + ZeroOrMore(COMMA + ident)) + SEMICOLON

# 12. procedure
# block is recursive (procedures contain blocks), so it is declared first.
block = Forward()
procedure = PROCEDURE + ident + SEMICOLON + block + SEMICOLON

# 13. block
block << Optional(const) + Optional(var) + ZeroOrMore(procedure) + statement

# 14. program
program = block + DOT

if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("usage: %s SOURCE_FILE" % sys.argv[0])
    with open(sys.argv[1], 'r') as fp:
        txt = fp.read()
    # A single-argument print call behaves the same on Python 2 and 3.
    print(program.parseString(txt))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment