Last active
August 29, 2015 14:15
-
-
Save knoguchi/ea72b939027a9bccbfc9 to your computer and use it in GitHub Desktop.
Pythonでコンパイラ: PL/0パーサー ref: http://qiita.com/knoguchi/items/ee949989d0a9f04bee6f
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
VAR x, squ; | |
PROCEDURE square; | |
BEGIN | |
squ := x * x | |
END; | |
BEGIN | |
x := 1; | |
WHILE x <= 10 DO | |
BEGIN | |
CALL square; | |
! squ; | |
x := x + 1; | |
END | |
END. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
condition = "odd" expression |
            expression ("="|"#"|"<"|"<="|">"|">=") expression .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A condition is either ODD applied to one expression, or two expressions
# joined by one of the comparison operators. "+" binds tighter than "|", so
# each side of the "|" is a complete sequence.
condition = ODD + expression | expression + oneOf('= # < <= > >=') + expression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> condition.parseString('odd 1') | |
(['ODD', '1'], {}) | |
>>> condition.parseString('3 <= 1') | |
(['3', '<=', '1'], {}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Assignment statement: target identifier, the ":=" token, then an expression.
assign_statement = ident + ":=" + expression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Procedure call statement: the CALL keyword followed by an identifier.
call_statement = CALL + ident |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Placeholder for the statement rule, so that rules which contain a statement
# can refer to it before the rule itself is defined.
statement = Forward() | |
# IF-THEN: a condition between IF and THEN, followed by a single statement.
if_statement = IF + condition + THEN + statement |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# WHILE-DO loop: a condition between WHILE and DO, followed by one statement.
while_statement = WHILE + condition + DO + statement |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
statement = [ ident ":=" expression | "call" ident |
            "begin" statement {";" statement } "end" |
            "if" condition "then" statement |
            "while" condition "do" statement ].
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The statement rule. It is attached to the existing ``statement`` Forward
# with "<<" rather than rebound with "=": if_statement, while_statement and
# the BEGIN ... END branch below already hold a reference to that Forward,
# and rebinding the name would leave the object they reference undefined.
# Optional(...) makes the empty statement valid, like the grammar's [ ... ].
statement << Optional(
    assign_statement
    | call_statement
    | BEGIN + statement + ZeroOrMore(";" + statement) + END
    | if_statement
    | while_statement
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Constant declaration: CONST name = number {, name = number} ;
const = CONST + ident + "=" + number + ZeroOrMore("," + ident + "=" + number) + ";" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One keyword parser per reserved word, built with CaselessKeyword.
CONST = CaselessKeyword('CONST') | |
VAR = CaselessKeyword('VAR') | |
# NOTE(review): the ":" below presumably stands for the remaining keywords,
# elided in this excerpt -- confirm against the full listing.
: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Variable declaration: VAR name {, name} ;
var = VAR + ident + ZeroOrMore("," + ident) + ";" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Placeholder for the block rule, so that procedure can refer to it before
# the rule itself is defined.
block = Forward() | |
# Procedure declaration: PROCEDURE name ; block ;
procedure = PROCEDURE + ident + ";" + block + ";" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach the block rule to its Forward: optional const section, optional var
# section, any number of procedures, then a statement. "+" binds tighter than
# "<<", so the whole sequence is what gets attached.
block << Optional(const) + Optional(var) + ZeroOrMore(procedure) + statement |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A program is one block terminated by ".".
program = block + "." |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python pl0_parser.py ex1.pl0 | |
Traceback (most recent call last): | |
File "pl0_parser.py", line 64, in <module> | |
print program.parseString(txt) | |
File "/usr/lib/python2.7/dist-packages/pyparsing.py", line 1041, in parseString | |
raise exc | |
pyparsing.ParseException: Expected "." (at char 59), (line:8, col:1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> statement.parseString('''\ | |
... BEGIN | |
... x := 1; | |
... WHILE x <= 10 DO | |
... BEGIN | |
... CALL square; | |
... ! squ; | |
... x := x + 1; | |
... END | |
... END | |
... ''') | |
([], {}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python pl0_parser.py ex1.pl0 | |
['VAR', 'x', ',', 'squ', ';', 'PROCEDURE', 'square', ';', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', ';', 'BEGIN', 'x', ':=', '1', ';', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', ';', 'x', ':=', 'x', '+', '1', ';', 'END', 'END', '.'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyparsing import CaselessKeyword, MatchFirst

# Reserved words of the language, in the order they are tried by ``keyword``.
_RESERVED_WORDS = (
    "CONST", "VAR", "PROCEDURE", "CALL", "BEGIN", "END",
    "IF", "THEN", "WHILE", "DO", "ODD",
)

# One keyword parser per reserved word, bound to a name of the same spelling.
(CONST, VAR, PROCEDURE, CALL, BEGIN, END,
 IF, THEN, WHILE, DO, ODD) = (CaselessKeyword(word) for word in _RESERVED_WORDS)

# Matches any single reserved word; alternatives are tried in the listed order.
keyword = MatchFirst(
    (CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> print keyword.parseString('CONST') | |
['CONST'] | |
>>> print keyword.parseString('const') | |
['CONST'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyparsing import Word, alphas, alphanums | |
# An identifier must not be a reserved keyword ("~keyword" rejects the match
# when a keyword is found at this position); it starts with a letter and
# continues with letters, digits or "_".
ident = ~keyword + Word(alphas, alphanums+"_") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> print repr(ident.parseString('valid_id')) | |
(['valid_id'], {}) | |
>>> print repr(ident.parseString('0123bad_id')) | |
Traceback (most recent call last): | |
File "<stdin>", line 1, in <module> | |
File "/usr/lib/python2.7/dist-packages/pyparsing.py", line 1041, in parseString | |
raise exc | |
pyparsing.ParseException: Expected W:(abcd...,abcd...) (at char 0), (line:1, col:1) | |
>>> print repr(ident.parseString('CONST')) | |
Traceback (most recent call last): | |
File "<stdin>", line 1, in <module> | |
File "/usr/lib/python2.7/dist-packages/pyparsing.py", line 1041, in parseString | |
raise exc | |
pyparsing.ParseException: Found unwanted token, {"CONST" | "VAR" | "PROCEDURE" | "CALL" | "BEGIN" | "END" | "IF" | "THEN" | "WHILE" | "DO" | "ODD"} (at char 0), (line:1, col:1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
expression = [ "+"|"-"] term { ("+"|"-") term}.
term = factor {("*"|"/") factor}.
factor = ident | number | "(" expression ")".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Numeric literal: digits, an optional fractional part, an optional exponent.
number = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
# term and factor are declared as Forward placeholders so that the rules can
# refer to each other before their bodies are attached below.
term = Forward()
factor = Forward()
# expression: optional leading sign, then terms separated by "+" or "-".
expression = Optional(oneOf("+ -")) + term + ZeroOrMore(oneOf("+ -") + term)
# term: factors separated by "*" or "/".
term << (factor + ZeroOrMore(oneOf("* /") + factor))
# factor: identifier, number, or a parenthesised expression.
# "|" binds more loosely than "<<", so the alternatives must be wrapped in
# parentheses; without them only ``ident`` would be attached to ``factor``.
factor << (ident | number | "(" + expression + ")")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> expression.parseString('123') | |
(['123'], {}) | |
>>> expression.parseString('123+456') | |
(['123', '+', '456'], {}) | |
>>> expression.parseString('(x+y)*z') | |
(['(', 'x', '+', 'y', ')', '*', 'z'], {}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ident = alpha { alpha | number | '_' } .
program = block "." .
block = [ "const" ident "=" number {"," ident "=" number} ";"]
        [ "var" ident {"," ident} ";"]
        { "procedure" ident ";" block ";" } statement .
statement = [ ident ":=" expression | "call" ident |
            "begin" statement {";" statement } "end" |
            "if" condition "then" statement |
            "while" condition "do" statement ].
condition = "odd" expression |
            expression ("="|"#"|"<"|"<="|">"|">=") expression .
expression = [ "+"|"-"] term { ("+"|"-") term}.
term = factor {("*"|"/") factor}.
factor = ident | number | "(" expression ")".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""PL/0 grammar built with pyparsing.

Run as a script with the path of a PL/0 source file; the flat list of parsed
tokens is printed, or a ``pyparsing.ParseException`` is raised when the text
does not match the grammar.
"""
from pyparsing import (
    CaselessKeyword,
    Forward,
    MatchFirst,
    Optional,
    Regex,
    Word,
    ZeroOrMore,
    alphanums,
    alphas,
    oneOf,
)

# 1. reserved keyword
(CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD) = map(
    CaselessKeyword,
    "CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD".replace(",", "").split())
keyword = MatchFirst((CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD))

# 2. identifier: a letter followed by letters, digits or "_", and not a keyword
ident = ~keyword + Word(alphas, alphanums + "_")

# 3. expression
# term and factor are Forward placeholders because the three rules refer to
# each other; their bodies are attached with "<<" below.
number = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
term = Forward()
factor = Forward()
expression = Optional(oneOf("+ -")) + term + ZeroOrMore(oneOf("+ -") + term)
term << (factor + ZeroOrMore(oneOf("* /") + factor))
# The parentheses are required: "|" binds more loosely than "<<".
factor << (ident | number | "(" + expression + ")")

# 4. condition
condition = ODD + expression | expression + oneOf('= # < <= > >=') + expression

# 5. assignment
assign_statement = ident + ":=" + expression

# 6. call
call_statement = CALL + ident

# 7. if-then
statement = Forward()
if_statement = IF + condition + THEN + statement

# 8. while-do
while_statement = WHILE + condition + DO + statement

# 9. statement (Optional: the empty statement is valid)
statement << Optional(
    assign_statement
    | call_statement
    | BEGIN + statement + ZeroOrMore(";" + statement) + END
    | if_statement
    | while_statement
)

# 10. const
const = CONST + ident + "=" + number + ZeroOrMore("," + ident + "=" + number) + ";"

# 11. var
var = VAR + ident + ZeroOrMore("," + ident) + ";"

# 12. procedure
block = Forward()
procedure = PROCEDURE + ident + ";" + block + ";"

# 13. block
block << (Optional(const) + Optional(var) + ZeroOrMore(procedure) + statement)

# 14. program
program = block + "."

if __name__ == '__main__':
    import sys

    # Report a missing argument as a usage error instead of an IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: %s SOURCE_FILE" % sys.argv[0])
    with open(sys.argv[1], 'r') as fp:
        txt = fp.read()
    # parseAll=True rejects input that has unparsed text after the final ".".
    # The parenthesised print works on both Python 2 and Python 3.
    print(program.parseString(txt, parseAll=True))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment