Created
October 18, 2016 05:57
-
-
Save dresswithpockets/9222059733379a9a3b22e6b312d675dd to your computer and use it in GitHub Desktop.
Stretch v2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import OrderedDict | |
# Compiler Tokens

# Token types for literals, identifiers and end of input.
FLOAT_CONSTANT = 'FLOAT_CONSTANT'
INTEGER_CONSTANT = 'INTEGER_CONSTANT'
HEX_CONSTANT = 'HEX_CONSTANT'
STRING_CONSTANT = 'STRING_CONSTANT'
CHAR_CONSTANT = 'CHAR_CONSTANT'
BOOLEAN_CONSTANT = 'BOOLEAN_CONSTANT'
IDENTIFIER = 'IDENTIFIER'
EOF = 'EOF'

# Still deciding whether or not these are essential for Stretch development.
# In the meantime, these will be non-functional and all global functions
# will be forward-declared in the generated C header.
PUBLIC = 'public'
PRIVATE = 'private'

# Keywords.  Each keyword's token type is its own source spelling.
CASE = 'case'
DEFAULT = 'default'
CONTINUE = 'continue'
BREAK = 'break'
RETURN = 'return'
ANY = 'any'
VOID = 'void'
BOOL = 'bool'
SIGNED8 = 's8'
SIGNED16 = 's16'
SIGNED32 = 's32'
SIGNED64 = 's64'
UNSIGNED8 = 'u8'
UNSIGNED16 = 'u16'
UNSIGNED32 = 'u32'
UNSIGNED64 = 'u64'
FLOAT32 = 'f32'
FLOAT64 = 'f64'
STRING = 'string'
NULL = 'null'
TRUE = 'true'
FALSE = 'false'
STRUCT = 'struct'
ENUM = 'enum'
ALIAS = 'alias'
FOREIGN = '#foreign'
USING = 'using'
NEW = 'new'
DELETE = 'delete'
DEFER = 'defer'
SWITCH = 'switch'
IF = 'if'
ELSE = 'else'
FOR = 'for'
WHILE = 'while'
DO = 'do'

# Operators and punctuation.  Again the token type is the spelling itself.
PLUS = '+'
MINUS = '-'
ASTERISK = '*'
SLASH = '/'
MODULO = '%'
INCREMENT = '++'
DECREMENT = '--'
LOGICAL_NOT = '!'
SHIFT_LEFT = '<<'
SHIFT_RIGHT = '>>'
LESS = '<'
GREATER = '>'
LESS_EQUAL = '<='
GREATER_EQUAL = '>='
EQUALITY = '=='
NO_EQUALITY = '!='
LOGICAL_AND = '&&'
LOGICAL_OR = '||'
XOR = '^'
NOT = '~'
AND = '&'
OR = '|'
QUESTION = '?'
COLON = ':'
DOUBLE_COLON = '::'
COLONASSIGN = ':='
ASSIGN = '='
MUL_ASSIGN = '*='
DIV_ASSIGN = '/='
MOD_ASSIGN = '%='
ADD_ASSIGN = '+='
SUB_ASSIGN = '-='
LSHIFT_ASSIGN = '<<='
RSHIFT_ASSIGN = '>>='
AND_ASSIGN = '&='
XOR_ASSIGN = '^='
OR_ASSIGN = '|='
DOT = '.'
DOUBLE_DOT = '..'
ARROW = '->'
SEMICOLON = ';'
LPAREN = '('
RPAREN = ')'
LSQUARE = '['
RSQUARE = ']'
LBRACKET = '{'
RBRACKET = '}'
COMMA = ','
NOINIT = '---'

# Compiler Options and Constants
Debug_Verbose = False

# Inclusive value ranges of the fixed-width integer types.
s8_min = -(2 ** 7)
s8_max = 2 ** 7 - 1
s16_min = -(2 ** 15)
s16_max = 2 ** 15 - 1
s32_min = -(2 ** 31)
s32_max = 2 ** 31 - 1
s64_min = -(2 ** 63)
s64_max = 2 ** 63 - 1
u8_min = 0
u8_max = 2 ** 8 - 1
u16_min = 0
u16_max = 2 ** 16 - 1
u32_min = 0
u32_max = 2 ** 32 - 1
u64_min = 0
u64_max = 2 ** 64 - 1

# NOTE(review): presumably digit-count thresholds used to choose between
# f32 and f64 literals; they are not used in this part of the file.
f32_digit_min = 0
f64_digit_min = 7

# Token types accepted wherever a type is expected.
Type_Specifiers = (
    VOID, BOOL,
    SIGNED8, SIGNED16, SIGNED32, SIGNED64,
    UNSIGNED8, UNSIGNED16, UNSIGNED32, UNSIGNED64,
    FLOAT32, FLOAT64,
    STRING, IDENTIFIER,
)

# Characters allowed inside an identifier.
id_chars = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
    '1234567890_'
)

# Runtime stuff (like arrays and pointers)
#
# (T) is replaced with the assumed type here, exactly like how the compiler
# handles parameterized structs (since these are indeed parameterized structs).
# Since these are all imperative and part of the runtime, we're handling them
# manually.
Struct_Prefix = '__STR_STRUCT__'
New_string_C = 'New_string'
Static_Array_C = 'Static_Array(T)'
Dynamic_Array_C = 'Dynamic_Array(T)'
class Token(object):
    """A lexical token: a type, a value and the position it was created with.

    Attributes:
        type: token type (one of the module's token constants).
        value: the token's payload (spelling, number, ...).
        line: line value supplied by the creator.
        character: character value supplied by the creator.
    """

    def __init__(self, type, value, line, character):
        self.type, self.value = type, value
        self.line, self.character = line, character
        # Every token announces its creation on stdout.
        print('Creating Token: ' + str(self))

    def __str__(self):
        """Render as ``Token(<type>, <repr of value>)``; the position is omitted."""
        return 'Token(%s, %r)' % (self.type, self.value)

    def __repr__(self):
        """Same text as ``str()``."""
        return self.__str__()
print('Defining reserved keywords for Stretch')

# Reserved words of Stretch, keyed by their source spelling.  Each value is a
# prototype Token carrying the keyword's token type; the position is (0, 0)
# because these tokens do not come from any source location.  `true` and
# `false` share the BOOLEAN_CONSTANT type.  Every key is listed exactly once
# (the `#foreign` entry used to be duplicated, building a second Token that
# silently replaced the first).
Keywords = {
    PUBLIC: Token(PUBLIC, 'public', 0, 0),
    PRIVATE: Token(PRIVATE, 'private', 0, 0),
    CASE: Token(CASE, 'case', 0, 0),
    DEFAULT: Token(DEFAULT, 'default', 0, 0),
    CONTINUE: Token(CONTINUE, 'continue', 0, 0),
    BREAK: Token(BREAK, 'break', 0, 0),
    RETURN: Token(RETURN, 'return', 0, 0),
    ANY: Token(ANY, 'any', 0, 0),
    VOID: Token(VOID, 'void', 0, 0),
    BOOL: Token(BOOL, 'bool', 0, 0),
    SIGNED8: Token(SIGNED8, 's8', 0, 0),
    SIGNED16: Token(SIGNED16, 's16', 0, 0),
    SIGNED32: Token(SIGNED32, 's32', 0, 0),
    SIGNED64: Token(SIGNED64, 's64', 0, 0),
    UNSIGNED8: Token(UNSIGNED8, 'u8', 0, 0),
    UNSIGNED16: Token(UNSIGNED16, 'u16', 0, 0),
    UNSIGNED32: Token(UNSIGNED32, 'u32', 0, 0),
    UNSIGNED64: Token(UNSIGNED64, 'u64', 0, 0),
    FLOAT32: Token(FLOAT32, 'f32', 0, 0),
    FLOAT64: Token(FLOAT64, 'f64', 0, 0),
    STRING: Token(STRING, 'string', 0, 0),
    NULL: Token(NULL, 'null', 0, 0),
    TRUE: Token(BOOLEAN_CONSTANT, 'true', 0, 0),
    FALSE: Token(BOOLEAN_CONSTANT, 'false', 0, 0),
    STRUCT: Token(STRUCT, 'struct', 0, 0),
    ENUM: Token(ENUM, 'enum', 0, 0),
    ALIAS: Token(ALIAS, 'alias', 0, 0),
    FOREIGN: Token(FOREIGN, '#foreign', 0, 0),
    USING: Token(USING, 'using', 0, 0),
    NEW: Token(NEW, 'new', 0, 0),
    DELETE: Token(DELETE, 'delete', 0, 0),
    DEFER: Token(DEFER, 'defer', 0, 0),
    SWITCH: Token(SWITCH, 'switch', 0, 0),
    IF: Token(IF, 'if', 0, 0),
    ELSE: Token(ELSE, 'else', 0, 0),
    FOR: Token(FOR, 'for', 0, 0),
    WHILE: Token(WHILE, 'while', 0, 0),
    DO: Token(DO, 'do', 0, 0),
}
########################## | |
# | |
# Lexer (Tokenizer) | |
# | |
# Transforms a code file into an array of tokens that can be parsed by the compiler | |
# | |
########################## | |
class Lexer(object):
    """Tokenizer for Stretch source text.

    The lexer keeps a cursor (`pos`, `current_char`) into `text`;
    `current_char` is None once the cursor is past the end.  Tokens are
    produced one at a time by `get_next_token()` or all at once by
    `tokenize()`.

    Line/character tracking is still a TODO: `current_line` and
    `current_character` stay at 0, so every token is stamped with (0, 0).
    """

    # Operator and punctuation spellings mapped to their token types.
    # get_next_token() tries the longest spelling first, so '<<=' wins
    # over '<<' and '<'.
    _OPERATORS = {
        '---': NOINIT, '<<=': LSHIFT_ASSIGN, '>>=': RSHIFT_ASSIGN,
        '+=': ADD_ASSIGN, '++': INCREMENT,
        '-=': SUB_ASSIGN, '--': DECREMENT, '->': ARROW,
        '*=': MUL_ASSIGN, '/=': DIV_ASSIGN, '%=': MOD_ASSIGN,
        '!=': NO_EQUALITY, '==': EQUALITY,
        '<=': LESS_EQUAL, '<<': SHIFT_LEFT,
        '>=': GREATER_EQUAL, '>>': SHIFT_RIGHT,
        '&=': AND_ASSIGN, '&&': LOGICAL_AND,
        '|=': OR_ASSIGN, '||': LOGICAL_OR,
        '^=': XOR_ASSIGN,
        ':=': COLONASSIGN, '::': DOUBLE_COLON,
        '..': DOUBLE_DOT,
        '+': PLUS, '-': MINUS, '*': ASTERISK, '/': SLASH, '%': MODULO,
        '!': LOGICAL_NOT, '<': LESS, '>': GREATER, '=': ASSIGN,
        '&': AND, '|': OR, '^': XOR, '~': NOT,
        '?': QUESTION, ':': COLON, '.': DOT, ',': COMMA, ';': SEMICOLON,
        '(': LPAREN, ')': RPAREN, '[': LSQUARE, ']': RSQUARE,
        '{': LBRACKET, '}': RBRACKET,
    }

    _HEX_DIGITS = '0123456789abcdefABCDEF'

    def __init__(self, text):
        """Position the cursor on the first character of `text`.

        Empty text is accepted; the first token is then EOF.
        """
        self.text = text
        self.pos = 0
        self.current_char = self.text[self.pos] if self.text else None
        # TODO: line and character capturing
        self.current_line = 0
        self.current_character = 0

    def token(self, type, value):
        """Create a Token stamped with the lexer's current line/character."""
        return Token(type, value, self.current_line, self.current_character)

    def tokenize(self):
        """Evaluates self.text and transforms the string into an array of tokens.

        The returned list always ends with the EOF token.
        """
        tokens = []
        while True:
            token = self.get_next_token()
            tokens.append(token)
            if token.type == EOF:
                return tokens

    def error(self, message):
        """Abort lexing by raising an Exception carrying `message`."""
        raise Exception(message)

    def advance(self, index = 1):
        """Move the cursor forward by `index` characters."""
        self.pos += index
        if self.pos > len(self.text) - 1:
            self.current_char = None
        else:
            self.current_char = self.text[self.pos]

    def peek(self, index = 1):
        """Return the character `index` positions ahead, or None past the end."""
        peek_pos = self.pos + index
        if peek_pos > len(self.text) - 1:
            return None
        return self.text[peek_pos]

    def skip_whitespace(self):
        """Advance past a run of whitespace characters."""
        while self.current_char is not None and self.current_char.isspace():
            self.advance()

    def skip_comment(self):
        """Skip a `/* ... */` block comment starting at the cursor.

        Raises:
            Exception: if the text ends before the closing `*/`.
        """
        # Step over the opener first so that "/*/" is not taken for a
        # complete comment.
        if self.text[self.pos:self.pos + 2] == '/*':
            self.advance(2)
        while self.current_char is not None and self.text[self.pos:self.pos + 2] != '*/':
            self.advance()
        if self.current_char is None:
            self.error('Unterminated block comment')
        self.advance(2) # */

    def skip_rest_of_line(self):
        """Skip everything up to and including the next newline."""
        while self.current_char is not None and self.current_char != '\n':
            self.advance()
        self.advance() # the new line

    def _read_digits(self):
        """Consume and return a (possibly empty) run of digits."""
        digits = ""
        while self.current_char is not None and self.current_char.isdigit():
            digits += self.current_char
            self.advance()
        return digits

    def get_number_constant(self):
        """Lex a decimal INTEGER_CONSTANT or FLOAT_CONSTANT.

        A '.' directly followed by another '.' is left alone so that a range
        such as `0..10` lexes as integer, DOUBLE_DOT, integer.
        """
        result = self._read_digits()
        if self.current_char == '.' and self.peek() != '.':
            result += self.current_char
            self.advance()
            result += self._read_digits()
            return self.token(FLOAT_CONSTANT, float(result))
        return self.token(INTEGER_CONSTANT, int(result))

    def _at_hex_prefix(self):
        """Return True when the cursor is on `0x`/`0X` followed by a hex digit."""
        first_digit = self.peek(2)
        return (self.current_char == '0'
                and self.peek() in ('x', 'X')
                and first_digit is not None
                and first_digit in self._HEX_DIGITS)

    def _get_hex_constant(self):
        """Lex `0x...` into a HEX_CONSTANT whose value is the parsed int."""
        self.advance(2) # 0x
        digits = ""
        while self.current_char is not None and self.current_char in self._HEX_DIGITS:
            digits += self.current_char
            self.advance()
        return self.token(HEX_CONSTANT, int(digits, 16))

    def _read_quoted(self, quote, what):
        """Consume a literal delimited by `quote` and return its raw spelling.

        The returned text includes both delimiters.  A backslash keeps the
        following character verbatim, so an escaped delimiter does not end
        the literal.

        Raises:
            Exception: if the text ends before the closing delimiter;
                `what` names the literal kind in the message.
        """
        result = self.current_char # opening delimiter
        self.advance()
        while self.current_char is not None and self.current_char != quote:
            if self.current_char == '\\' and self.peek() is not None:
                result += self.current_char
                self.advance()
            result += self.current_char
            self.advance()
        if self.current_char is None:
            self.error('Unterminated %s constant' % what)
        result += self.current_char # closing delimiter
        self.advance()
        return result

    def get_string_constant(self):
        """Lex a double-quoted STRING_CONSTANT (value keeps the quotes)."""
        return self.token(STRING_CONSTANT, self._read_quoted('"', 'string'))

    def get_char_constant(self):
        """Lex a single-quoted CHAR_CONSTANT (value keeps the quotes)."""
        return self.token(CHAR_CONSTANT, self._read_quoted('\'', 'character'))

    def _read_word(self):
        """Consume and return a run of identifier characters."""
        word = ""
        while self.current_char is not None and self.current_char in id_chars:
            word += self.current_char
            self.advance()
        return word

    def get_ident(self):
        """Lex an identifier, or a keyword token if the word is reserved."""
        result = self._read_word()
        keyword = Keywords.get(result)
        if keyword is not None:
            # A fresh token is built so that keywords get the lexer's
            # position stamp like every other token.
            return self.token(keyword.type, keyword.value)
        return self.token(IDENTIFIER, result)

    def _get_directive(self):
        """Lex a `#`-prefixed reserved word such as `#foreign`.

        Raises:
            Exception: if the word is not in Keywords.
        """
        self.advance() # '#'
        spelling = '#' + self._read_word()
        keyword = Keywords.get(spelling)
        if keyword is None:
            self.error('Invalid directive %s' % spelling)
        return self.token(keyword.type, keyword.value)

    def get_next_token(self):
        """Return the next token, skipping whitespace and comments.

        Returns:
            The next Token; an EOF token once the text is exhausted.

        Raises:
            Exception: on a character that starts no token.
        """
        while self.current_char is not None:
            if self.current_char.isspace():
                self.skip_whitespace()
                continue
            pair = self.text[self.pos:self.pos + 2]
            if pair == '/*':
                self.skip_comment()
                continue
            if pair == '//':
                self.skip_rest_of_line()
                continue
            if self.current_char == '"':
                return self.get_string_constant()
            if self.current_char == '\'':
                return self.get_char_constant()
            if self.current_char.isdigit():
                if self._at_hex_prefix():
                    return self._get_hex_constant()
                return self.get_number_constant()
            # Test against id_chars rather than isalpha(): a non-ASCII letter
            # would start an identifier that get_ident() can never consume.
            if self.current_char in id_chars:
                return self.get_ident()
            if self.current_char == '#':
                return self._get_directive()
            for length in (3, 2, 1):
                spelling = self.text[self.pos:self.pos + length]
                if spelling in self._OPERATORS:
                    self.advance(len(spelling))
                    return self.token(self._OPERATORS[spelling], spelling)
            self.error('Invalid character %s' % self.current_char)
        return self.token(EOF, None)
########################## | |
# | |
# Parser (AST Objects and Generator) | |
# | |
# The parser generates an AST to be walked by an AST walker | |
# | |
########################## | |
class AST(object):
    """Common base class of every syntax-tree node."""
class TranslationUnit(AST):
    """Root node: the list of top-level declarations of one source text."""

    def __init__(self, external_declarations):
        # List of ExternalDeclaration nodes, in source order.
        self.external_declarations = external_declarations
class ExternalDeclaration(AST):
    """A top-level item: wraps a Declaration or a FunctionDefinition."""

    def __init__(self, declaration):
        self.declaration = declaration
class Variable(AST):
    """A name, held as its IDENTIFIER token."""

    def __init__(self, token):
        self.token = token
class VariableList(AST):
    """A comma-separated list of Variable nodes."""

    def __init__(self, variables):
        self.variables = variables
class Constant(AST):
    """A literal value, held as its *_CONSTANT token."""

    def __init__(self, token):
        self.token = token
class FunctionDefinition(AST):
    """A function: ``name :: (arguments) -> return_types`` plus body or foreign tag.

    Attributes:
        name: Variable naming the function.
        arguments: TightDeclarationList, or None for an empty parameter list.
        return_types: TypeList, or None when no ``->`` clause is present.
        foreign: ForeignSpecifier for ``#foreign`` functions, else None.
        right: the body (CompoundStatement), or None for foreign functions.
    """

    def __init__(self, name, arguments, return_types, foreign, right):
        self.name, self.arguments = name, arguments
        self.return_types, self.foreign = return_types, foreign
        self.right = right
class ForeignSpecifier(AST):
    """A ``#foreign`` tag; `func` is its optional STRING_CONSTANT token or None."""

    def __init__(self, func):
        self.func = func
class Declaration(AST):
    """A variable, constant or type declaration.

    Attributes:
        variable: the declared name(s) (Variable or VariableList).
        declarator: the declared type, or None when it is implied.
        initializer: the initial value, or None.
        constant: True for constants.
    """

    def __init__(self, variable, declarator, initializer, constant = False):
        """There must at least be a declarator or an initializer, or both"""
        self.variable, self.declarator = variable, declarator
        self.initializer, self.constant = initializer, constant
class DeclarationList(AST):
    """A run of consecutive Declaration nodes."""

    def __init__(self, declarations):
        self.declarations = declarations
class TightDeclaration(AST):
    """A single ``name : type [= initializer]`` declaration (function parameter)."""

    def __init__(self, variable, declarator, initializer):
        """
        Variable cannot be a VariableList
        there must always be a declarator
        initializer is optional
        """
        self.variable, self.declarator = variable, declarator
        self.initializer = initializer
class TightDeclarationList(AST):
    """A comma-separated list of TightDeclaration nodes."""

    def __init__(self, tight_declarations):
        self.tight_declarations = tight_declarations
class VariableDeclarator(AST):
    """A declared type: an optional Pointer prefix and a DirectDeclarator."""

    def __init__(self, pointer, direct_declarator):
        # pointer is None when the type has no leading '*'.
        self.pointer, self.direct_declarator = pointer, direct_declarator
class DirectDeclarator(AST):
    """The non-pointer part of a declared type (plain, parenthesized or array)."""

    def __init__(self, left, right):
        """
        No left and a right = []direct_declarator
        a left and no right = type or (variable_declarator)
        a left and a right = [constant_expression]direct_declarator
        """
        self.left, self.right = left, right
class Pointer(AST):
    """One ``*`` level; `pointer` is the next nested level or None."""

    def __init__(self, pointer):
        self.pointer = pointer
class Initializer(AST):
    """The value part of a declaration."""

    def __init__(self, expression):
        """
        expression may be:
        assignment_expression
        or
        { initializer_list }
        """
        # The parser also passes None here for the `---` (no-init) marker.
        self.expression = expression
class InitializerList(AST):
    """The comma-separated Initializer nodes inside ``{ ... }``."""

    def __init__(self, initializers):
        self.initializers = initializers
class TypeDeclarator(AST):
    """A ``struct``, ``enum`` or ``alias`` type definition.

    Attributes:
        type: the introducing keyword token.
        data_type: struct parameters or the enum's integral type; may be None.
        declaration_list: the members, or the aliased Type for ``alias``.
    """

    def __init__(self, type, data_type, declaration_list):
        self.type, self.data_type = type, data_type
        self.declaration_list = declaration_list
class StructParameter(AST):
    """One parameter of a parameterized struct."""

    def __init__(self, parameter):
        self.parameter = parameter
class StructParameterList(AST):
    """The comma-separated parameters of a parameterized struct."""

    def __init__(self, parameters):
        self.parameters = parameters
class EnumDeclaration(AST):
    """One enumerator: a name with an optional explicit value.

    Attributes:
        variable: Variable naming the enumerator.
        constant_expression: the explicit value expression, or None.
    """

    def __init__(self, variable, constant_expression):
        self.variable = variable
        # This used to be a bare attribute read (no assignment), which raised
        # AttributeError on every construction.
        self.constant_expression = constant_expression
class EnumDeclarationList(AST):
    """The comma-separated EnumDeclaration nodes of an enum body."""

    def __init__(self, declarations):
        self.declarations = declarations
class Type(AST):
    """A type name; `value` mirrors the value of the underlying token."""

    def __init__(self, token):
        self.token, self.value = token, token.value
class TypeList(AST):
    """A comma-separated list of Type nodes (function return types)."""

    def __init__(self, types):
        self.types = types
class Statement(AST):
    """Wrapper around one concrete statement node."""

    def __init__(self, statement):
        self.statement = statement
class StatementList(AST):
    """A run of consecutive Statement nodes."""

    def __init__(self, statements):
        self.statements = statements
class LabeledStatement(AST):
    """A ``case <expr>:`` or ``default:`` label with its statement.

    `expression` is None for ``default``.
    """

    def __init__(self, expression, statement):
        self.expression, self.statement = expression, statement
class ExpressionStatement(AST):
    """An expression followed by ``;``; `expression` is None for a lone ``;``."""

    def __init__(self, expression):
        self.expression = expression
class CompoundStatement(AST):
    """A ``{ ... }`` block."""

    def __init__(self, items):
        """
        items can be a mixed collection of declarations and statements
        """
        self.items = items
class SelectionStatement(AST):
    """An ``if`` or ``switch`` statement; `left` is the keyword token."""

    def __init__(self, left, expression, first_statement, second_statement):
        """
        second_statement is only used if left is 'if' and there is an else statement
        """
        self.left, self.expression = left, expression
        self.first_statement, self.second_statement = first_statement, second_statement
class IterationStatement(AST):
    """A ``while``, ``do``/``while`` or ``for`` loop; `left` is the keyword token."""

    def __init__(self, left, variable, first_expression, second_expression, statement):
        """
        second_expression is only used in "for var : first_expr .. sec_expr" statements
        variable is only used for "for var :" statements
        """
        self.left, self.variable = left, variable
        self.first_expression, self.second_expression = first_expression, second_expression
        self.statement = statement
class JumpStatement(AST):
    """A ``continue``, ``break`` or ``return``.

    `left` is the keyword token; `expression` is None when the jump carries
    no value.
    """

    def __init__(self, left, expression):
        self.left, self.expression = left, expression
class Expression(AST):
    """An expression, stored as its assignment expressions."""

    def __init__(self, assignment_expressions):
        self.assignment_expressions = assignment_expressions
class TernaryExpression(AST):
    """A conditional expression: a condition and its two operands."""

    def __init__(self, condition, left, right):
        self.condition = condition
        self.left, self.right = left, right
class BinaryExpression(AST):
    """An operator `op` applied to the operands `left` and `right`."""

    def __init__(self, left, op, right):
        self.left, self.op, self.right = left, op, right
class UnaryExpression(AST):
    """An operator `op` applied to the single operand `right`."""

    def __init__(self, op, right):
        self.op, self.right = op, right
class CastExpression(AST):
    """A cast: a target `type` and the `expression` being converted."""

    def __init__(self, type, expression):
        self.type, self.expression = type, expression
class PostfixExpression(AST):
    """A postfix form: operand `left`, operator `op` and argument `right`."""

    def __init__(self, left, op, right):
        self.left, self.op, self.right = left, op, right
class PrimaryExpression(AST):
    """Wrapper around a primary expression node."""

    def __init__(self, primary):
        self.primary = primary
class ArgumentExpressionList(AST):
    """A list of argument expressions, stored in `assignments`."""

    def __init__(self, assignments):
        self.assignments = assignments
class NoExpression(AST):
    """Placeholder node carrying no data."""
########################## | |
# | |
# The Actual Parser | |
# | |
# This parser parses through all of the tokens transformed by the | |
# lexer, and transforms the parsed tokens into an Abstract Syntax Tree (AST) | |
# | |
########################## | |
class Parser(object): | |
def __init__(self, tokens): | |
self.tokens = tokens | |
self.pos = 0 | |
self.current_token = self.tokens[self.pos] | |
def parse(self):
    """Parse the whole token stream and return the resulting TranslationUnit."""
    tree = self.translation_unit()
    return tree
def error(self, message):
    """Abort parsing: raise an Exception whose text is 'Invalid syntax: ' + message."""
    text = 'Invalid syntax: ' + message
    raise Exception(text)
def peek(self, index = 1):
    """Return the token `index` positions ahead of the cursor without consuming it.

    Past the end of the stream a freshly built EOF token is returned.
    """
    target = self.pos + index
    if target < len(self.tokens):
        return self.tokens[target]
    return Token(EOF, None, 0, 0)
def eat(self, token_type):
    """Consume the current token if its type is `token_type`.

    On a match the cursor moves to the next token, with both tokens traced
    on stdout; otherwise the mismatch is reported through self.error().
    """
    found = self.current_token.type
    if not found == token_type:
        self.error('Expected token: ' + str(token_type) + ' but got ' + str(found) + ' instead.')
        return
    print(' Eating token: ' + str(self.current_token))
    self.pos += 1
    self.current_token = self.tokens[self.pos]
    print(' New token: ' + str(self.current_token))
def is_declaration(self, index = 0):
    """Look ahead (without consuming) to decide whether a declaration starts here.

    A declaration is an identifier, optionally followed by `, <token>` pairs,
    and then `:`, `:=` or `::`.  The one exception is `::` directly followed
    by `(`, which is the start of a function definition.

    Args:
        index: offset from the cursor at which to start looking.

    Returns:
        True if the lookahead matches a declaration, else False.
    """
    if self.peek(index).type != IDENTIFIER:
        return False
    i = index + 1
    peek_token = self.peek(i)
    # Each comma is assumed to be followed by one name; skip both.
    while peek_token.type == COMMA:
        i += 2
        peek_token = self.peek(i)
    print('~~peek_token: ' + str(peek_token))
    if peek_token.type not in (COLON, COLONASSIGN, DOUBLE_COLON):
        return False
    next_peek_token = self.peek(i + 1)
    print('~~next_peek_token: ' + str(next_peek_token))
    is_function = peek_token.type == DOUBLE_COLON and next_peek_token.type == LPAREN
    return not is_function
def translation_unit(self):
    """Parse external declarations until EOF and wrap them in a TranslationUnit."""
    items = []
    while True:
        if self.current_token.type == EOF:
            break
        items.append(self.external_declaration())
    return TranslationUnit(items)
def external_declaration(self):
    """Parse one top-level item: a declaration if one starts here, else a function."""
    parse_item = self.declaration if self.is_declaration() else self.function_definition
    return ExternalDeclaration(parse_item())
def variable(self):
    """Consume an IDENTIFIER token and return it wrapped in a Variable."""
    identifier = self.current_token
    self.eat(IDENTIFIER)
    return Variable(identifier)
def variable_list(self):
    """Parse `name (, name)*` into a VariableList."""
    names = [self.variable()]
    while True:
        if self.current_token.type != COMMA:
            break
        self.eat(COMMA)
        names.append(self.variable())
    return VariableList(names)
def constant(self):
    """Consume a literal token and return it wrapped in a Constant.

    Integer, float, boolean, string and char constants are accepted; any
    other token is reported through self.error().
    """
    literal_types = (INTEGER_CONSTANT, FLOAT_CONSTANT, BOOLEAN_CONSTANT, STRING_CONSTANT, CHAR_CONSTANT)
    literal = self.current_token
    if literal.type not in literal_types:
        self.error('Expected a _CONSTANT token, but got: ' + str(literal.type) + ' instead.')
    self.eat(literal.type)
    return Constant(literal)
def function_definition(self):
    """Parse `name :: (params) [-> types]` followed by a body or `#foreign ... ;`.

    Returns:
        FunctionDefinition.  A foreign function has its ForeignSpecifier set
        and no body; any other function has a CompoundStatement body and
        foreign set to None.
    """
    name = self.variable()
    self.eat(DOUBLE_COLON)
    self.eat(LPAREN)
    arguments = None if self.current_token.type == RPAREN else self.tight_declaration_list()
    self.eat(RPAREN)
    return_types = None
    if self.current_token.type == ARROW:
        self.eat(ARROW)
        return_types = self.type_list()
    if self.current_token.type != FOREIGN:
        return FunctionDefinition(name, arguments, return_types, None, self.compound_statement())
    foreign = self.foreign_specifier()
    self.eat(SEMICOLON)
    return FunctionDefinition(name, arguments, return_types, foreign, None)
def foreign_specifier(self):
    """Parse `#foreign` and the optional string constant that follows it."""
    self.eat(FOREIGN)
    symbol = None
    if self.current_token.type == STRING_CONSTANT:
        symbol = self.current_token
        self.eat(STRING_CONSTANT)
    return ForeignSpecifier(symbol)
def declaration(self):
    """Parse one declaration, including its terminating `;`.

    Accepted forms:
        name :: struct|enum|alias ... ;    type declaration
        name :: initializer ;              constant, type implied
        a, b : declarator ;                variable with default value
        a, b : declarator = initializer ;  variable
        a, b : declarator : initializer ;  constant, type explicit
        a, b := initializer ;              variable, type implied
        a, b :: initializer ;              constant, type implied

    Returns:
        Declaration.  For the single-name `::` forms `variable` is a
        Variable; for all other forms it is a VariableList.  Anything else
        is reported through self.error().
    """
    if self.peek().type == DOUBLE_COLON:
        # this could be a type_declarator!
        # ... but it could also be a type-implicit-initialized constant
        var = self.variable()
        self.eat(DOUBLE_COLON)
        if self.current_token.type in (STRUCT, ENUM, ALIAS):
            # it is indeed a type_declarator
            type_decl = self.type_declarator()
            self.eat(SEMICOLON)
            return Declaration(var, type_decl, None)
        # it is NOT a type_declarator: `name :: value` declares a constant,
        # so it is flagged like the list form of the same syntax below.
        initer = self.initializer()
        self.eat(SEMICOLON)
        return Declaration(var, None, initer, True)

    var_list = self.variable_list()
    if self.current_token.type == COLON:
        self.eat(COLON)
        decl = self.variable_declarator()
        if self.current_token.type == SEMICOLON:
            # A variable that is initialized with the default value
            self.eat(SEMICOLON)
            return Declaration(var_list, decl, None)
        elif self.current_token.type == ASSIGN:
            # A typical variable declaration and initialization
            self.eat(ASSIGN)
            initer = self.initializer()
            self.eat(SEMICOLON)
            return Declaration(var_list, decl, initer)
        elif self.current_token.type == COLON:
            # An initialized constant, type-explicit
            self.eat(COLON)
            initer = self.initializer()
            self.eat(SEMICOLON)
            return Declaration(var_list, decl, initer, True)
    elif self.current_token.type == COLONASSIGN:
        self.eat(COLONASSIGN)
        initer = self.initializer()
        self.eat(SEMICOLON)
        return Declaration(var_list, None, initer)
    elif self.current_token.type == DOUBLE_COLON:
        self.eat(DOUBLE_COLON)
        initer = self.initializer()
        self.eat(SEMICOLON)
        return Declaration(var_list, None, initer, True)
    self.error('Expected a declaration but got: ' + str(self.current_token) + ' instead')
def declaration_list(self):
    """Parse declarations for as long as one starts at the cursor."""
    found = []
    while True:
        if not self.is_declaration():
            break
        found.append(self.declaration())
    return DeclarationList(found)
def tight_declaration(self):
    """Parse `name : declarator [= initializer]` into a TightDeclaration."""
    name = self.variable()
    self.eat(COLON)
    declarator = self.variable_declarator()
    default = None
    if self.current_token.type == ASSIGN:
        self.eat(ASSIGN)
        default = self.initializer()
    return TightDeclaration(name, declarator, default)
def tight_declaration_list(self):
    """Parse one or more comma-separated tight declarations."""
    items = [self.tight_declaration()]
    while True:
        if self.current_token.type != COMMA:
            break
        self.eat(COMMA)
        items.append(self.tight_declaration())
    return TightDeclarationList(items)
def variable_declarator(self):
    """Parse an optional `*` pointer prefix followed by a direct declarator."""
    stars = self.pointer() if self.current_token.type == ASTERISK else None
    return VariableDeclarator(stars, self.direct_declarator())
def direct_declarator(self):
    """Parse the non-pointer part of a declared type.

    Forms:
        ( variable_declarator )         -> DirectDeclarator(inner, None)
        [] variable_declarator          -> DirectDeclarator(None, element)
        [ constant ] variable_declarator -> DirectDeclarator(size, element)
        type                            -> DirectDeclarator(type, None)
    """
    kind = self.current_token.type
    if kind == LPAREN:
        self.eat(LPAREN)
        inner = self.variable_declarator()
        self.eat(RPAREN)
        return DirectDeclarator(inner, None)
    if kind == LSQUARE:
        self.eat(LSQUARE)
        size = None
        if self.current_token.type != RSQUARE:
            size = self.constant_expression()
        self.eat(RSQUARE)
        return DirectDeclarator(size, self.variable_declarator())
    return DirectDeclarator(self.type(), None)
def pointer(self):
    """Parse one or more `*`, nesting one Pointer node per star."""
    self.eat(ASTERISK)
    inner = self.pointer() if self.current_token.type == ASTERISK else None
    return Pointer(inner)
def initializer(self):
    """Parse an initializer.

    Forms:
        { initializer_list [,] }  -> Initializer(InitializerList)
        ---                       -> Initializer(None)
        assignment_expression     -> Initializer(expression)
    """
    kind = self.current_token.type
    if kind == NOINIT:
        self.eat(NOINIT)
        return Initializer(None)
    if kind != LBRACKET:
        return Initializer(self.assignment_expression())
    self.eat(LBRACKET)
    items = self.initializer_list()
    if self.current_token.type == COMMA:
        self.eat(COMMA)
    self.eat(RBRACKET)
    return Initializer(items)
def initializer_list(self):
    """Parse one or more comma-separated initializers.

    A comma directly followed by `}` is a trailing comma: it is left
    unconsumed so that initializer(), which calls this method, can eat it
    before the closing bracket.

    Returns:
        InitializerList of the parsed Initializer nodes.
    """
    initers = [self.initializer()]
    while self.current_token.type == COMMA:
        if self.peek().type == RBRACKET:
            # Trailing comma: consuming it here would make the next
            # initializer() call fail on the closing bracket.
            break
        self.eat(COMMA)
        initers.append(self.initializer())
    return InitializerList(initers)
def type_declarator(self):
    """Parse the right-hand side of a type declaration.

    Forms:
        struct [( parameters )] { declarations }
        enum [integral_type] { enumerators }
        alias type

    Returns:
        TypeDeclarator(keyword_token, data_type, members), where data_type
        is the struct parameters, the enum's integral type, or None, and
        members is the aliased Type for `alias`.
    """
    left = self.current_token
    if left.type == STRUCT:
        self.eat(STRUCT)
        params = None
        if self.current_token.type == LPAREN:
            self.eat(LPAREN)
            params = self.struct_parameter_list()
            self.eat(RPAREN)
        self.eat(LBRACKET)
        members = self.declaration_list()
        self.eat(RBRACKET)
        return TypeDeclarator(left, params, members)
    if left.type == ENUM:
        self.eat(ENUM)
        base = None
        if self.current_token.type != LBRACKET:
            base = self.integral_type()
        self.eat(LBRACKET)
        members = self.enum_declaration_list()
        self.eat(RBRACKET)
        return TypeDeclarator(left, base, members)
    self.eat(ALIAS)
    return TypeDeclarator(left, None, self.type())
def struct_parameter(self, node = None):
    """Parse one struct parameter: a literal constant or a type.

    Args:
        node: unused.  It is optional because the parser's own call sites
            pass no argument, which used to raise TypeError.

    Returns:
        Constant for a literal token, otherwise Type.
    """
    if self.current_token.type in (INTEGER_CONSTANT, FLOAT_CONSTANT, BOOLEAN_CONSTANT, STRING_CONSTANT, CHAR_CONSTANT):
        return self.constant()
    return self.type()
def struct_parameter_list(self, node = None):
    """Parse one or more comma-separated struct parameters.

    Args:
        node: unused here beyond being forwarded to struct_parameter().  It
            is optional because type_declarator() calls this method without
            arguments, which used to raise TypeError.

    Returns:
        StructParameterList of the parsed parameters.
    """
    # `node` is forwarded so the call works whether or not
    # struct_parameter() requires its own `node` argument.
    params = [self.struct_parameter(node)]
    while self.current_token.type == COMMA:
        self.eat(COMMA)
        params.append(self.struct_parameter(node))
    return StructParameterList(params)
def enum_declaration(self):
    """Parse one enumerator: an identifier with an optional constant value.

    Returns:
        EnumDeclaration(variable, constant_expression); the expression is
        None when no value is given.
    """
    var = self.variable()
    # The lookahead used to test for COLON while the body consumed ASSIGN,
    # so an explicit value could never be parsed.
    # NOTE(review): the intended separator is not evident from this file;
    # both spellings are accepted until the grammar is confirmed.
    if self.current_token.type in (ASSIGN, COLON):
        self.eat(self.current_token.type)
        expr = self.constant_expression()
        return EnumDeclaration(var, expr)
    return EnumDeclaration(var, None)
def enum_declaration_list(self):
    """Parse one or more comma-separated enumerators."""
    members = [self.enum_declaration()]
    while True:
        if self.current_token.type != COMMA:
            break
        self.eat(COMMA)
        members.append(self.enum_declaration())
    return EnumDeclarationList(members)
def type(self):
    """Consume a built-in type keyword or an identifier naming a type.

    Returns:
        Type wrapping the consumed token.  Any other token is reported
        through self.error().
    """
    token = self.current_token
    # Type_Specifiers lists exactly the token types accepted here.
    if token.type not in Type_Specifiers:
        self.error('Expected an IDENTIFIER or Type token, got: ' + str(token) + ' instead.')
        return None
    self.eat(token.type)
    return Type(token)
def integral_type(self):
    """Consume one of the s8..s64 / u8..u64 type keywords.

    Returns:
        Type wrapping the consumed token.  Any other token is reported
        through self.error().
    """
    integral_types = (SIGNED8, SIGNED16, SIGNED32, SIGNED64, UNSIGNED8, UNSIGNED16, UNSIGNED32, UNSIGNED64)
    token = self.current_token
    if token.type not in integral_types:
        self.error('Expected an Integral token, got: ' + str(token) + ' instead.')
        return None
    self.eat(token.type)
    return Type(token)
def type_list(self):
    """Parse one or more comma-separated types into a TypeList."""
    found = [self.type()]
    while True:
        if self.current_token.type != COMMA:
            break
        self.eat(COMMA)
        found.append(self.type())
    return TypeList(found)
def statement(self):
    """Parse one statement, choosing the kind from the current token.

    `case`/`default` start a labeled statement, `{` a compound statement,
    `if`/`switch` a selection, `while`/`do`/`for` an iteration and
    `continue`/`break`/`return` a jump; anything else is parsed as an
    expression statement.
    """
    kind = self.current_token.type
    if kind in (CASE, DEFAULT):
        inner = self.labeled_statement()
    elif kind == LBRACKET:
        inner = self.compound_statement()
    elif kind in (IF, SWITCH):
        inner = self.selection_statement()
    elif kind in (WHILE, DO, FOR):
        inner = self.iteration_statement()
    elif kind in (CONTINUE, BREAK, RETURN):
        inner = self.jump_statement()
    else:
        inner = self.expression_statement()
    return Statement(inner)
def statement_list(self):
    """Parse a run of one or more statements inside a compound statement.

    The run ends at the closing `}` or at the start of a declaration.
    Stopping at a declaration lets compound_statement() parse declarations
    that follow statements; running on to `}` made such a declaration get
    parsed as a statement.

    Returns:
        StatementList of the parsed statements.
    """
    stats = [self.statement()]
    while self.current_token.type != RBRACKET and not self.is_declaration():
        stats.append(self.statement())
    return StatementList(stats)
def labeled_statement(self):
    """Parse `case <constant_expression> : statement` or `default : statement`.

    The label expression of the result is None for `default`.
    """
    label = None
    if self.current_token.type == CASE:
        self.eat(CASE)
        label = self.constant_expression()
    else:
        self.eat(DEFAULT)
    self.eat(COLON)
    return LabeledStatement(label, self.statement())
def expression_statement(self):
    """Parse `[expression] ;`; a lone `;` yields ExpressionStatement(None)."""
    expr = None
    if self.current_token.type != SEMICOLON:
        expr = self.expression()
    self.eat(SEMICOLON)
    return ExpressionStatement(expr)
def compound_statement(self):
    """Parse an LBRACKET ... RBRACKET block into a CompoundStatement.

    The block body is a sequence of declaration lists and statement
    lists; self.is_declaration() decides which one comes next.
    """
    self.eat(LBRACKET)
    children = []
    while self.current_token.type != RBRACKET:
        parse_group = self.declaration_list if self.is_declaration() else self.statement_list
        children.append(parse_group())
    self.eat(RBRACKET)
    return CompoundStatement(children)
def selection_statement(self):
    """Parse an `if` (with optional `else`) or a `switch` statement.

    Returns SelectionStatement(keyword_token, expression, statement,
    else_statement). The last argument is None when there is no else
    branch and for every switch statement.
    """
    keyword = self.current_token
    if keyword.type == IF:
        self.eat(IF)
        condition = self.expression()
        then_branch = self.statement()
        else_branch = None
        if self.current_token.type == ELSE:
            self.eat(ELSE)
            else_branch = self.statement()
        return SelectionStatement(keyword, condition, then_branch, else_branch)
    self.eat(SWITCH)
    subject = self.expression()
    body = self.statement()
    # Pass the fourth argument explicitly, exactly like the else-less `if`
    # above, so both forms build the node with the same arity.
    return SelectionStatement(keyword, subject, body, None)
def iteration_statement(self):
    """Parse a `while`, `do ... while` or `for` loop.

    Returns IterationStatement(keyword_token, variable, expression,
    right_expression, body). `variable` is only set for the
    `FOR name COLON expr` form, and `right_expression` only when that
    form is followed by DOUBLE_DOT and a second expression.
    """
    keyword = self.current_token
    loop_var = None
    upper = None
    if keyword.type == WHILE:
        self.eat(WHILE)
        condition = self.expression()
        body = self.statement()
    elif keyword.type == DO:
        self.eat(DO)
        body = self.statement()
        self.eat(WHILE)
        condition = self.expression()
        self.eat(SEMICOLON)
    else:
        self.eat(FOR)
        # A COLON right after the next token marks the named-iterator form.
        if self.peek().type == COLON:
            loop_var = self.variable()
            self.eat(COLON)
            condition = self.expression()
            if self.current_token.type == DOUBLE_DOT:
                self.eat(DOUBLE_DOT)
                upper = self.expression()
        else:
            condition = self.expression()
        body = self.statement()
    return IterationStatement(keyword, loop_var, condition, upper, body)
def jump_statement(self):
    """Parse `continue;`, `break;`, `return;` or `return <expression>;`.

    Returns JumpStatement(keyword_token, expression) where expression is
    None for everything except a value-returning `return`.
    """
    keyword = self.current_token
    if keyword.type in (CONTINUE, BREAK):
        self.eat(keyword.type)
        # Consume the terminator just like `return` does. Leaving it behind
        # turned `break;` into a jump plus an empty expression statement
        # and hid a following ELSE from selection_statement.
        self.eat(SEMICOLON)
        return JumpStatement(keyword, None)
    self.eat(RETURN)
    value = None
    if self.current_token.type != SEMICOLON:
        value = self.expression()
    self.eat(SEMICOLON)
    return JumpStatement(keyword, value)
def expression(self):
    """Parse one or more COMMA-separated assignment expressions.

    Returns an Expression node holding the parsed sub-expressions in
    source order.
    """
    print('IN: expression')
    exprs = [self.assignment_expression()]
    while self.current_token.type == COMMA:
        self.eat(COMMA)
        # Call the sub-parser. Appending the bound method itself would put
        # a callable in the tree and leave the operand tokens unconsumed.
        exprs.append(self.assignment_expression())
    return Expression(exprs)
def assignment_expression(self):
    """Parse a conditional expression optionally followed by assignments.

    Each assignment becomes BinaryExpression(target, operator_token,
    value). The right-hand side is parsed recursively, so chained
    assignments nest to the right.
    """
    print('IN: assignment_expression')
    node = self.conditional_expression()
    print('at assignment: ' + str(self.current_token))
    while self.current_token.type in (
            ASSIGN, MUL_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, ADD_ASSIGN, SUB_ASSIGN,
            LSHIFT_ASSIGN, RSHIFT_ASSIGN, AND_ASSIGN, XOR_ASSIGN, OR_ASSIGN):
        # assignment_operator() both consumes and returns the operator
        # token, so the node records the token rather than a bound method.
        op = self.assignment_operator()
        node = BinaryExpression(node, op, self.assignment_expression())
    return node
def assignment_operator(self):
    """Consume and return the current assignment-operator token.

    Any other token is reported through self.error().
    """
    op = self.current_token
    assignment_kinds = (
        ASSIGN, MUL_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, ADD_ASSIGN, SUB_ASSIGN,
        LSHIFT_ASSIGN, RSHIFT_ASSIGN, AND_ASSIGN, XOR_ASSIGN, OR_ASSIGN,
    )
    if op.type not in assignment_kinds:
        self.error('Expected an assignment operator token but got: ' + str(op) + ' instead.')
        return None
    self.eat(op.type)
    return op
def conditional_expression(self):
    """Parse `a QUESTION b COLON c`, or just `a` when no QUESTION follows.

    The ternary form returns TernaryExpression(condition, expression,
    conditional_expression).
    """
    print('IN: conditional_expression')
    condition = self.logical_or_expression()
    if self.current_token.type != QUESTION:
        return condition
    self.eat(QUESTION)
    when_true = self.expression()
    self.eat(COLON)
    when_false = self.conditional_expression()
    return TernaryExpression(condition, when_true, when_false)
def constant_expression(self):
    """Parse a constant expression; this is an alias for conditional_expression()."""
    parsed = self.conditional_expression()
    return parsed
def logical_or_expression(self):
    """Parse a left-associative chain of LOGICAL_OR operations."""
    print('IN: logical_or_expression')
    left = self.logical_and_expression()
    while True:
        operator = self.current_token
        if operator.type != LOGICAL_OR:
            break
        self.eat(LOGICAL_OR)
        left = BinaryExpression(left, operator, self.logical_and_expression())
    return left
def logical_and_expression(self):
    """Parse a left-associative chain of LOGICAL_AND operations."""
    left = self.inclusive_or_expression()
    while True:
        operator = self.current_token
        if operator.type != LOGICAL_AND:
            break
        self.eat(LOGICAL_AND)
        left = BinaryExpression(left, operator, self.inclusive_or_expression())
    return left
def inclusive_or_expression(self):
    """Parse a left-associative chain of OR operations."""
    left = self.exclusive_or_expression()
    while True:
        operator = self.current_token
        if operator.type != OR:
            break
        self.eat(OR)
        left = BinaryExpression(left, operator, self.exclusive_or_expression())
    return left
def exclusive_or_expression(self):
    """Parse a left-associative chain of XOR operations."""
    left = self.and_expression()
    while True:
        operator = self.current_token
        if operator.type != XOR:
            break
        self.eat(XOR)
        left = BinaryExpression(left, operator, self.and_expression())
    return left
def and_expression(self):
    """Parse a left-associative chain of AND operations."""
    left = self.equality_expression()
    while True:
        operator = self.current_token
        if operator.type != AND:
            break
        self.eat(AND)
        left = BinaryExpression(left, operator, self.equality_expression())
    return left
def equality_expression(self):
    """Parse a left-associative chain of EQUALITY / NO_EQUALITY comparisons."""
    left = self.relational_expression()
    while True:
        operator = self.current_token
        if operator.type not in (EQUALITY, NO_EQUALITY):
            break
        self.eat(operator.type)
        left = BinaryExpression(left, operator, self.relational_expression())
    return left
def relational_expression(self):
    """Parse a left-associative chain of LESS, GREATER, LESS_EQUAL and GREATER_EQUAL comparisons."""
    left = self.shift_expression()
    while True:
        operator = self.current_token
        if operator.type not in (LESS, GREATER, LESS_EQUAL, GREATER_EQUAL):
            break
        self.eat(operator.type)
        left = BinaryExpression(left, operator, self.shift_expression())
    return left
def shift_expression(self):
    """Parse a left-associative chain of SHIFT_LEFT / SHIFT_RIGHT operations."""
    left = self.additive_expression()
    while True:
        operator = self.current_token
        if operator.type not in (SHIFT_LEFT, SHIFT_RIGHT):
            break
        self.eat(operator.type)
        left = BinaryExpression(left, operator, self.additive_expression())
    return left
def additive_expression(self):
    """Parse a left-associative chain of PLUS / MINUS operations."""
    left = self.multiplicative_expression()
    while True:
        operator = self.current_token
        if operator.type not in (PLUS, MINUS):
            break
        self.eat(operator.type)
        left = BinaryExpression(left, operator, self.multiplicative_expression())
    return left
def multiplicative_expression(self):
    """Parse a left-associative chain of ASTERISK, SLASH and MODULO operations."""
    left = self.cast_expression()
    while True:
        operator = self.current_token
        if operator.type not in (ASTERISK, SLASH, MODULO):
            break
        self.eat(operator.type)
        left = BinaryExpression(left, operator, self.cast_expression())
    return left
def cast_expression(self):
    """Parse `LPAREN type RPAREN cast_expression`, else a unary expression.

    A cast is only recognised when the parenthesised part is exactly one
    token that is a built-in type keyword or an IDENTIFIER.
    """
    inner = self.peek()
    closer = self.peek(2)
    castable_kinds = (
        VOID, BOOL, SIGNED8, SIGNED16, SIGNED32, SIGNED64,
        UNSIGNED8, UNSIGNED16, UNSIGNED32, UNSIGNED64,
        FLOAT32, FLOAT64, STRING, IDENTIFIER,
    )
    looks_like_cast = (
        self.current_token.type == LPAREN
        and closer.type == RPAREN
        and inner.type in castable_kinds
    )
    if not looks_like_cast:
        return self.unary_expression()
    self.eat(LPAREN)
    target_type = self.type()
    self.eat(RPAREN)
    return CastExpression(target_type, self.cast_expression())
def unary_expression(self):
    """Parse a prefix operator applied to its operand, else a postfix expression.

    NEW, DELETE and DEFER take a type as operand, USING takes a postfix
    expression, and the remaining prefix operators take another unary
    expression.
    """
    op = self.current_token
    kind = op.type
    if kind in (NEW, DELETE, DEFER):
        self.eat(kind)
        operand = self.type()
    elif kind == USING:
        self.eat(USING)
        operand = self.postfix_expression()
    elif kind in (INCREMENT, DECREMENT, SHIFT_RIGHT, SHIFT_LEFT, PLUS, MINUS, NOT, LOGICAL_NOT):
        self.eat(kind)
        operand = self.unary_expression()
    else:
        return self.postfix_expression()
    return UnaryExpression(op, operand)
def postfix_expression(self):
    """Parse a primary expression followed by any number of postfix operators.

    Postfix operators are subscripts, calls, DOT member access and
    INCREMENT / DECREMENT. Each one wraps the node built so far in
    PostfixExpression(node, operator_token, right), where `right` is the
    subscript expression, the argument list, the member variable, or
    None for INCREMENT / DECREMENT.

    Primary expressions that start with a type keyword, a constant or
    LPAREN are returned as-is without looking for postfix operators.
    """
    starts_bare_primary = self.current_token.type in (
        VOID, BOOL, SIGNED8, SIGNED16, SIGNED32, SIGNED64,
        UNSIGNED8, UNSIGNED16, UNSIGNED32, UNSIGNED64,
        FLOAT32, FLOAT64, STRING,
        INTEGER_CONSTANT, FLOAT_CONSTANT, BOOLEAN_CONSTANT,
        CHAR_CONSTANT, STRING_CONSTANT, LPAREN,
    )
    node = self.primary_expression()
    if starts_bare_primary:
        return node
    while self.current_token.type in (LSQUARE, LPAREN, DOT, INCREMENT, DECREMENT):
        op = self.current_token
        right = None
        if op.type == LSQUARE:
            self.eat(LSQUARE)
            right = self.expression()
            self.eat(RSQUARE)
        elif op.type == LPAREN:
            self.eat(LPAREN)
            if self.current_token.type == RPAREN:
                # Zero-argument call: argument_expression_list() requires
                # at least one expression, so build the empty list here.
                right = ArgumentExpressionList([])
            else:
                right = self.argument_expression_list()
            self.eat(RPAREN)
        elif op.type == DOT:
            self.eat(DOT)
            right = self.variable()
        else:
            # Only INCREMENT / DECREMENT can reach this point because of
            # the loop condition; they carry no right-hand operand.
            self.eat(op.type)
        node = PostfixExpression(node, op, right)
    return node
def primary_expression(self):
    """Parse a parenthesised expression, a constant, or a variable.

    The result is always wrapped in a PrimaryExpression node.
    """
    kind = self.current_token.type
    if kind == LPAREN:
        self.eat(LPAREN)
        inner = self.expression()
        self.eat(RPAREN)
    elif kind in (INTEGER_CONSTANT, FLOAT_CONSTANT, BOOLEAN_CONSTANT, CHAR_CONSTANT, STRING_CONSTANT):
        inner = self.constant()
    else:
        inner = self.variable()
    return PrimaryExpression(inner)
def argument_expression_list(self):
    """Parse one or more COMMA-separated call arguments into an ArgumentExpressionList."""
    arguments = []
    while True:
        arguments.append(self.assignment_expression())
        if self.current_token.type != COMMA:
            break
        self.eat(COMMA)
    return ArgumentExpressionList(arguments)
########################## | |
# | |
# AST Walker | |
# | |
# The AST walker, or the Node Visitor, accepts an AST and visits each node within the tree | |
# calling the node type's respective procedure | |
# | |
########################## | |
class NodeVisitor(object):
    """Base class that dispatches a node to `visit_<NodeClassName>`."""

    def visit(self, node):
        """Call the visit_ method matching the node's class name and return its result.

        Falls back to generic_visit() when no matching method exists.
        """
        node_kind = type(node).__name__
        print('Visiting node: ' + node_kind)
        handler = getattr(self, 'visit_' + node_kind, self.generic_visit)
        return handler(node)

    def generic_visit(self, node):
        """Raise an Exception naming the visit_ method that is missing."""
        message = 'There is no matching visit_{method} method'.format(method=type(node).__name__)
        raise Exception(message)
########################## | |
# | |
# Symbols | |
# | |
# In order to properly determine anything that is imperative or implicit, we need
# a collection of Variables, Functions, Types, et cetera | |
# | |
########################## | |
class Symbol(object):
    """Base class for every symbol-table entry; stores only the symbol's name."""

    def __init__(self, name):
        # The name is the key under which SymbolTable stores the symbol.
        self.name = name
class FunctionSymbol(Symbol):
    """Symbol for a function: its arguments, return types and body."""

    def __init__(self, name, arguments, return_types, body):
        super(FunctionSymbol, self).__init__(name)
        # Stored exactly as supplied by the caller.
        self.arguments = arguments
        self.return_types = return_types
        self.body = body
class FunctionArgumentSymbol(Symbol):
    """Symbol for one function argument: name, declared type and default value."""

    def __init__(self, name, type, default_value):
        # super() must name this class. `FunctionArgument` does not exist,
        # so using it raised NameError on every construction.
        super(FunctionArgumentSymbol, self).__init__(name)
        self.type = type
        self.default_value = default_value
class StructSymbol(Symbol):
    """Symbol for a struct type together with its member symbols."""

    def __init__(self, name, members):
        super(StructSymbol, self).__init__(name)
        # Stored as supplied by the caller.
        self.members = members
class StructMemberSymbol(Symbol):
    """Symbol for one struct member: name, declared type and default value."""

    def __init__(self, name, type, default_value):
        super(StructMemberSymbol, self).__init__(name)
        self.type = type
        self.default_value = default_value
class EnumSymbol(Symbol):
    """Symbol for an enumeration: its type and its member symbols."""

    def __init__(self, name, type, members):
        super(EnumSymbol, self).__init__(name)
        self.type = type
        self.members = members
class EnumMemberSymbol(Symbol):
    """Symbol for one enumeration member and its value."""

    def __init__(self, name, value):
        super(EnumMemberSymbol, self).__init__(name)
        self.value = value
class AliasSymbol(Symbol):
    """Symbol for a type alias; `alias` holds what the name stands for."""

    def __init__(self, name, alias):
        super(AliasSymbol, self).__init__(name)
        self.alias = alias
class VariableSymbol(Symbol):
    """Symbol for a variable and its type."""

    def __init__(self, name, type):
        super(VariableSymbol, self).__init__(name)
        self.type = type
class ConstantSymbol(Symbol):
    """Symbol for a constant and its type."""

    def __init__(self, name, type):
        super(ConstantSymbol, self).__init__(name)
        self.type = type
class ReservedTypeSymbol(Symbol):
    """Symbol for a built-in type name; carries nothing beyond the name.

    The constructor is inherited from Symbol and takes the same single
    `name` argument.
    """
class SymbolTable(object):
    """Insertion-ordered mapping from symbol name to symbol object."""

    def __init__(self):
        self.symbols = OrderedDict()
        self.init_reserved_symbols()

    def init_reserved_symbols(self):
        """Pre-register the built-in type names as ReservedTypeSymbol entries."""
        reserved_names = (
            'any', 'void', 'bool', 'string',
            's8', 's16', 's32', 's64',
            'u8', 'u16', 'u32', 'u64',
            'f32', 'f64',
        )
        for type_name in reserved_names:
            self.define(ReservedTypeSymbol(type_name))

    def __str__(self):
        listing = list(self.symbols.values())
        return 'Symbols: {symbols}'.format(symbols=listing)

    __repr__ = __str__

    def define(self, symbol):
        """Store `symbol` under its name, replacing any previous entry."""
        print('Defining %s ' % symbol)
        self.symbols[symbol.name] = symbol

    def lookup(self, name):
        """Return the symbol registered under `name`, or None if there is none."""
        return self.symbols.get(name)
########################## | |
# | |
# Table Builder | |
# | |
# The global table builder transforms a tree into a Symbol Table of all of the globally defined | |
# functions, structs, enumerations, variables, and constants. | |
# | |
# For now this table builder will be double-pass. This means that, until the builder becomes n-pass, | |
# implicitly typed variables cannot be initialized by other implicitly typed variables! | |
# | |
########################## | |
class GlobalTableBuilder(NodeVisitor):
    """Build a SymbolTable of global declarations and render a C header from it.

    generate_table() walks the translation unit's external declarations
    in two passes: functions, type declarations and explicitly typed
    variables/constants first, implicitly typed declarations afterwards.
    generate_header() currently emits struct typedefs only.
    """

    def __init__(self, translation_unit):
        self.unit = translation_unit

    def error(self, message):
        """Raise an Exception carrying an 'Invalid syntax: ' prefixed message."""
        raise Exception('Invalid syntax: ' + message)

    def determine_type(self, initializer):
        """Infer the type of an implicitly typed declaration's initializer.

        Not implemented yet: every call ends in self.error().
        """
        if isinstance(initializer, PrimaryExpression):
            # TODO: Determine the type for non-constant primary expressions
            if isinstance(initializer.expression, Constant):
                pass
            elif isinstance(initializer.expression, Variable):
                pass
        if isinstance(initializer, PostfixExpression):
            # TODO: determine the type based off of the return type of a member, function, or array call
            pass
        self.error('Implicit declarations cannot be resolved at this time.')

    @staticmethod
    def _declared_names(variable):
        """Return the identifier strings named by a declaration's variable node."""
        if isinstance(variable, VariableList):
            return [var.token.value for var in variable.variables]
        return [variable.token.value]

    def _define_function(self, symtab, function):
        """Register a FunctionSymbol for a FunctionDefinition node."""
        arguments = []
        if function.arguments is not None:
            arguments = [
                FunctionArgumentSymbol(arg.variable, arg.declarator, arg.initializer)
                for arg in function.arguments.tight_declarations
            ]
        # A defined function stores its body node; a foreign function has
        # no body, so its foreign marker is stored in that slot instead.
        body = function.right if function.foreign is None else function.foreign
        symtab.define(FunctionSymbol(function.name, arguments, function.return_types, body))

    def _define_type(self, symtab, declaration):
        """Register the struct, enum or alias introduced by a type declaration."""
        type_decl = declaration.declarator
        kind = type_decl.type.type
        if kind == STRUCT:
            members = [
                StructMemberSymbol(member_name, member.declarator, member.initializer)
                for member in type_decl.declaration_list.declarations
                for member_name in self._declared_names(member.variable)
            ]
            symtab.define(StructSymbol(declaration.variable.token.value, members))
        elif kind == ENUM:
            members = [
                EnumMemberSymbol(member.variable.token.value, member.constant_expression)
                for member in type_decl.declaration_list.declarations
            ]
            symtab.define(EnumSymbol(declaration.variable.token.value, type_decl.data_type, members))
        elif kind == ALIAS:
            symtab.define(AliasSymbol(declaration.variable.token.value, type_decl.declaration_list))

    def generate_table(self):
        """Return a SymbolTable describing every global declaration of the unit."""
        symtab = SymbolTable()
        implicit_const_declarations = []
        implicit_var_declarations = []

        # First pass: functions, types and explicitly typed declarations.
        # Implicitly typed declarations are deferred because resolving
        # them needs the function and type entries to exist already.
        for external in self.unit.external_declarations:
            declaration = external.declaration
            if isinstance(declaration, FunctionDefinition):
                self._define_function(symtab, declaration)
            elif isinstance(declaration.declarator, TypeDeclarator):
                self._define_type(symtab, declaration)
            elif declaration.declarator is None:
                if declaration.constant:
                    implicit_const_declarations.append(declaration)
                else:
                    implicit_var_declarations.append(declaration)
            else:
                symbol_class = ConstantSymbol if declaration.constant else VariableSymbol
                for name in self._declared_names(declaration.variable):
                    symtab.define(symbol_class(name, declaration.declarator))

        # Second pass: implicitly typed constants first, then variables.
        deferred = (
            (ConstantSymbol, implicit_const_declarations),
            (VariableSymbol, implicit_var_declarations),
        )
        for symbol_class, declarations in deferred:
            for declaration in declarations:
                for name in self._declared_names(declaration.variable):
                    symtab.define(symbol_class(name, self.determine_type(declaration.initializer)))
        return symtab

    def generate_header(self, table):
        """Render the C header text for the symbols in `table`.

        Only struct typedefs are emitted so far; the other symbol kinds
        are sorted into their own lists but not yet rendered.
        """
        structs = []
        enums = []
        functions = []
        constants = []
        variables = []
        for value in table.symbols.values():
            if isinstance(value, StructSymbol):
                structs.append(value)
            elif isinstance(value, EnumSymbol):
                enums.append(value)
            elif isinstance(value, FunctionSymbol):
                functions.append(value)
            elif isinstance(value, ConstantSymbol):
                # Constants belong in their own list, not among the functions.
                constants.append(value)
            elif isinstance(value, VariableSymbol):
                variables.append(value)

        parts = []
        for struct in structs:
            parts.append('typedef struct {\n')
            for member in struct.members:
                parts.append(str(self.visit(member.type)) + ' ' + str(member.name) + ' ')
                # NOTE(review): the default value is written right after the
                # member name with no '='; confirm the intended C output.
                if member.default_value is not None:
                    parts.append(str(self.visit(member.default_value)))
                parts.append(';\n')
            parts.append('} ' + struct.name + ';\n')
        return ''.join(parts)

    def visit_VariableDeclarator(self, node):
        """Render a declarator as its pointer prefix followed by its direct declarator."""
        pointer = ''
        if node.pointer is not None:
            pointer += self.visit(node.pointer)
        # Read the child from the node; the bare name `direct_declarator`
        # was undefined here. Assumes the attribute is called
        # `direct_declarator` - TODO confirm against the VariableDeclarator class.
        return pointer + self.visit(node.direct_declarator)

    def visit_DirectDeclarator(self, node):
        """Render a direct declarator; returns None for shapes not handled below."""
        has_left = node.left is not None
        has_right = node.right is not None
        if has_right and not has_left:
            return Dynamic_Array_C.replace('T', self.visit(node.right))
        if has_left and not has_right:
            if isinstance(node.left, Type):
                return self.visit(node.left)
            if isinstance(node.left, VariableDeclarator):
                return '(' + self.visit(node.left) + ')'
            return None
        if has_left and has_right:
            return Static_Array_C.replace('T', self.visit(node.right))
        return None

    def visit_Pointer(self, node):
        """Render one '*' per pointer level, following nested pointer nodes."""
        pointer = '*'
        if node.pointer is not None:
            pointer += self.visit(node.pointer)
        return pointer
class AssemblyBuilder(NodeVisitor):
    """Walk a translation unit and concatenate the text produced for each external declaration.

    Only the top-level TranslationUnit visitor exists so far; visiting
    any other node type falls through to NodeVisitor.generic_visit().
    """

    def __init__(self, translation_unit):
        self.translation_unit = translation_unit

    def error(self, message):
        """Raise an Exception carrying an 'Invalid syntax: ' prefixed message."""
        raise Exception('Invalid syntax: ' + message)

    def generate(self):
        """Visit the translation unit and return the generated text."""
        # Return the visitor result so the caller can print it.
        return self.visit(self.translation_unit)

    def visit_TranslationUnit(self, node):
        """Return the concatenated output of every external declaration in `node`."""
        # NodeVisitor.visit() passes the node as an argument, and the
        # declarations live on the node, not on the builder.
        result = ''
        for external in node.external_declarations:
            result += self.visit(external)
        return result

    # TODO: add visit_<NodeName> methods for the remaining node types.
def main():
    """Compile the source file named by the first command-line argument.

    Reads the file, tokenizes and parses it, runs the AssemblyBuilder
    over the resulting tree and prints the generated text. Returns
    silently when no file argument is given.
    """
    import sys
    if len(sys.argv) == 1:
        return
    with open(sys.argv[1], 'r') as source_file:
        text = source_file.read()
    lexer = Lexer(text)
    tokens = lexer.tokenize()
    parser = Parser(tokens)
    tree = parser.parse()
    asm_gen = AssemblyBuilder(tree)
    # Use the builder created on the line above; the misspelled `asmGem`
    # raised NameError here.
    asm = asm_gen.generate()
    # Tolerate a generator that produced nothing instead of raising
    # TypeError on str + None.
    print('\n\n' + (asm or ''))
    #headGen = GlobalTableBuilder(tree)
    #table = headGen.generate_table()
    #print('\n\n')
    #print(headGen.generate_header(table))
    #lexer = Lexer(text)
    #parser = Parser(lexer)
    #tree = parser.parse()
    #transliterator = Transliterator(tree)
    #result = transliterator.translit()
    #with open('temp/__out.c', 'w') as out_file:
    #    out_file.write(result)
    #call(['gcc', '__out.c', '-g', 'out.exe'])
    #call(['make'])
    #print('')
    #print(result)
# Run the compiler driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment