Created
January 27, 2015 00:24
-
-
Save Katharine/f7ddea34bb0c6efe6abe to your computer and use it in GitHub Desktop.
pyzasm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from array import array | |
class ZStatement(object):
    """Plain record describing one assembly statement.

    Each constructor argument is stored, unchanged, on the attribute of the
    same name.
    """

    def __init__(self, label=None, opcode=None, operands=None, store=False, branch=False):
        super(ZStatement, self).__init__()
        self.label, self.opcode, self.operands = label, opcode, operands
        self.store, self.branch = store, branch
class ZVar(object):
    """Operand wrapper holding a variable number in ``num``.

    ``ZOp.generate_code`` encodes instances of this class as "variable"
    operands and emits ``num`` as the operand byte.
    """

    def __init__(self, num):
        self.num = num

    def __repr__(self):
        # The original formatted the bare name ``num``, which is undefined in
        # this scope and raised NameError; the value lives on the instance.
        return "ZVar(%s)" % (self.num,)
class ZLabel(object):
    """A reference to a named label, resolved after assembly.

    Attributes:
        label: name of the label being referenced.
        packed: flag copied onto the ZRef created for this operand.
        absolute: flag copied onto the ZRef created for this operand.
    """

    def __init__(self, label, packed=False, absolute=False):
        self.label = label
        self.packed = packed
        self.absolute = absolute

    def __repr__(self):
        fields = (self.label, self.packed, self.absolute)
        return "ZLabel('%s', %s, %s)" % fields
class ZString(object):
    """Text held as a list of 5-bit Z-characters, packable into 16-bit words.

    ``a0``/``a1``/``a2`` are the three alphabets used by ``from_ascii``; a
    character found at index ``i`` of an alphabet is encoded as Z-character
    ``i + 6``. Z-characters 4 and 5 select ``a1`` and ``a2`` for the next
    character.
    """

    a0 = 'abcdefghijklmnopqrstuvwxyz'
    a1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Index 0 is a placeholder: spaces are always encoded as Z-character 0.
    a2 = " \n0123456789.,!?_#'\"/\\-:()"

    def __init__(self, chars):
        self.chars = chars

    def get_bytes(self, length=None):
        """Pack the Z-characters into bytes, three characters per two bytes.

        Args:
            length: desired size in bytes. When omitted it is derived from
                the number of Z-characters, with a minimum of one word so
                that even an empty string carries the end-of-string flag.
                The length is rounded up to a whole number of words.

        Returns:
            A list of byte values (high byte then low byte of each word).
            Missing characters are padded with Z-character 5, excess ones
            are dropped, and the top bit of the final word is set.
        """
        if length is None:
            length = max(len(self.chars) / 1.5, 2)
        length = int(math.ceil(length))
        if length % 2 == 1:
            length += 1
        zchar_limit = length // 2 * 3

        # Pad/truncate a copy; the original appended to self.chars itself.
        zchars = list(self.chars[:zchar_limit])
        zchars.extend([5] * (zchar_limit - len(zchars)))

        encoded = []
        for i in range(0, zchar_limit, 3):
            word = (zchars[i] << 10) | (zchars[i + 1] << 5) | zchars[i + 2]
            if i >= zchar_limit - 3:
                word |= 0x8000  # Set end-of-string flag
            encoded.extend((word >> 8, word & 0xFF))
        return encoded

    @classmethod
    def from_ascii(cls, ascii):
        """Build an instance from a text string.

        Spaces become Z-character 0. Characters from ``a0`` are emitted
        directly, characters from ``a1``/``a2`` are preceded by shift
        character 4/5, and anything else is written as the escape sequence
        ``5, 6, high 5 bits, low 5 bits`` of its character code.

        Raises:
            ValueError: if a character code needs more than 10 bits and so
                cannot be represented by the escape sequence.
        """
        shifted_alphabets = ((None, cls.a0), (4, cls.a1), (5, cls.a2))
        zchars = []
        for char in ascii:
            if char == ' ':
                zchars.append(0)
                continue
            for shift, alphabet in shifted_alphabets:
                pos = alphabet.find(char)
                if pos > -1:
                    if shift is not None:
                        zchars.append(shift)
                    zchars.append(pos + 6)
                    break
            else:
                code = ord(char)
                if code > 0x3FF:
                    raise ValueError("Character %r cannot be encoded in 10 bits" % char)
                zchars.extend((5, 6, code >> 5, code & 0x1F))
        return cls(zchars)
class ZOp(object):
    """One instruction definition plus the logic to encode it.

    The class attribute ``operands`` is the mnemonic table. Each entry's
    ``'operands'`` value is an int (0, 1 or 2) for fixed-count instructions
    or a list for variable-form instructions; ``'store'``, ``'branch'`` and
    ``'string'`` mark instructions followed by a store byte, branch data or
    inline text; ``'byref'`` is the index of an operand that the assembler
    treats as a variable number rather than a variable's value.

    Note that instances shadow the table with their own ``operands``
    attribute (the operand count), so the table must be read via the class.
    """

    operands = {
        'je': {'operands': 2, 'opcode': 1, 'branch': True},
        'jl': {'operands': 2, 'opcode': 2, 'branch': True},
        'jg': {'operands': 2, 'opcode': 3, 'branch': True},
        'dec_chk': {'operands': 2, 'opcode': 4, 'branch': True, 'byref': 0},
        'inc_chk': {'operands': 2, 'opcode': 5, 'branch': True, 'byref': 0},
        'jin': {'operands': 2, 'opcode': 6, 'branch': True},
        'test': {'operands': 2, 'opcode': 7, 'branch': True},
        'or': {'operands': 2, 'opcode': 8, 'store': True},
        'and': {'operands': 2, 'opcode': 9, 'store': True},
        'test_attr': {'operands': 2, 'opcode': 10, 'branch': True},
        'set_attr': {'operands': 2, 'opcode': 11},
        'clear_attr': {'operands': 2, 'opcode': 12},
        'store': {'operands': 2, 'opcode': 13, 'byref': 0},
        'insert_obj': {'operands': 2, 'opcode': 14},
        'loadw': {'operands': 2, 'opcode': 15, 'store': True},
        'loadb': {'operands': 2, 'opcode': 16, 'store': True},
        'get_prop': {'operands': 2, 'opcode': 17, 'store': True},
        'get_prop_addr': {'operands': 2, 'opcode': 18, 'store': True},
        'get_next_prop': {'operands': 2, 'opcode': 19, 'store': True},
        'add': {'operands': 2, 'opcode': 20, 'store': True},
        'sub': {'operands': 2, 'opcode': 21, 'store': True},
        'mul': {'operands': 2, 'opcode': 22, 'store': True},
        'div': {'operands': 2, 'opcode': 23, 'store': True},
        'mod': {'operands': 2, 'opcode': 24, 'store': True},
        'jz': {'operands': 1, 'opcode': 0, 'branch': True},
        'get_sibling': {'operands': 1, 'opcode': 1, 'store': True, 'branch': True},
        'get_child': {'operands': 1, 'opcode': 2, 'store': True, 'branch': True},
        'get_parent': {'operands': 1, 'opcode': 3, 'store': True},
        'get_prop_len': {'operands': 1, 'opcode': 4, 'store': True},
        'inc': {'operands': 1, 'opcode': 5, 'byref': 0},
        'dec': {'operands': 1, 'opcode': 6, 'byref': 0},
        'print_addr': {'operands': 1, 'opcode': 7},
        'remove_obj': {'operands': 1, 'opcode': 9},
        'print_obj': {'operands': 1, 'opcode': 10},
        'ret': {'operands': 1, 'opcode': 11},
        'jump': {'operands': 1, 'opcode': 12},
        'print_paddr': {'operands': 1, 'opcode': 13},
        # ``load`` and ``not`` both produce a result, so they need a store
        # byte; the original table omitted the flag for these two.
        'load': {'operands': 1, 'opcode': 14, 'store': True, 'byref': 0},
        'not': {'operands': 1, 'opcode': 15, 'store': True},
        'rtrue': {'operands': 0, 'opcode': 0},
        'rfalse': {'operands': 0, 'opcode': 1},
        'print': {'operands': 0, 'opcode': 2, 'string': True},
        'print_ret': {'operands': 0, 'opcode': 3, 'string': True},
        'nop': {'operands': 0, 'opcode': 4},
        'save': {'operands': 0, 'opcode': 5, 'branch': True},
        'restore': {'operands': 0, 'opcode': 6, 'branch': True},
        'restart': {'operands': 0, 'opcode': 7},
        'ret_popped': {'operands': 0, 'opcode': 8},
        'pop': {'operands': 0, 'opcode': 9},
        'quit': {'operands': 0, 'opcode': 10},
        'new_line': {'operands': 0, 'opcode': 11},
        'show_status': {'operands': 0, 'opcode': 12},
        'verify': {'operands': 0, 'opcode': 13, 'branch': True},
        'call': {'operands': [1, 4], 'opcode': 0, 'store': True},
        'storew': {'operands': [3], 'opcode': 1},
        'storeb': {'operands': [3], 'opcode': 2},
        'put_prop': {'operands': [3], 'opcode': 3},
        'sread': {'operands': [2], 'opcode': 4},
        'print_char': {'operands': [1], 'opcode': 5},
        'print_num': {'operands': [1], 'opcode': 6},
        'random': {'operands': [1], 'opcode': 7, 'store': True},
        'push': {'operands': [1], 'opcode': 8},
        'pull': {'operands': [1], 'opcode': 9, 'byref': 0},
        'split_window': {'operands': [1], 'opcode': 10},
        'set_window': {'operands': [1], 'opcode': 11},
        'output_stream': {'operands': [1], 'opcode': 0x13},
        'input_stream': {'operands': [1], 'opcode': 0x14},
        'sound_effect': {'operands': [2, 4], 'opcode': 0x15},
    }

    @classmethod
    def get(cls, name):
        """Return a new ZOp for mnemonic ``name`` (case-insensitive).

        Raises:
            KeyError: if the mnemonic is not in the table.
        """
        return cls(**cls.operands[name.lower()])

    def __init__(self, operands=None, opcode=None, string=False, store=False, branch=False, byref=None):
        self.operands = operands
        self.opcode = opcode
        self.string = string
        self.store = store
        self.branch = branch
        self.byref = byref

    def byte(self, as_var=False):
        """Return the opcode byte without any operand-type bits.

        Args:
            as_var: encode a two-operand instruction in variable form
                (prefix 0xC0) instead of long form.
        """
        code = 0
        if isinstance(self.operands, list):
            code |= 0xE0
        elif as_var and self.operands == 2:
            # Fake var with two operands.
            code |= 0xC0
        elif self.operands in (0, 1):
            code |= 0x80
            if self.operands == 0:
                code |= 0x30
        code |= self.opcode
        return code

    def generate_code(self, operands=None, store=None, branch=None, branch_truth=True, string=None):
        """Encode one instruction.

        Args:
            operands: up to four operands; each is a ZVar, a ZLabel or an
                integer constant. The list is not modified.
            store: variable number for the result (required for store ops).
            branch: branch offset or ZLabel (required for branch ops).
            branch_truth: branch when the condition is true (default) or false.
            string: inline text for string ops; ``None`` means empty text.

        Returns:
            A ZOpCode whose ``bytes`` is the encoded instruction and whose
            ``labels`` maps the index of the low byte of every unresolved
            label reference to the ZRef describing it.

        Raises:
            SyntaxError: if a required store or branch argument is missing.
            ValueError: if an integer operand does not fit in 16 bits.
        """
        # Work on a copy: label operands are replaced by placeholders below.
        operands = [] if operands is None else list(operands)
        label_refs = {}
        # Operand types: 0 = large constant, 1 = small constant,
        # 2 = variable, 3 = omitted.
        operand_types = []
        has_large = False
        for i in range(4):
            if len(operands) <= i:
                operand_types.append(3)
            elif isinstance(operands[i], ZVar):
                operand_types.append(2)
            elif isinstance(operands[i], ZLabel):
                operand_types.append(0)
                # A label occupies two bytes, so it must force variable form
                # for two-operand instructions just like any large constant.
                has_large = True
                label_refs[i] = ZRef(operands[i].label, 16, operands[i].packed, operands[i].absolute)
                operands[i] = 0x0000  # Placeholder, to be replaced later.
            else:
                value = operands[i]
                if value < -0x8000 or value > 0xFFFF:
                    raise ValueError("Operand %s does not fit in 16 bits" % value)
                if value < 0 or value > 255:
                    # Negative values are stored as 16-bit two's complement.
                    operand_types.append(0)
                    has_large = True
                    operands[i] = value & 0xFFFF
                else:
                    operand_types.append(1)

        # Long form can only describe exactly two small/variable operands;
        # anything else has to use variable form with an explicit type byte.
        as_var = self.operands == 2 and (has_large or len(operands) != 2)
        output_bytes = [self.byte(as_var)]
        if self.operands == 1:
            output_bytes[0] |= operand_types[0] << 4
        elif self.operands == 2 and not as_var:
            output_bytes[0] |= (operand_types[0] & 2) << 5
            output_bytes[0] |= (operand_types[1] & 2) << 4
        elif self.operands != 0:
            type_byte = 0x00
            for i in range(4):
                type_byte |= operand_types[i] << ((3 - i) * 2)
            # Always emitted: 0x00 is the valid "four large constants" case.
            output_bytes.append(type_byte)

        real_label_refs = {}
        for i in range(4):
            kind = operand_types[i]
            if kind == 0:
                output_bytes.append(operands[i] >> 8)
                output_bytes.append(operands[i] & 0xFF)
                if i in label_refs:
                    # Key the reference by the position of its low byte.
                    real_label_refs[len(output_bytes) - 1] = label_refs[i]
            elif kind == 1:
                output_bytes.append(operands[i])
            elif kind == 2:
                output_bytes.append(operands[i].num)
        label_refs = real_label_refs

        if self.store:
            if store is None:
                raise SyntaxError("No store included for store operation")
            output_bytes.append(store)

        if self.branch:
            if branch is None:
                raise SyntaxError("No branch included for branch operation")
            branch_byte = 0
            print("branch_truth: %s" % branch_truth)  # debug trace kept from the original
            if branch_truth:
                branch_byte |= 0x80
            if isinstance(branch, ZLabel):
                label_refs[len(output_bytes) + 1] = ZRef(branch.label, 14)
                branch = 0x00FF  # Placeholder (must fit in 14 bits)
            if branch > 63 or branch < 0:
                # Two-byte form: 14-bit offset, high six bits in the first byte.
                branch_byte |= (branch >> 8) & 0x3F
                output_bytes.extend((branch_byte, branch & 0xFF))
            else:
                # One-byte form: bit 6 set, offset in the low six bits.
                branch_byte |= 0x40 | branch
                output_bytes.append(branch_byte)

        if self.string:
            if string is None:
                string = ""
            output_bytes.extend(ZString.from_ascii(string).get_bytes())

        return ZOpCode(output_bytes, label_refs)
class ZOpCode(object):
    """Result of encoding one instruction.

    Attributes:
        bytes: the value passed as ``bytes`` (the encoded instruction).
        labels: the value passed as ``labels`` (pending label references).
    """

    def __init__(self, bytes, labels):
        super(ZOpCode, self).__init__()
        self.bytes, self.labels = bytes, labels
class ZRef(object):
    """A pending reference to a label that still has to be patched.

    Attributes:
        label: name of the referenced label.
        bits: width in bits of the field to be patched.
        packed: stored as given; defaults to False.
        absolute: stored as given; defaults to False.
    """

    def __init__(self, label, bits, packed=False, absolute=False):
        super(ZRef, self).__init__()
        self.label, self.bits = label, bits
        self.packed, self.absolute = packed, absolute

    def __repr__(self):
        fields = (self.label, self.bits, self.packed, self.absolute)
        return "ZRef('%s', %s, %s, %s)" % fields
class ZCode(object):
    """Assembles source text into a byte image.

    Attributes:
        bytecode: ``array('B')`` holding the image; starts with the preamble.
        labels: label/method/table name -> address in ``bytecode``.
        refs: address of the low byte of a pending reference -> ZRef.
        globals: global variable name (``$name``) -> variable number.
        locals: local variable name of the current method -> variable number.
        pointer: address at which the next byte will be written.
    """

    def __init__(self):
        self.bytecode = array('B')
        self.labels = {}
        self.refs = {}
        self.globals = {}
        self.locals = {}
        self.pointer = 0
        preamble = self.generate_preamble()
        self.bytecode.extend(preamble)
        self.pointer = len(preamble)

    def sign(self, unsigned, bits=16):
        """Interpret ``unsigned`` as a two's-complement number of ``bits`` bits."""
        if unsigned & (1 << (bits - 1)):
            return unsigned - (1 << bits)
        return unsigned

    def generate_preamble(self):
        """Return the fixed 0x12C-byte header block as an ``array('B')``."""
        preamble = array('B', [0] * 0x12C)
        preamble[0] = 3  # Version
        preamble[1] = 0
        preamble[2] = 0  # Release byte 1
        preamble[3] = 0  # Release byte 2
        preamble[4] = 0xFF  # High memory
        preamble[5] = 0xFF
        preamble[6] = 0x01  # PC start
        preamble[7] = 0x2C  # cont
        preamble[8] = 0x00  # Location of dictionary
        preamble[9] = 0x38  # cont.
        preamble[0xA] = 0x00  # Location of object table
        preamble[0xB] = 0x00  # cont.
        # NOTE(review): globals start at 0x3C and code at 0x12C, i.e. 240
        # bytes apart. If globals are two-byte words, only 120 fit before
        # they overlap code, while resolve_variable allows 240 - confirm.
        preamble[0xC] = 0x00  # Location of global variables
        preamble[0xD] = 0x3C  # cont.
        preamble[0xE] = 0xFF  # Static base
        preamble[0xF] = 0xFF  # cont.
        preamble[0x10] = 0  # Flags 2
        preamble[0x18] = 0x00  # Abbreviation table
        preamble[0x19] = 0x00  # cont.
        preamble[0x1A] = 0xFF  # file size
        preamble[0x1B] = 0xFF  # cont.
        # Empty dictionary stub
        preamble[0x38] = 0  # No separators
        preamble[0x39] = 4  # Four-byte entries
        preamble[0x3A] = 0  # No entries
        preamble[0x3B] = 0  # cont.
        return preamble

    def resolve_variable(self, var):
        """Translate a variable token into its variable number.

        Numeric tokens are returned as integers, ``$stack`` is 0, other
        ``$name`` tokens are globals (numbered from 0x10 in order of first
        use) and anything else must be a local of the current method.

        Raises:
            IndexError: when a 241st global would be allocated.
            SyntaxError: when a local variable has not been declared.
        """
        try:
            return int(var)
        except ValueError:
            pass
        if var == '$stack':
            return 0
        if var.startswith('$'):
            if var not in self.globals:
                if len(self.globals) >= 240:
                    raise IndexError("Attempted to use more than 240 global variables.")
                self.globals[var] = len(self.globals) + 0x10
            return self.globals[var]
        if var not in self.locals:
            raise SyntaxError("Attempted to use undeclared local variable '%s'." % var)
        return self.locals[var]

    def parse_line(self, line):
        """Assemble one source line and append the result to ``bytecode``.

        Recognised forms (tokens are separated by single spaces):
          * ``.name`` - define a label at the current address; may stand
            alone or prefix any of the forms below.
          * ``METHOD name [local[=default] ...]`` - word-align, define
            ``name`` and emit the local count and default values.
          * ``TABLE name length [byte ...]`` - define ``name`` and emit a
            length byte followed by ``length`` bytes (given values, then 0).
          * ``opcode [operands] [-> store] [branch] [: text]`` - an
            instruction. Operand prefixes: ``?`` relative label, ``*``
            absolute label, ``@`` packed absolute label; ``[var]`` flips
            the by-reference treatment of a variable operand.

        Returns:
            None.

        Raises:
            SyntaxError: for malformed statements.
            KeyError: for an unknown opcode mnemonic.
        """
        if not line.strip():
            return None
        parts = line.lstrip().split(' ')

        # declaring a label
        if parts[0].startswith('.'):
            self.labels[parts.pop(0)[1:]] = self.pointer
            while parts and not parts[0]:
                parts.pop(0)
            # A label may be the only thing on the line; the original
            # indexed parts[0] here and crashed with IndexError.
            if not parts:
                return None

        # declaring a method
        if parts[0] == 'METHOD':
            if self.pointer % 2 == 1:
                self.pointer += 1
                self.bytecode.append(0)
            tokens = [p for p in parts[1:] if p]
            if not tokens:
                raise SyntaxError("METHOD requires a name.")
            name = tokens.pop(0)
            if len(tokens) > 15:
                raise SyntaxError("Too many local variables in method '%s'." % name)
            self.labels[name] = self.pointer
            self.locals = {}
            defaults = []
            for number, token in enumerate(tokens, 1):
                if '=' in token:
                    pieces = token.split('=')
                    defaults.append(int(pieces[1]))
                    token = pieces[0]
                else:
                    defaults.append(0)
                self.locals[token] = number
            header = [len(self.locals)]
            for default in defaults:
                # Mask so negative defaults become 16-bit two's complement.
                header.extend(((default >> 8) & 0xFF, default & 0xFF))
            print(header)  # debug trace kept from the original
            self.pointer += len(header)
            self.bytecode.extend(header)
            return None

        if parts[0] == 'TABLE':
            tokens = [p for p in parts[1:] if p]
            if len(tokens) < 2:
                raise SyntaxError("TABLE requires a name and a length.")
            name = tokens.pop(0)
            length = int(tokens.pop(0))
            table = [length] + [0] * length
            table[1:len(tokens) + 1] = [int(x) for x in tokens]
            self.labels[name] = self.pointer
            self.pointer += len(table)
            self.bytecode.extend(table)
            return None

        op = ZOp.get(parts.pop(0))
        store = None
        branch = None
        branch_truth = True
        operands = None
        text = None

        # Extract inline text first so its spacing survives the removal of
        # empty tokens below.
        if op.string:
            if ':' not in parts:
                raise SyntaxError("Didn't specify a string for inline string op.")
            index = parts.index(':')
            text = ' '.join(parts[index + 1:])
            parts = parts[:index]

        # Repeated or trailing spaces yield empty tokens; drop them.
        parts = [p for p in parts if p]

        if op.store:
            if '->' not in parts:
                raise SyntaxError("Didn't specify a store location for store op.")
            index = parts.index('->')
            if index + 1 >= len(parts):
                raise SyntaxError("Didn't specify a store location for store op.")
            store = self.resolve_variable(parts[index + 1])
            del parts[index:index + 2]

        if op.branch:
            if not parts:
                raise SyntaxError("Didn't specify a branch target for branch op.")
            target = parts.pop()
            if target.startswith('?'):
                target = target[1:]
            if target.startswith('~'):
                branch_truth = False
                target = target[1:]
            if target == 'rtrue':
                branch = 1
            elif target == 'rfalse':
                branch = 0
            else:
                try:
                    branch = int(target)
                except ValueError:
                    branch = ZLabel(target)

        if parts:
            operands = []
            for index, part in enumerate(parts):
                if part[0] == '?':
                    operands.append(ZLabel(part[1:]))
                elif part[0] == '@':
                    operands.append(ZLabel(part[1:], packed=True, absolute=True))
                elif part[0] == '*':
                    operands.append(ZLabel(part[1:], absolute=True))
                else:
                    try:
                        operands.append(int(part))
                    except ValueError:
                        ref_override = (part[0] == '[' and part[-1] == ']')
                        if ref_override:
                            part = part[1:-1]
                        var = self.resolve_variable(part)
                        # By-reference operands pass the variable *number*
                        # as a constant; brackets invert the default.
                        if (op.byref == index) != ref_override:
                            operands.append(var)
                        else:
                            operands.append(ZVar(var))

        code = op.generate_code(operands=operands, store=store, string=text,
                                branch=branch, branch_truth=branch_truth)
        print(code.bytes)  # debug trace kept from the original
        self.bytecode.extend(code.bytes)
        for offset in code.labels:
            self.refs[self.pointer + offset] = code.labels[offset]
        self.pointer += len(code.bytes)
        return None

    def parse_asm(self, asm):
        """Assemble a whole program and resolve all label references.

        Args:
            asm: source text, one statement per line.

        Returns:
            ``self.bytecode`` after every pending reference was patched.

        Raises:
            KeyError: if a referenced label was never defined.
        """
        for line in asm.split("\n"):
            self.parse_line(line.lstrip())

        # We now need to go back and fix the label refs
        print("References: %s" % (self.refs,))
        for pos in self.refs:
            ref = self.refs[pos]
            if ref.label not in self.labels:
                raise KeyError("Unknown label %s" % ref.label)
            jump = self.labels[ref.label]
            if not ref.absolute:
                # Relative to the byte after the reference, plus the offset
                # bias of 2: target - (pos + 1) + 2.
                jump = jump - pos + 1
            if ref.packed:
                jump = jump // 2  # floor division keeps this an int on Python 3
            if ref.bits > 8:
                # Mask selecting the bits of the field that live in the high byte.
                mask = 0xFF >> (8 - (ref.bits - 8))
                print(jump)
                print(hex(self.bytecode[pos - 1]))
                before = hex(self.bytecode[pos - 1] & mask)
                self.bytecode[pos - 1] |= (jump >> 8) & mask
                print("%s -> %s" % (before, hex(self.bytecode[pos - 1] & mask)))
                print(hex(self.bytecode[pos - 1]))
            before = hex(self.bytecode[pos])
            self.bytecode[pos] = jump & 0xFF
            print("%s -> %s" % (before, hex(self.bytecode[pos])))
        # We should be good!
        return self.bytecode
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment