Created
June 24, 2019 03:04
-
-
Save WesleyAC/3f4107976512d09517d6b2cf6f5ec7a2 to your computer and use it in GitHub Desktop.
janky z80 assembler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# vim: ts=11 | |
# Copyright 2019 Google LLC | |
# SPDX-License-Identifier: Apache-2.0 | |
# | |
# A SUPER JANK Z80 assembler, written to see if writing a assembler that parses | |
# the opcode table was reasonable. | |
# (see https://twitter.com/WAptekar/status/1142828240874221568) | |
# | |
# It pretty much works, modulo a few things: | |
# * Non-canonical representations are not supported (for instance, you must | |
# write `sub 42` instead of `sub a, 42`). | |
# * `djnz` is broken. This is fixable, but more effort than I want to put into | |
# a quick hack like this (the assembler currently doesn't know about relative | |
# offsets). | |
# * Probably more stuff's broken as well? | |
# | |
# Usage: asm.py input.asm output.bin | |
import sys, re, ast | |
# http://z80-heaven.wikidot.com/opcode-reference-chart
#
# Main (unprefixed) Z80 opcode chart.  Row N of _TBL_ROWS holds the sixteen
# mnemonics whose opcode byte has high nibble N, in low-nibble order, separated
# by ";" (no mnemonic contains a semicolon).  Operand placeholders: "x" is an
# 8-bit immediate, "xx" a 16-bit immediate; the "xxBITxx" / "xxIXxx" /
# "xx80xx" / "xxIYxx" cells mark prefix bytes and never match an instruction.
#
# The rows are stored with a visible separator and joined with real tab
# characters below, because parse_table() splits each line on "\t"; keeping
# literal tabs inside a triple-quoted string is fragile under copy/paste.
_HEX_DIGITS = "0123456789ABCDEF"
_TBL_ROWS = (
    "nop;ld bc,xx;ld (bc),a;inc bc;inc b;dec b;ld b,x;rlca;ex af,af';add hl,bc;ld a,(bc);dec bc;inc c;dec c;ld c,x;rrca",
    "djnz x;ld de,xx;ld (de),a;inc de;inc d;dec d;ld d,x;rla;jr x;add hl,de;ld a,(de);dec de;inc e;dec e;ld e,x;rra",
    "jr nz,x;ld hl,xx;ld (xx),hl;inc hl;inc h;dec h;ld h,x;daa;jr z,x;add hl,hl;ld hl,(xx);dec hl;inc l;dec l;ld l,x;cpl",
    "jr nc,x;ld sp,xx;ld (xx),a;inc sp;inc (hl);dec (hl);ld (hl),x;scf;jr c,x;add hl,sp;ld a,(xx);dec sp;inc a;dec a;ld a,x;ccf",
    "ld b,b;ld b,c;ld b,d;ld b,e;ld b,h;ld b,l;ld b,(hl);ld b,a;ld c,b;ld c,c;ld c,d;ld c,e;ld c,h;ld c,l;ld c,(hl);ld c,a",
    "ld d,b;ld d,c;ld d,d;ld d,e;ld d,h;ld d,l;ld d,(hl);ld d,a;ld e,b;ld e,c;ld e,d;ld e,e;ld e,h;ld e,l;ld e,(hl);ld e,a",
    "ld h,b;ld h,c;ld h,d;ld h,e;ld h,h;ld h,l;ld h,(hl);ld h,a;ld l,b;ld l,c;ld l,d;ld l,e;ld l,h;ld l,l;ld l,(hl);ld l,a",
    "ld (hl),b;ld (hl),c;ld (hl),d;ld (hl),e;ld (hl),h;ld (hl),l;halt;ld (hl),a;ld a,b;ld a,c;ld a,d;ld a,e;ld a,h;ld a,l;ld a,(hl);ld a,a",
    "add a,b;add a,c;add a,d;add a,e;add a,h;add a,l;add a,(hl);add a,a;adc a,b;adc a,c;adc a,d;adc a,e;adc a,h;adc a,l;adc a,(hl);adc a,a",
    "sub b;sub c;sub d;sub e;sub h;sub l;sub (hl);sub a;sbc a,b;sbc a,c;sbc a,d;sbc a,e;sbc a,h;sbc a,l;sbc a,(hl);sbc a,a",
    "and b;and c;and d;and e;and h;and l;and (hl);and a;xor b;xor c;xor d;xor e;xor h;xor l;xor (hl);xor a",
    "or b;or c;or d;or e;or h;or l;or (hl);or a;cp b;cp c;cp d;cp e;cp h;cp l;cp (hl);cp a",
    "ret nz;pop bc;jp nz,xx;jp xx;call nz,xx;push bc;add a,x;rst 00h;ret z;ret;jp z,xx;xxBITxx;call z,xx;call xx;adc a,x;rst 08h",
    "ret nc;pop de;jp nc,xx;out (x),a;call nc,xx;push de;sub x;rst 10h;ret c;exx;jp c,xx;in a,(x);call c,xx;xxIXxx;sbc a,x;rst 18h",
    "ret po;pop hl;jp po,xx;ex (sp),hl;call po,xx;push hl;and x;rst 20h;ret pe;jp (hl);jp pe,xx;ex de,hl;call pe,xx;xx80xx;xor x;rst 28h",
    "ret p;pop af;jp p,xx;di;call p,xx;push af;or x;rst 30h;ret m;ld sp,hl;jp m,xx;ei;call m,xx;xxIYxx;cp x;rst 38h",
)

# Final layout: a header line of column digits, then one line per high nibble
# consisting of the row digit followed by sixteen tab-separated mnemonics.
tbl = "\n".join(
    ["\t" + "\t".join(_HEX_DIGITS)]
    + [
        digit + "\t" + row.replace(";", "\t")
        for digit, row in zip(_HEX_DIGITS, _TBL_ROWS)
    ]
)
def parse_int(s):
    """Parse a numeric operand, returning an int or None if it is not a number.

    Accepted forms:
      * a single-quoted character literal, e.g. ``'a'`` (its code point);
      * hexadecimal with a trailing ``h``, e.g. ``1fh``;
      * hexadecimal with a leading ``0x``, e.g. ``0x1f``;
      * plain decimal, e.g. ``42``.

    Anything else -- including the empty string and malformed or
    multi-character quoted literals -- yields None rather than raising, so
    callers can fall back to treating the operand as a label.

    NOTE: the ``h`` suffix form does not require a leading digit, so a word
    such as ``bah`` parses as 0xBA.
    """
    if not s:
        # Indexing s[0] / s[-1] below would raise IndexError on "".
        return None
    try:
        if len(s) >= 2 and s[0] == "'" and s[-1] == "'":
            # literal_eval handles escapes; ord() rejects anything that is
            # not exactly one character (TypeError, caught below).
            return ord(ast.literal_eval(s))
        if s[-1] == "h":
            return int(s[:-1], base=16)
        if s[:2] == "0x":
            return int(s, base=16)
        return int(s)
    except (ValueError, SyntaxError, TypeError):
        return None
def parse_inst(inst, opcodes):
    """Assemble one source statement into a list of output items.

    Args:
        inst: the statement text, with any label and comment already removed.
        opcodes: list of ``(tokens, opcode_byte)`` pairs as produced by
            ``parse_table``; ``tokens`` is the mnemonic and operands, where
            ``x`` / ``(x)`` stand for an 8-bit immediate and ``xx`` / ``(xx)``
            for a 16-bit immediate.

    Returns:
        A list whose items are ints (one output byte each) or strs (a label
        name that the caller must resolve to a 2-byte address), or None if no
        table entry matches the statement.

    A ``db`` statement is handled separately: its operands are evaluated as a
    Python list literal; ints are emitted as bytes and strings as one byte per
    character.
    """
    text = inst.strip()

    if text[:2].lower() == "db":
        out = []
        # Slice the *stripped* text so indented "db" lines work too.
        for item in ast.literal_eval("[" + text[2:] + "]"):
            if isinstance(item, int):
                out.append(item)
            elif isinstance(item, str):
                out += map(ord, item)
        return out

    tokens = text.replace(",", " ").split()
    for opcode, mc in opcodes:
        if len(tokens) != len(opcode):
            continue
        out = [mc]
        for tok, pat in zip(tokens, opcode):
            if tok.lower() == pat:
                continue  # literal match (mnemonic, register, condition...)
            if pat not in ("x", "(x)", "xx", "(xx)"):
                break  # not a placeholder, so this table entry cannot match
            # Indirection must agree on both sides: "(1234h)" must not match
            # a bare "xx" (that would assemble `ld hl,(nn)` as `ld hl,nn`),
            # and "1234h" must not match "(xx)".
            wrapped = tok[0] == "(" and tok[-1] == ")"
            if ("(" in pat) != wrapped:
                break
            n = parse_int(tok.replace("(", "").replace(")", "").strip())
            if pat in ("x", "(x)"):
                if n is None or not 0 <= n < 2**8:
                    break
                out.append(n)
            elif n is not None:
                if not 0 <= n < 2**16:
                    break
                # Z80 stores 16-bit immediates low byte first; this also
                # matches how label addresses are written out.
                out.append(n & 0xff)
                out.append((n >> 8) & 0xff)
            elif "(" not in tok:
                out.append(tok)  # label, resolved by the caller
            else:
                break
        else:
            return out
    return None
def parse_table(tbl):
    """Convert the opcode chart text into a list of ``(tokens, opcode)`` pairs.

    The first line of ``tbl`` (the column header) is discarded.  Each later
    line is split on tabs; its first field (the row label) is discarded and
    the remaining cells are taken in order.  A cell at row ``r`` (counting
    from zero after the header) and column ``c`` gets opcode ``(r << 4) + c``.
    ``tokens`` is the cell text with commas turned into spaces and split on
    whitespace, e.g. ``"ld bc,xx"`` becomes ``["ld", "bc", "xx"]``.
    """
    body_rows = tbl.split("\n")[1:]
    return [
        (cell.replace(",", " ").split(), (high << 4) + low)
        for high, row in enumerate(body_rows)
        for low, cell in enumerate(row.split("\t")[1:])
    ]
# A label is an identifier followed by ":" at the start of a line (leading
# whitespace allowed); a comment runs from ";" to the end of the line.
_LABEL_RE = re.compile(r"\s*([a-zA-Z0-9_\.]+):")
_COMMENT_RE = re.compile(r"([^;]*)(;.*)?")


def _assemble(lines, opcodes):
    """Assemble an iterable of source lines into the output bytes.

    First pass: strip comments, record each label's byte offset, and encode
    every statement with parse_inst().  Second pass: replace label references
    with their 2-byte little-endian address.

    Raises:
        ValueError: if a statement matches no table entry or a referenced
            label was never defined.  The message carries the line number
            where one is known.
    """
    ops = []
    labels = {}
    address = 0  # running byte offset; a label reference occupies 2 bytes
    for lineno, raw in enumerate(lines, start=1):
        line = _COMMENT_RE.match(raw).group(1)
        label_match = _LABEL_RE.match(line)
        if label_match:
            labels[label_match.group(1)] = address
            line = line[label_match.end():]
        if not line.strip():
            continue
        encoded = parse_inst(line, opcodes)
        if encoded is None:
            raise ValueError(
                "line %d: cannot assemble %r" % (lineno, line.strip()))
        ops += encoded
        address += sum(1 if isinstance(x, int) else 2 for x in encoded)

    out = bytearray()
    for op in ops:
        if isinstance(op, int):
            out.append(op)
        else:
            if op not in labels:
                raise ValueError("undefined label %r" % (op,))
            target = labels[op]
            out.append(target & 0xff)
            out.append((target >> 8) & 0xff)
    return bytes(out)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("Usage: asm.py input.asm output.bin")
    with open(sys.argv[1]) as f:
        try:
            binary = _assemble(f, parse_table(tbl))
        except (ValueError, SyntaxError) as err:
            # SyntaxError can come from a malformed `db` operand list.
            sys.exit("%s: %s" % (sys.argv[1], err))
    # Only create the output file once assembly has succeeded.
    with open(sys.argv[2], 'wb') as out:
        out.write(binary)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment