Created
May 29, 2025 18:38
-
-
Save deniska/4e84fcca0a0f945fc99a72ed313497c1 to your computer and use it in GitHub Desktop.
simple unconventional uxn assembler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Prints a zero-terminated string to the console, one byte at a time.

; Console device ports. Each `rb N` reserves N bytes, so the labels below
; resolve to consecutive addresses starting at 0x10.
org 0x10
Console:
.vector: rb 2       ; 0x10-0x11
.read: rb 1         ; 0x12
.pad: rb 4          ; 0x13-0x16, presumably filler over ports not named here
.type: rb 1         ; 0x17
.write: rb 1        ; 0x18
.error: rb 1        ; 0x19

; Program code starts at 0x100.
org 0x100
main:
    lit2 str            ; push the 16-bit address of the string
.loop:
    ldak                ; load the byte at that address, keeping the address
    dup                 ; one copy for the test below, one to print
    jci .cont           ; non-zero byte: go print it
    jmi .quit           ; zero byte: end of string
.cont:
    lit Console.write   ; push the console write port number
    deo                 ; send the byte to that port
    inc2                ; advance to the next character
    jmi .loop
.quit:
    brk
; The text, a newline, and the zero terminator tested in .loop.
str: bytes "Hello world", 0x0a, 0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from dataclasses import dataclass | |
import collections | |
import enum | |
import string | |
class TokenType(enum.Enum):
    """Kinds of lexical token the tokenizer can produce."""

    identifier = 1  # label, directive or mnemonic name
    number = 2      # decimal or 0x-prefixed hexadecimal literal
    string = 3      # double-quoted string literal
    comma = 4       # ','
    colon = 5       # ':'
    newline = 6     # end of a source line
@dataclass
class Token:
    """One lexical token: its kind plus the payload that goes with it."""

    # Which kind of token this is.
    token_type: TokenType
    # Payload: the name for identifiers, an int for numbers, the text for
    # strings, and the literal character for punctuation and newlines.
    value: object
# Characters that may begin an identifier: ASCII letters, plus '.' which
# introduces a local label.
identifier_start = {*string.ascii_letters, '.'}
# Characters allowed after the first one: everything above plus digits.
identifier_cont = identifier_start.union(string.digits)
class Tokenizer:
    """Turn assembler source text into a stream of ``Token`` objects.

    Input is consumed one character at a time from ``input_file``, which must
    provide ``read(1)`` returning a one-character ``str`` (or ``''`` at end of
    input).  One character of look-ahead is available through ``putback``.
    """

    def __init__(self, input_file):
        self.input_file = input_file
        # One-character push-back slot; None means nothing is stashed.
        self.stash = None

    def next(self):
        """Return the next input character, or ``''`` once input is exhausted."""
        if self.stash is None:
            return self.input_file.read(1)
        c = self.stash
        self.stash = None
        return c

    def putback(self, c):
        """Make ``c`` the character returned by the following ``next()`` call."""
        self.stash = c

    def _read_run(self, first, allowed):
        """Return ``first`` plus every following character found in ``allowed``.

        The character that ends the run is pushed back for the next read.
        """
        run = [first]
        # End of input ('') has to be tested explicitly: when ``allowed`` is
        # a str, ``'' in allowed`` is True and the loop would never finish.
        while (c := self.next()) != '' and c in allowed:
            run.append(c)
        self.putback(c)
        return ''.join(run)

    def _read_string(self):
        """Read the rest of a string literal whose opening quote was consumed.

        Only the escapes ``\\"`` and ``\\\\`` are recognised.

        Raises:
            ValueError: if the line or the input ends before the closing
                quote, or on an unknown escape sequence.
        """
        chars = []
        while True:
            c = self.next()
            if c == '"':
                return ''.join(chars)
            if c in ('\n', ''):
                raise ValueError('Unclosed string')
            if c == '\\':
                c = self.next()
                if c not in ('"', '\\'):
                    raise ValueError(f'Unknown string escape: {c!r}')
            # Single append point, so an escaped character is stored once.
            chars.append(c)

    def tokenize(self):
        """Yield tokens until the input is exhausted.

        Spaces, tabs and carriage returns are skipped; ``;`` starts a comment
        that runs to the end of the line (the newline itself is still
        reported as a token).

        Raises:
            ValueError: on a malformed string literal or a character that
                cannot start any token.
        """
        while True:
            c = self.next()
            if c == '':
                return
            if c in ' \t\r':
                continue
            if c == '\n':
                yield Token(TokenType.newline, '\n')
            elif c in identifier_start:
                yield Token(TokenType.identifier, self._read_run(c, identifier_cont))
            elif c in string.digits:
                base, digit_chars = 10, string.digits
                if c == '0':
                    # A leading "0x" switches to hexadecimal; the leading '0'
                    # stays in the digit run and does not change the value.
                    lookahead = self.next()
                    if lookahead == 'x':
                        base, digit_chars = 16, string.hexdigits
                    else:
                        self.putback(lookahead)
                yield Token(TokenType.number, int(self._read_run(c, digit_chars), base))
            elif c == ':':
                yield Token(TokenType.colon, ':')
            elif c == ',':
                yield Token(TokenType.comma, ',')
            elif c == ';':
                # Discard the comment but leave the newline for the main loop.
                while (c := self.next()) not in ('\n', ''):
                    pass
                self.putback(c)
            elif c == '"':
                yield Token(TokenType.string, self._read_string())
            else:
                raise ValueError(f'Unknown character: {c!r}')
class Assembler:
    """Assemble a token stream into a uxn ROM image.

    Bytes are placed into an in-memory image at the current address
    (``cur``).  Label operands are emitted as zero placeholders and patched
    once the whole input has been read, so forward references work.  The
    image from address 0x100 onward is then written to ``output_file``.
    """

    def __init__(self, input_file, output_file):
        self.cur = 0                  # address the next byte is written to
        self.out_buf = bytearray()    # memory image indexed by absolute address
        self.output_file = output_file
        self.tokens = Tokenizer(input_file).tokenize()
        self.stash = None             # one-token push-back slot
        self.prev_label = ''          # last non-local label; prefix for '.name'
        self.labels = {}              # label name -> address
        # label name -> addresses of placeholders awaiting that label
        self.pending_labels_wide = collections.defaultdict(list)  # 2-byte slots
        self.pending_labels = collections.defaultdict(list)       # 1-byte slots
        # placeholder addresses that must receive an offset, not an address
        self.relative_addrs = set()

    def next(self):
        """Return the next token, or None once the input is exhausted."""
        if self.stash is not None:
            tok = self.stash
            self.stash = None
            return tok
        return next(self.tokens, None)

    def putback(self, tok):
        """Make ``tok`` the token returned by the following ``next()`` call."""
        self.stash = tok

    def write_byte(self, b):
        """Store ``b`` at the current address and advance by one.

        Raises:
            ValueError: if ``b`` does not fit in a byte.
        """
        if b > 255:
            raise ValueError(f'Numeric immediate too big for byte: {b}')
        # Grow the image with zeros so the current address is addressable.
        shortfall = self.cur + 1 - len(self.out_buf)
        if shortfall > 0:
            self.out_buf.extend(bytes(shortfall))
        self.out_buf[self.cur] = b
        self.cur += 1

    def write_short(self, num):
        """Store ``num`` as two bytes, high byte first.

        Raises:
            ValueError: if ``num`` does not fit in 16 bits.
        """
        if num > 65535:
            raise ValueError(f'Numeric immediate too big for short: {num}')
        self.write_byte(num >> 8)
        self.write_byte(num & 0xff)

    def add_label(self, tok):
        """Bind the label named by ``tok`` to the current address.

        A name starting with '.' is local: it is stored with the most recent
        non-local label prepended.

        Raises:
            ValueError: if the resulting name is already defined.
        """
        name = tok.value
        if name[0] == '.':
            name = self.prev_label + name
        else:
            self.prev_label = name
        if name in self.labels:
            raise ValueError(f'Label {name!r} already defined')
        self.labels[name] = self.cur

    def assemble(self):
        """Read every line, resolve label references and write the output.

        Raises:
            ValueError: on any syntax error or reference to an unknown label.
        """
        while (tok := self.next()) is not None:
            if tok.token_type == TokenType.newline:
                continue
            if tok.token_type != TokenType.identifier:
                raise ValueError(f'Dunno what to do with {tok}')
            following = self.next()
            if following is not None and following.token_type == TokenType.colon:
                self.add_label(tok)
                tok = self.next()
                # A label may stand alone on its line (or end the file).
                if tok is None or tok.token_type == TokenType.newline:
                    continue
                if tok.token_type != TokenType.identifier:
                    raise ValueError(f'Dunno what to do with {tok}')
            else:
                # Not a label: the look-ahead belongs to the statement.
                self.putback(following)
            self.parse_identifier(tok)
        self._patch(self.pending_labels_wide, self.write_short, 0xffff)
        self._patch(self.pending_labels, self.write_byte, 0xff)
        # Only the image from 0x100 onward is emitted.
        self.output_file.write(self.out_buf[0x100:])

    def _patch(self, pending, write, mask):
        """Overwrite each placeholder in ``pending`` with its label's value."""
        for label, addrs in pending.items():
            if label not in self.labels:
                raise ValueError(f'Label not found: {label!r}')
            target = self.labels[label]
            for addr in addrs:
                self.cur = addr
                if addr in self.relative_addrs:
                    # Distance from the end of a two-byte operand at ``addr``.
                    write((target - addr - 2) & mask)
                else:
                    write(target)

    def expect(self, token_type):
        """Return the next token, which must be of ``token_type``.

        Raises:
            ValueError: if the input has ended or the token kind differs.
        """
        tok = self.next()
        if tok is None:
            raise ValueError(f'Expected {token_type}, got EOF')
        if tok.token_type != token_type:
            raise ValueError(f'Expected {token_type}, got {tok.token_type}')
        return tok

    def expectmany(self, *token_types):
        """Return the next token, which must be one of ``token_types``.

        Raises:
            ValueError: if the input has ended or no kind matches.
        """
        tok = self.next()
        if tok is None:
            raise ValueError(f'Expected one of: {token_types}, got EOF')
        if tok.token_type in token_types:
            return tok
        raise ValueError(f'Expected one of: {token_types}, got {tok.token_type}')

    def _expect_end_of_line(self):
        """Consume the end of a statement: a newline or the end of input."""
        tok = self.next()
        if tok is not None and tok.token_type != TokenType.newline:
            raise ValueError(f'Expected {TokenType.newline}, got {tok.token_type}')

    def parse_immediate(self, is_wide, is_relative):
        """Emit the operand that follows an instruction.

        A number is written directly.  A label is written as a zero
        placeholder and queued for patching once all labels are known.

        Args:
            is_wide: True for a two-byte operand, False for one byte.
            is_relative: True if a label operand is stored as an offset.
        """
        tok = self.expectmany(TokenType.number, TokenType.identifier)
        if tok.token_type == TokenType.number:
            if is_wide:
                self.write_short(tok.value)
            else:
                self.write_byte(tok.value)
            return
        name = tok.value
        if is_relative:
            self.relative_addrs.add(self.cur)
        if name[0] == '.':
            name = self.prev_label + name
        if is_wide:
            self.pending_labels_wide[name].append(self.cur)
            self.write_short(0)
        else:
            self.pending_labels[name].append(self.cur)
            self.write_byte(0)

    def _parse_bytes(self):
        """Emit the comma-separated strings and numbers of a ``bytes`` line."""
        while True:
            tok = self.next()
            if tok is None:
                raise ValueError('Expected number or string, got EOF')
            if tok.token_type == TokenType.string:
                for b in tok.value.encode('ascii'):
                    self.write_byte(b)
            elif tok.token_type == TokenType.number:
                self.write_byte(tok.value)
            else:
                raise ValueError(f'Expected number or string, got {tok.token_type}')
            tok = self.next()
            if tok is None or tok.token_type == TokenType.newline:
                return
            if tok.token_type != TokenType.comma:
                raise ValueError(f'Expected comma, got {tok.token_type}')

    def parse_identifier(self, tok):
        """Assemble the directive or instruction that starts with ``tok``.

        Directives (``org``, ``rb``, ``bytes``) are matched exactly as
        written; instruction mnemonics are matched case-insensitively.

        Raises:
            ValueError: if the name is neither a directive nor a mnemonic,
                or the rest of the line is malformed.
        """
        ident = tok.value
        identl = ident.lower()
        if ident == 'org':
            # Continue assembling at an absolute address.
            self.cur = self.expect(TokenType.number).value
            self._expect_end_of_line()
        elif ident == 'rb':
            # Reserve bytes: advance the address without writing anything.
            self.cur += self.expect(TokenType.number).value
            self._expect_end_of_line()
        elif ident == 'bytes':
            self._parse_bytes()
        elif identl in opcode_to_byte:
            self.write_byte(opcode_to_byte[identl])
            if identl in have_immediate:
                is_relative = any(identl.startswith(rel) for rel in relative)
                self.parse_immediate(identl in imm2, is_relative)
            self._expect_end_of_line()
        else:
            raise ValueError(f'Unknown instruction or directive: {ident!r}')
def main():
    """Assemble the source file named by argv[1] into the ROM file argv[2]."""
    # Context managers close both files even when assembly raises, so
    # whatever was written to the output is flushed.
    with open(sys.argv[1]) as source, open(sys.argv[2], 'wb') as rom:
        Assembler(source, rom).assemble()
# The 32 base mnemonics, indexed by the low five bits of the opcode byte.
_BASE_MNEMONICS = (
    'BRK INC POP NIP SWP ROT DUP OVR '
    'EQU NEQ GTH LTH JMP JCN JSR STH '
    'LDZ STZ LDR STR LDA STA DEI DEO '
    'ADD SUB MUL DIV AND ORA EOR SFT'
).split()

# Slot 0 of each 32-entry group does not follow the suffix scheme; these are
# the names for 0x00, 0x20, 0x40, ... 0xe0.
_SLOT_ZERO = ('BRK', 'JCI', 'JMI', 'JSI', 'LIT', 'LIT2', 'LITr', 'LIT2r')


def _mnemonic(code):
    """Return the mnemonic for opcode byte ``code`` (0..255)."""
    base = code & 0x1f
    if base == 0:
        return _SLOT_ZERO[code >> 5]
    # Mode bits add suffixes, always spelled in the order '2', 'k', 'r'.
    suffix = ''
    if code & 0x20:
        suffix += '2'
    if code & 0x80:
        suffix += 'k'
    if code & 0x40:
        suffix += 'r'
    return _BASE_MNEMONICS[base] + suffix


# Every mnemonic, positioned so that its list index is its opcode byte.
opcodes = [_mnemonic(code) for code in range(0x100)]

# Width of the longest mnemonic; handy when pretty-printing the table.
s = max(len(o) for o in opcodes)

# Lower-cased mnemonic -> opcode byte, used for case-insensitive lookup.
opcode_to_byte = {name.lower(): code for code, name in enumerate(opcodes)}
# Lower-cased mnemonics that are followed by a one-byte operand.
imm1 = {'lit', 'litr'}

# Lower-cased mnemonics that are followed by a two-byte operand.
imm2 = {'jci', 'jsi', 'jmi', 'lit2', 'lit2r'}

# Every mnemonic that takes an operand at all.
have_immediate = imm1.union(imm2)

# Mnemonic prefixes whose label operand is stored as an offset rather than
# an absolute address.
relative = {'jci', 'jsi', 'jmi', 'ldr', 'str'}
# Run the assembler only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment