Skip to content

Instantly share code, notes, and snippets.

@deniska
Created May 29, 2025 18:38
Show Gist options
  • Save deniska/4e84fcca0a0f945fc99a72ed313497c1 to your computer and use it in GitHub Desktop.
Save deniska/4e84fcca0a0f945fc99a72ed313497c1 to your computer and use it in GitHub Desktop.
simple unconventional uxn assembler
; Example program: prints the zero-terminated string at `str` one byte at a
; time through the Console.write port.
;
; Port layout of the Console device, starting at address 0x10. Labels that
; start with '.' are local to the preceding global label, so `.write` below
; is referenced elsewhere as `Console.write`. `rb N` only reserves N bytes.
org 0x10
Console:
.vector: rb 2
.read: rb 1
.pad: rb 4
.type: rb 1
; 0x10 + 2 + 1 + 4 + 1 = 0x18
.write: rb 1
.error: rb 1
; Program code starts at 0x100; the assembler writes out the image from
; that address onwards.
org 0x100
main:
; 16-bit address of the string
lit2 str
.loop:
; fetch the byte at that address while keeping the address (keep mode)
ldak
dup
; a non-zero byte continues at .cont, the zero terminator falls through to .quit
jci .cont
jmi .quit
.cont:
lit Console.write
deo
; advance the string address to the next byte
inc2
jmi .loop
.quit:
brk
; the string data: text, a line feed (0x0a) and the terminating zero
str: bytes "Hello world", 0x0a, 0
import sys
from dataclasses import dataclass
import collections
import enum
import string
class TokenType(enum.Enum):
    """Kinds of lexical tokens the tokenizer can emit."""

    # Names (labels, directives, mnemonics); may contain '.'.
    identifier = 1
    # Decimal or 0x-prefixed hexadecimal literal.
    number = 2
    # Double-quoted string literal.
    string = 3
    # Punctuation.
    comma = 4
    colon = 5
    # End of a source line.
    newline = 6
@dataclass()
class Token:
    """One lexical token: its kind plus the associated payload."""

    # Which kind of token this is.
    token_type: TokenType
    # Payload of the token; its Python type depends on the token kind.
    value: object
# Characters allowed as the first character of an identifier: ASCII letters
# and '.'.
identifier_start = set(string.ascii_letters) | {'.'}
# Characters allowed after the first one: the above plus decimal digits.
identifier_cont = identifier_start.union(string.digits)
class Tokenizer:
    """Turn assembler source text into a stream of ``Token`` objects.

    The input is read one character at a time from ``input_file`` (any object
    with a ``read(n)`` method that returns ``''`` at end of input). A single
    character of look-ahead is kept in ``self.stash``.
    """

    def __init__(self, input_file):
        self.input_file = input_file
        # One-character push-back buffer; None means "empty".
        self.stash = None

    def next(self):
        """Return the next character, or ``''`` at end of input."""
        if self.stash is None:
            return self.input_file.read(1)
        c = self.stash
        self.stash = None
        return c

    def putback(self, c):
        """Push ``c`` back so the following ``next()`` call returns it."""
        self.stash = c

    def tokenize(self):
        """Yield the tokens of the input until it is exhausted.

        Spaces, tabs, carriage returns and ``;`` comments are skipped;
        newlines are emitted as tokens.

        Raises:
            ValueError: on an unknown character, an unclosed string or an
                unknown string escape.
        """
        while True:
            c = self.next()
            if c == '':
                break
            elif c in ' \t\r':
                continue
            elif c == '\n':
                yield Token(TokenType.newline, '\n')
            elif c in identifier_start:
                yield Token(TokenType.identifier, self._read_identifier(c))
            elif c in string.digits:
                yield Token(TokenType.number, self._read_number(c))
            elif c == ':':
                yield Token(TokenType.colon, ':')
            elif c == ',':
                yield Token(TokenType.comma, ',')
            elif c == ';':
                self._skip_comment()
            elif c == '"':
                yield Token(TokenType.string, self._read_string())
            else:
                raise ValueError(f'Unknown character: {c!r}')

    def _read_identifier(self, first):
        """Read the rest of an identifier whose first character is ``first``."""
        chars = [first]
        while (c := self.next()) in identifier_cont:
            chars.append(c)
        self.putback(c)
        return ''.join(chars)

    def _read_number(self, first):
        """Read a decimal or ``0x`` hexadecimal literal starting with ``first``."""
        digits = [first]
        base = 10
        digit_chars = string.digits
        if first == '0':
            c = self.next()
            if c == 'x':
                base = 16
                digit_chars = string.hexdigits
            else:
                self.putback(c)
        # End of input is '' and `'' in some_str` is always True, so it has to
        # be tested explicitly or a number at the very end of the file would
        # loop forever.
        while (c := self.next()) != '' and c in digit_chars:
            digits.append(c)
        self.putback(c)
        return int(''.join(digits), base)

    def _skip_comment(self):
        """Discard characters up to, but not including, the end of the line."""
        while (c := self.next()) not in ('\n', ''):
            pass
        self.putback(c)

    def _read_string(self):
        """Read a string literal body; the opening quote is already consumed.

        Supported escapes are ``\\"`` and ``\\\\``.
        """
        chars = []
        while True:
            c = self.next()
            if c == '"':
                break
            if c in ('\n', ''):
                raise ValueError('Unclosed string')
            if c == '\\':
                c = self.next()
                if c not in ('"', '\\'):
                    raise ValueError(f'Unknown string escape: {c!r}')
            # Single append point: an escaped character is stored exactly once.
            chars.append(c)
        return ''.join(chars)
class Assembler:
    """Assemble a token stream into a memory image and write it out.

    The source is processed in a single pass. Operands that name a label are
    emitted as zero placeholders and patched once all labels are known.
    ``assemble()`` finally writes the image from address 0x100 onwards to
    ``output_file``.
    """

    def __init__(self, input_file, output_file):
        # Address the next byte is written to.
        self.cur = 0
        # Memory image, indexed by address.
        self.out_buf = bytearray()
        self.output_file = output_file
        self.tokens = Tokenizer(input_file).tokenize()
        # One-token push-back buffer.
        self.stash = None
        # Most recent non-local label; prefix for labels starting with '.'.
        self.prev_label = ''
        # Fully qualified label name -> address.
        self.labels = {}
        # Label name -> addresses of 2-byte / 1-byte placeholders to patch.
        self.pending_labels_wide = collections.defaultdict(list)
        self.pending_labels = collections.defaultdict(list)
        # Placeholder addresses that receive `label - addr - 2` instead of
        # the label address itself.
        self.relative_addrs = set()

    def next(self):
        """Return the next token, or None once the input is exhausted."""
        if self.stash is not None:
            tok = self.stash
            self.stash = None
            return tok
        return next(self.tokens, None)

    def putback(self, tok):
        """Push ``tok`` back so the following ``next()`` call returns it."""
        self.stash = tok

    def write_byte(self, b):
        """Store ``b`` at ``self.cur`` and advance ``self.cur`` by one.

        The image is grown with zero bytes as needed.

        Raises:
            ValueError: if ``b`` does not fit in a byte.
        """
        if b > 255:
            raise ValueError(f'Numeric immediate too big for byte: {b}')
        while len(self.out_buf) <= self.cur:
            self.out_buf.append(0)
        self.out_buf[self.cur] = b
        self.cur += 1

    def write_short(self, num):
        """Store ``num`` as two bytes, high byte first.

        Raises:
            ValueError: if ``num`` does not fit in 16 bits.
        """
        if num > 65535:
            raise ValueError(f'Numeric immediate too big for short: {num}')
        self.write_byte(num >> 8)
        self.write_byte(num & 0xff)

    def add_label(self, tok):
        """Bind the label named by ``tok`` to the current address.

        A name starting with '.' is qualified with the most recent non-local
        label; any other name becomes the new prefix for local labels.

        Raises:
            ValueError: if the qualified name is already defined.
        """
        name = tok.value
        if name[0] == '.':
            name = self.prev_label + name
        else:
            self.prev_label = name
        if name in self.labels:
            raise ValueError(f'Label {name!r} already defined')
        self.labels[name] = self.cur

    def assemble(self):
        """Assemble the whole input and write the result to the output file.

        Raises:
            ValueError: on any syntax error or undefined label.
        """
        while True:
            tok = self.next()
            if tok is None:
                break
            if tok.token_type == TokenType.newline:
                continue
            if tok.token_type != TokenType.identifier:
                raise ValueError(f'Dunno what to do with {tok}')
            following = self.next()
            # `following` is None when the identifier is the last token of a
            # file without a trailing newline.
            if following is not None and following.token_type == TokenType.colon:
                self.add_label(tok)
                tok = self.next()
                if tok is None or tok.token_type == TokenType.newline:
                    continue
                if tok.token_type != TokenType.identifier:
                    raise ValueError(f'Dunno what to do with {tok}')
            else:
                self.putback(following)
            self.parse_identifier(tok)
        self._patch_labels(self.pending_labels_wide, self.write_short, 0xffff)
        self._patch_labels(self.pending_labels, self.write_byte, 0xff)
        self.output_file.write(self.out_buf[0x100:])

    def _patch_labels(self, pending, write, mask):
        """Overwrite the placeholders recorded in ``pending`` with addresses.

        ``write`` is ``write_byte`` or ``write_short``; ``mask`` truncates
        relative offsets to the operand width.
        """
        for label, addrs in pending.items():
            if label not in self.labels:
                raise ValueError(f'Label not found: {label!r}')
            label_addr = self.labels[label]
            for addr in addrs:
                self.cur = addr
                if addr in self.relative_addrs:
                    write((label_addr - addr - 2) & mask)
                else:
                    write(label_addr)

    def expect(self, token_type):
        """Consume and return the next token, which must be ``token_type``."""
        tok = self.next()
        if tok is None:
            raise ValueError(f'Expected {token_type}, got EOF')
        if tok.token_type != token_type:
            raise ValueError(f'Expected {token_type}, got {tok.token_type}')
        return tok

    def expectmany(self, *token_types):
        """Consume and return the next token, which must be one of ``token_types``."""
        tok = self.next()
        if tok is None:
            raise ValueError(f'Expected one of: {token_types}, got EOF')
        if tok.token_type in token_types:
            return tok
        raise ValueError(f'Expected one of: {token_types}, got {tok.token_type}')

    def _expect_end_of_line(self):
        """Consume the statement terminator: a newline or the end of input."""
        tok = self.next()
        if tok is not None and tok.token_type != TokenType.newline:
            raise ValueError(f'Expected {TokenType.newline}, got {tok.token_type}')

    def parse_immediate(self, is_wide, is_relative):
        """Parse and emit an instruction operand.

        A number is written directly. A label name is recorded for later
        patching and a zero placeholder is written in its place. ``is_wide``
        selects a two-byte operand; ``is_relative`` marks a label operand for
        relative patching.
        """
        tok = self.expectmany(TokenType.number, TokenType.identifier)
        if tok.token_type == TokenType.identifier:
            name = tok.value
            if is_relative:
                self.relative_addrs.add(self.cur)
            if name[0] == '.':
                name = self.prev_label + name
            if is_wide:
                self.pending_labels_wide[name].append(self.cur)
                self.write_short(0)
            else:
                self.pending_labels[name].append(self.cur)
                self.write_byte(0)
        elif tok.token_type == TokenType.number:
            num = tok.value
            if is_wide:
                self.write_short(num)
            else:
                self.write_byte(num)

    def parse_identifier(self, tok):
        """Handle one statement that starts with the identifier ``tok``.

        Directives: ``org N`` sets the current address, ``rb N`` advances it
        by N without writing, ``bytes`` emits a comma-separated list of
        strings and numbers. Anything else must be an instruction mnemonic
        (matched case-insensitively).

        Raises:
            ValueError: on an unknown name or malformed operands.
        """
        ident = tok.value
        identl = ident.lower()
        if ident == 'org':
            tok = self.expect(TokenType.number)
            self.cur = tok.value
            self._expect_end_of_line()
        elif ident == 'rb':
            tok = self.expect(TokenType.number)
            self.cur += tok.value
            self._expect_end_of_line()
        elif ident == 'bytes':
            while True:
                tok = self.next()
                if tok is None:
                    raise ValueError('Expected number or string, got EOF')
                if tok.token_type == TokenType.string:
                    for b in tok.value.encode('ascii'):
                        self.write_byte(b)
                elif tok.token_type == TokenType.number:
                    self.write_byte(tok.value)
                else:
                    raise ValueError(f'Expected number or string, got {tok.token_type}')
                tok = self.next()
                if tok is None or tok.token_type == TokenType.newline:
                    break
                if tok.token_type != TokenType.comma:
                    raise ValueError(f'Expected comma, got {tok.token_type}')
        elif identl in opcode_to_byte:
            self.write_byte(opcode_to_byte[identl])
            is_relative = any(identl.startswith(rel) for rel in relative)
            if identl in have_immediate:
                self.parse_immediate(identl in imm2, is_relative)
            self._expect_end_of_line()
        else:
            raise ValueError(f'Unknown instruction or directive: {ident!r}')
def main():
    """Assemble the file named by argv[1] into the file named by argv[2].

    Both files are closed when assembly finishes or fails. Exits with a
    usage message when fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        raise SystemExit(f'usage: {sys.argv[0]} INPUT OUTPUT')
    with open(sys.argv[1]) as input_file, open(sys.argv[2], 'wb') as output_file:
        Assembler(input_file, output_file).assemble()
# Instruction mnemonics; the position of a name in this list is its opcode
# byte. The table is eight groups of 32 entries: within a group the names
# share a mode suffix ('', '2', 'r', '2r', 'k', '2k', 'kr', '2kr'), and the
# first slot of each group holds BRK, JCI, JMI, JSI, LIT, LIT2, LITr and
# LIT2r respectively.
opcodes = [
    'BRK', 'INC', 'POP', 'NIP', 'SWP', 'ROT', 'DUP', 'OVR',
    'EQU', 'NEQ', 'GTH', 'LTH', 'JMP', 'JCN', 'JSR', 'STH',
    'LDZ', 'STZ', 'LDR', 'STR', 'LDA', 'STA', 'DEI', 'DEO',
    'ADD', 'SUB', 'MUL', 'DIV', 'AND', 'ORA', 'EOR', 'SFT',
    'JCI', 'INC2', 'POP2', 'NIP2', 'SWP2', 'ROT2', 'DUP2', 'OVR2',
    'EQU2', 'NEQ2', 'GTH2', 'LTH2', 'JMP2', 'JCN2', 'JSR2', 'STH2',
    'LDZ2', 'STZ2', 'LDR2', 'STR2', 'LDA2', 'STA2', 'DEI2', 'DEO2',
    'ADD2', 'SUB2', 'MUL2', 'DIV2', 'AND2', 'ORA2', 'EOR2', 'SFT2',
    'JMI', 'INCr', 'POPr', 'NIPr', 'SWPr', 'ROTr', 'DUPr', 'OVRr',
    'EQUr', 'NEQr', 'GTHr', 'LTHr', 'JMPr', 'JCNr', 'JSRr', 'STHr',
    'LDZr', 'STZr', 'LDRr', 'STRr', 'LDAr', 'STAr', 'DEIr', 'DEOr',
    'ADDr', 'SUBr', 'MULr', 'DIVr', 'ANDr', 'ORAr', 'EORr', 'SFTr',
    'JSI', 'INC2r', 'POP2r', 'NIP2r', 'SWP2r', 'ROT2r', 'DUP2r', 'OVR2r',
    'EQU2r', 'NEQ2r', 'GTH2r', 'LTH2r', 'JMP2r', 'JCN2r', 'JSR2r', 'STH2r',
    'LDZ2r', 'STZ2r', 'LDR2r', 'STR2r', 'LDA2r', 'STA2r', 'DEI2r', 'DEO2r',
    'ADD2r', 'SUB2r', 'MUL2r', 'DIV2r', 'AND2r', 'ORA2r', 'EOR2r', 'SFT2r',
    'LIT', 'INCk', 'POPk', 'NIPk', 'SWPk', 'ROTk', 'DUPk', 'OVRk',
    'EQUk', 'NEQk', 'GTHk', 'LTHk', 'JMPk', 'JCNk', 'JSRk', 'STHk',
    'LDZk', 'STZk', 'LDRk', 'STRk', 'LDAk', 'STAk', 'DEIk', 'DEOk',
    'ADDk', 'SUBk', 'MULk', 'DIVk', 'ANDk', 'ORAk', 'EORk', 'SFTk',
    'LIT2', 'INC2k', 'POP2k', 'NIP2k', 'SWP2k', 'ROT2k', 'DUP2k', 'OVR2k',
    'EQU2k', 'NEQ2k', 'GTH2k', 'LTH2k', 'JMP2k', 'JCN2k', 'JSR2k', 'STH2k',
    'LDZ2k', 'STZ2k', 'LDR2k', 'STR2k', 'LDA2k', 'STA2k', 'DEI2k', 'DEO2k',
    'ADD2k', 'SUB2k', 'MUL2k', 'DIV2k', 'AND2k', 'ORA2k', 'EOR2k', 'SFT2k',
    'LITr', 'INCkr', 'POPkr', 'NIPkr', 'SWPkr', 'ROTkr', 'DUPkr', 'OVRkr',
    'EQUkr', 'NEQkr', 'GTHkr', 'LTHkr', 'JMPkr', 'JCNkr', 'JSRkr', 'STHkr',
    'LDZkr', 'STZkr', 'LDRkr', 'STRkr', 'LDAkr', 'STAkr', 'DEIkr', 'DEOkr',
    'ADDkr', 'SUBkr', 'MULkr', 'DIVkr', 'ANDkr', 'ORAkr', 'EORkr', 'SFTkr',
    'LIT2r', 'INC2kr', 'POP2kr', 'NIP2kr', 'SWP2kr', 'ROT2kr', 'DUP2kr', 'OVR2kr',
    'EQU2kr', 'NEQ2kr', 'GTH2kr', 'LTH2kr', 'JMP2kr', 'JCN2kr', 'JSR2kr', 'STH2kr',
    'LDZ2kr', 'STZ2kr', 'LDR2kr', 'STR2kr', 'LDA2kr', 'STA2kr', 'DEI2kr', 'DEO2kr',
    'ADD2kr', 'SUB2kr', 'MUL2kr', 'DIV2kr', 'AND2kr', 'ORA2kr', 'EOR2kr', 'SFT2kr',
]

# Lower-cased mnemonic -> opcode byte, used for case-insensitive lookup.
opcode_to_byte = {name.lower(): byte for byte, name in enumerate(opcodes)}
# Mnemonics followed by a one-byte operand in the source.
imm1 = {'lit', 'litr'}
# Mnemonics followed by a two-byte operand in the source.
imm2 = {'jci', 'jsi', 'jmi', 'lit2', 'lit2r'}
# Every mnemonic that takes an operand.
have_immediate = imm1.union(imm2)
# Mnemonic prefixes whose label operands are marked as relative.
relative = {'jci', 'jsi', 'jmi', 'ldr', 'str'}
# Run the assembler only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment