Created
January 30, 2025 23:32
-
-
Save monomere/093c07a47283c50dad3bdb0f365cb78a to your computer and use it in GitHub Desktop.
speedrun vm (and compiled language)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2025 monomere | |
// MIT License | |
#ifndef SRVM_BITCODE_H_ | |
#define SRVM_BITCODE_H_ | |
#include <stdint.h> | |
#include <stddef.h> | |
#include <stdio.h> | |
enum srvm_instr_op : uint8_t { | |
SRVM_NOP, | |
SRVM_ADD, | |
SRVM_SUB, | |
SRVM_MUL, | |
SRVM_DIV, | |
SRVM_MOV, | |
SRVM_JMP, | |
SRVM_JLT, | |
SRVM_JGE, | |
SRVM_JEQ, | |
SRVM_JNE, | |
SRVM_IMM, | |
SRVM_INP, | |
SRVM_OUT, | |
SRVM_HLT, | |
SRVM_MST, | |
SRVM_MLD, | |
}; | |
#define SRVM_NREG 8 | |
static void srvm_disasm(uint8_t *mem, size_t len, FILE *fout, size_t lim) { | |
static char const *const cc_none[] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "\033[1;31m?\033[m" }; | |
static char const *const cc_this[] = { "ip", "sp", "rv", "t2", "r4", "r5", "r6", "im", "\033[1;31m?\033[m" }; | |
char const *const *cc = cc_this; | |
for (size_t off = 0, i = 0; off < len && i < lim; ++i) { | |
size_t a0 = off + 0, a1 = off + 1, a2 = off + 2, a3 = off + 3; | |
#define RG(a) cc[mem[a] > SRVM_NREG ? SRVM_NREG : mem[a]] | |
fprintf(fout, "[@0x%04zx] ", a0); | |
switch (mem[a0]) { | |
case SRVM_NOP: fprintf(fout, "nop\n"); off += 1; break; | |
case SRVM_ADD: fprintf(fout, "%s ← %s + %s\n", | |
RG(a3), RG(a1), RG(a2)); off += 4; break; | |
case SRVM_SUB: fprintf(fout, "%s ← %s - %s\n", | |
RG(a3), RG(a1), RG(a2)); off += 4; break; | |
case SRVM_MUL: fprintf(fout, "%s ← %s * %s\n", | |
RG(a3), RG(a1), RG(a2)); off += 4; break; | |
case SRVM_DIV: fprintf(fout, "%s ← %s / %s\n", | |
RG(a3), RG(a1), RG(a2)); off += 4; break; | |
case SRVM_MOV: fprintf(fout, "%s ← %s\n", | |
RG(a2), RG(a1)); off += 3; break; | |
case SRVM_JMP: fprintf(fout, "→ %s\n", RG(a1)); off += 3; break; | |
case SRVM_JLT: fprintf(fout, "%s < %s ? → %s\n", RG(a1), RG(a2), RG(a3)); off += 4; break; | |
case SRVM_JGE: fprintf(fout, "%s ≥ %s ? → %s\n", RG(a1), RG(a2), RG(a3)); off += 4; break; | |
case SRVM_JEQ: fprintf(fout, "%s = %s ? → %s\n", RG(a1), RG(a2), RG(a3)); off += 4; break; | |
case SRVM_JNE: fprintf(fout, "%s ≠ %s ? → %s\n", RG(a1), RG(a2), RG(a3)); off += 4; break; | |
case SRVM_IMM: fprintf(fout, "%s ← %d\n", RG(a3), | |
((uint16_t)mem[a1] << 8) | (uint16_t)mem[a2]); off += 4; break; | |
case SRVM_INP: fprintf(fout, "%s ← inp\n", RG(a1)); off += 2; break; | |
case SRVM_OUT: fprintf(fout, "out %s\n", RG(a1)); off += 2; break; | |
case SRVM_HLT: fprintf(fout, "hlt\n"); off += 1; break; | |
case SRVM_MST: fprintf(fout, "(%s) ← %s\n", RG(a2), RG(a1)); off += 3; break; | |
case SRVM_MLD: fprintf(fout, "%s ← (%s)\n", RG(a2), RG(a1)); off += 3; break; | |
default: fprintf(fout, "unknown instruction, halting.\n"); off = len; break; | |
} | |
#undef RG | |
} | |
} | |
#endif // SRVM_BITCODE_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2025 monomere | |
// MIT License | |
#include "bitcode.h" | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
enum cell_kind : uint8_t { | |
CELL_INT, | |
CELL_KWD, | |
CELL_REF, | |
}; | |
struct cell { | |
union { | |
uintptr_t v_int; | |
char *v_kwd; | |
struct cell *v_ref; | |
}; | |
struct cell *next; | |
}; | |
#define HIGH_BIT_MASK \ | |
((uintptr_t)1 << (sizeof(uintptr_t) * 8 - 1)) | |
static inline enum cell_kind cell_kind( | |
struct cell cell | |
) { | |
if ((uintptr_t)cell.next & HIGH_BIT_MASK) { | |
return CELL_KWD; | |
} | |
if (cell.v_int & HIGH_BIT_MASK) { | |
return CELL_INT; | |
} | |
return CELL_REF; | |
} | |
static inline uint32_t cell_get_int( | |
struct cell cell | |
) { return cell.v_int & ~HIGH_BIT_MASK; } | |
static inline char *cell_get_kwd( | |
struct cell cell | |
) { return cell.v_kwd; } | |
static inline struct cell *cell_get_ref( | |
struct cell cell | |
) { | |
return (void*)((uintptr_t)cell.v_ref & ~HIGH_BIT_MASK); | |
} | |
static inline struct cell *cell_get_next( | |
struct cell cell | |
) { | |
return (void*)((uintptr_t)cell.next & ~HIGH_BIT_MASK); | |
} | |
static char *parse( | |
char *text, | |
struct cell **res | |
) { | |
struct cell *init = NULL; | |
struct cell *cur = init; | |
uintptr_t next_bits = 0; | |
char *zero_ptr = NULL; | |
while (*text && *text != ')') { | |
while (*text <= ' ') ++text; | |
if (!*text || *text == ')') break; | |
struct cell *cell = calloc(1, sizeof(struct cell)); | |
if (!init) init = cell; | |
else cur->next = (void*)((uintptr_t)cell | next_bits); | |
cur = cell; | |
next_bits = 0; | |
if (*text == '(') { | |
++text; | |
struct cell *r = NULL; | |
text = parse(text, &r); | |
while (*text <= ' ') ++text; | |
if (*text != ')') { | |
fprintf(stderr, "expected ')', got '%c'\n", *text); | |
exit(1); | |
} | |
++text; | |
cur->v_ref = r; | |
} else if (*text >= '0' && *text <= '9') { | |
int32_t v = 0; | |
while (*text >= '0' && *text <= '9') { | |
v = v * 10 + *text - '0'; | |
++text; | |
} | |
cur->v_int = (uintptr_t)v | HIGH_BIT_MASK; | |
} else { | |
char *start = text; | |
while (*text > ' ' && *text < 127) ++text; | |
zero_ptr = text; | |
cur->v_kwd = start; | |
next_bits = HIGH_BIT_MASK; | |
} | |
if (zero_ptr != NULL && zero_ptr != text) | |
*zero_ptr = '\0'; | |
} | |
*res = init; | |
return text; | |
} | |
void print_cell(struct cell cell, FILE *fout) { | |
switch (cell_kind(cell)) { | |
case CELL_INT: fprintf(fout, "%d", cell_get_int(cell)); break; | |
case CELL_KWD: fprintf(fout, "%s", cell_get_kwd(cell)); break; | |
case CELL_REF: putc('(', fout); print_cell(*cell_get_ref(cell), fout); putc(')', fout); break; | |
default: fprintf(fout, "<borked>\n"); break; | |
} | |
if (cell.next != NULL) { | |
putc(' ', fout); | |
print_cell(*cell_get_next(cell), fout); | |
} | |
} | |
struct dyna { | |
size_t len, cap; | |
uint8_t *mem; | |
}; | |
static inline size_t fit_by_halfs(size_t tgt, size_t n) { | |
if (n < 2) n = 256; | |
while (n < tgt) n += n / 2; | |
return n; | |
} | |
static void dyna_grow(struct dyna *dyna, size_t n) { | |
dyna->len = n; | |
if (dyna->len <= dyna->cap) return; | |
size_t old_cap = dyna->cap; | |
dyna->cap = fit_by_halfs(n, old_cap); | |
if (old_cap == 0) dyna->mem = malloc(dyna->cap); | |
else dyna->mem = realloc(dyna->mem, dyna->cap); | |
} | |
static void dyna_free(struct dyna *dyna) { | |
dyna->cap = 0; | |
dyna->len = 0; | |
free(dyna->mem); | |
dyna->mem = NULL; | |
} | |
[[gnu::alloc_size(2)]] | |
static uint8_t *dyna_push(struct dyna *dyna, size_t n) { | |
if (n > 0) dyna_grow(dyna, dyna->len + n); | |
return dyna->mem + dyna->len - n; | |
} | |
struct compiler { | |
struct dyna out; | |
}; | |
// the calling convention is like so: | |
// - R0 is the instruction pointer; | |
// - R1 is the stack pointer; | |
// - R2 is the current temporary and return value; | |
// - R3 is the second temporary; | |
// - R7 is the immediate value register; | |
#define CC_IP 0 | |
#define CC_SP 1 | |
#define CC_RV 2 | |
#define CC_T2 3 | |
#define CC_IM 7 | |
static void compile(struct compiler *comp, struct cell cell) { | |
uint8_t *p = NULL; | |
struct cell fn = cell; | |
if (cell_kind(fn) != CELL_KWD) { | |
fprintf(stderr, "cannot call non-keyword values (got "); | |
print_cell(cell, stderr); | |
fprintf(stderr, ")\n"); | |
exit(1); | |
} | |
if (fn.next) for (struct cell arg = *cell_get_next(fn);;) { | |
switch (cell_kind(arg)) { | |
case CELL_INT: | |
p = dyna_push(&comp->out, 4); | |
p[0] = SRVM_IMM; | |
p[1] = (arg.v_int >> 8) & 0xFF; | |
p[2] = arg.v_int & 0xFF; | |
p[3] = CC_RV; | |
break; | |
case CELL_KWD: | |
fprintf(stderr, "variables as arguments are currently not supported.\n"); | |
exit(1); | |
case CELL_REF: | |
compile(comp, *cell_get_ref(arg)); | |
break; | |
} | |
if ((uintptr_t)arg.next & ~HIGH_BIT_MASK) { | |
p = dyna_push(&comp->out, 11); | |
p[0] = SRVM_MST; p[1] = CC_RV; p[2] = CC_SP; // (sp) ← rv | |
p[3] = SRVM_IMM; p[4] = 0; p[5] = 1; p[6] = CC_IM; // im ← 1 | |
p[7] = SRVM_ADD; p[8] = CC_SP; p[9] = CC_IM; p[10] = CC_SP; // sp ← sp + im | |
arg = *cell_get_next(arg); | |
} else break; | |
} | |
char *name = cell_get_kwd(fn); | |
if(strcmp(name, "+") == 0 | |
|| strcmp(name, "-") == 0 | |
|| strcmp(name, "*") == 0 | |
|| strcmp(name, "/") == 0 | |
) { | |
enum srvm_instr_op op = SRVM_ADD; | |
switch (name[0]) { | |
case '+': op = SRVM_ADD; break; | |
case '-': op = SRVM_SUB; break; | |
case '*': op = SRVM_MUL; break; | |
case '/': op = SRVM_DIV; break; | |
} | |
p = dyna_push(&comp->out, 18); | |
p[0] = SRVM_MOV; p[1] = CC_RV; p[2] = CC_T2; // t2 ← rv | |
p[3] = SRVM_IMM; p[4] = 0; p[5] = 1; p[6] = CC_IM; // im ← 1 | |
p[7] = SRVM_SUB; p[8] = CC_SP; p[9] = CC_IM; p[10] = CC_SP; // sp ← sp - im | |
p[11] = SRVM_MLD; p[12] = CC_SP; p[13] = CC_RV; // rv ← (sp) | |
p[14] = op; p[15] = CC_RV; p[16] = CC_T2; p[17] = CC_RV; // rv ← rv OP t2 | |
} else if (strcmp(name, "out") == 0) { | |
p = dyna_push(&comp->out, 2); | |
p[0] = SRVM_OUT; p[1] = CC_RV; | |
} else { | |
fprintf(stderr, "unknown operator: '%s'\n", name); | |
exit(1); | |
} | |
} | |
static void free_cell(struct cell cell) { | |
if (cell_kind(cell) == CELL_REF) { | |
free_cell(*cell_get_ref(cell)); | |
free(cell_get_ref(cell)); | |
} | |
struct cell *next = cell_get_next(cell); | |
if (next) { | |
free_cell(*next); | |
free(next); | |
} | |
} | |
int main(int argc, char **argv) { | |
if (argc < 3) { | |
fprintf(stderr, "Usage:\n\t%s <program> <file>\nWhere:\n" | |
"\t<program> is the program source code;\n" | |
"\t<file> is either a filename or `-` for stdout.\n", | |
argv[0] | |
); | |
exit(1); | |
} | |
char *text = argv[1]; | |
struct cell *c; | |
parse(text, &c); | |
print_cell(*c, stdout); | |
putchar('\n'); | |
struct compiler comp = {0}; | |
compile(&comp, *c->v_ref); | |
*dyna_push(&comp.out, 1) = SRVM_HLT; | |
srvm_disasm(comp.out.mem, comp.out.len, stderr, -1); | |
if (strcmp(argv[2], "-") != 0) { | |
FILE *fout = fopen(argv[2], "wb"); | |
fwrite(comp.out.mem, 1, comp.out.len, fout); | |
fclose(fout); | |
} else { | |
fwrite(comp.out.mem, 1, comp.out.len, stdout); | |
} | |
dyna_free(&comp.out); | |
free_cell(*c); | |
free(c); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2025 monomere | |
// MIT License | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <assert.h> | |
#include <stdlib.h> | |
#include <sys/stat.h> | |
#include "bitcode.h" | |
// TODO: deal with two's complement in `cell.v_int`. | |
struct cpu { | |
int32_t regs[8]; | |
size_t memsz; | |
uint8_t *mem; | |
}; | |
void run(struct cpu *cpu) { | |
while ((uint32_t)cpu->regs[0] < cpu->memsz) { | |
#define REGAT(a) cpu->regs[cpu->mem[a]] | |
#define PCREG cpu->regs[0] | |
size_t a0 = PCREG + 0, a1 = PCREG + 1, | |
a2 = PCREG + 2, a3 = PCREG + 3; | |
// if (cpu->mem[a0] != 0) srvm_disasm(cpu->mem + a0, cpu->memsz - a0, stdout, 1); | |
switch (cpu->mem[a0]) { | |
case SRVM_NOP: PCREG += 1; break; | |
case SRVM_ADD: | |
REGAT(a3) = REGAT(a1) + REGAT(a2); | |
PCREG += 4; break; | |
case SRVM_SUB: | |
REGAT(a3) = REGAT(a1) - REGAT(a2); | |
PCREG += 4; break; | |
case SRVM_MUL: | |
REGAT(a3) = REGAT(a1) * REGAT(a2); | |
PCREG += 4; break; | |
case SRVM_DIV: | |
REGAT(a3) = REGAT(a1) / REGAT(a2); | |
PCREG += 4; break; | |
case SRVM_MOV: | |
REGAT(a2) = REGAT(a1); PCREG += 3; break; | |
case SRVM_JMP: | |
PCREG = REGAT(a1); break; | |
case SRVM_JLT: | |
if (REGAT(a1) < REGAT(a2)) | |
PCREG = REGAT(a3); else PCREG += 4; break; | |
case SRVM_JGE: | |
if (REGAT(a1) >= REGAT(a2)) | |
PCREG = REGAT(a3); else PCREG += 4; break; | |
case SRVM_JEQ: | |
if (REGAT(a1) == REGAT(a2)) | |
PCREG = REGAT(a3); else PCREG += 4; break; | |
case SRVM_JNE: | |
if (REGAT(a1) != REGAT(a2)) | |
PCREG = REGAT(a3); else PCREG += 4; break; | |
case SRVM_IMM: | |
REGAT(a3) | |
= ((uint16_t)cpu->mem[a1] << 8) | |
| (uint16_t)cpu->mem[a2]; | |
PCREG += 4; | |
break; | |
case SRVM_INP: | |
printf("[@0x%04x] input: ", PCREG); | |
fflush(stdout); | |
scanf("%d", ®AT(a1)); | |
PCREG += 2; | |
break; | |
case SRVM_OUT: | |
printf("[@0x%04x] output: %d\n", PCREG, REGAT(a1)); | |
PCREG += 2; | |
break; | |
case SRVM_HLT: | |
PCREG = cpu->memsz; | |
break; | |
case SRVM_MST: | |
// printf("[@0x%04x] memory write: %d -> 0x%04x\n", PCREG, REGAT(a1), REGAT(a2)); | |
cpu->mem[REGAT(a2)] = REGAT(a1); | |
PCREG += 3; | |
break; | |
case SRVM_MLD: | |
// printf("[@0x%04x] memory read: 0x%04x -> %d\n", PCREG, REGAT(a1), REGAT(a2)); | |
REGAT(a2) = cpu->mem[REGAT(a1)]; | |
PCREG += 3; | |
break; | |
} | |
} | |
} | |
void create_memory_from_image(char const *fname, size_t bufsz, void *buf) { | |
struct stat st; | |
stat(fname, &st); | |
assert(bufsz >= st.st_size); | |
FILE *fin = fopen(fname, "rb"); | |
fread(buf, st.st_size, 1, fin); | |
fclose(fin); | |
} | |
int main(int argc, char **argv) { | |
if (argc < 2) { | |
fprintf(stderr, "Usage:\n\t%s <file>\nWhere:\n" | |
"\t<file> is a filename.\n", | |
argv[0] | |
); | |
exit(1); | |
} | |
// uint8_t test[] = { | |
// SRVM_IMM, 0, 20, 1, | |
// SRVM_INP, 2, | |
// SRVM_ADD, 1, 2, 3, | |
// SRVM_OUT, 3, | |
// SRVM_IMM, 0x01,0x00, 7, | |
// SRVM_JLT, 3, 1, 7, | |
// SRVM_IMM, 0,2, 6, | |
// SRVM_OUT, 6, | |
// SRVM_IMM, 0x01,0x06, 7, | |
// SRVM_JMP, 7, | |
// [0x100]= | |
// SRVM_IMM, 0,1, 6, | |
// SRVM_OUT, 6, | |
// [0x106]= | |
// SRVM_HLT | |
// }; | |
uint8_t buf[1024]; | |
size_t bufsz = sizeof buf; | |
create_memory_from_image(argv[1], bufsz, buf); | |
struct cpu cpu = { | |
.regs = { [1] = 0x100, }, | |
.memsz = bufsz, | |
.mem = buf, | |
}; | |
run(&cpu); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment