Skip to content

Instantly share code, notes, and snippets.

@monomere
Created January 30, 2025 23:32
Show Gist options
  • Save monomere/093c07a47283c50dad3bdb0f365cb78a to your computer and use it in GitHub Desktop.
Save monomere/093c07a47283c50dad3bdb0f365cb78a to your computer and use it in GitHub Desktop.
speedrun vm (and compiled language)
// Copyright 2025 monomere
// MIT License
#ifndef SRVM_BITCODE_H_
#define SRVM_BITCODE_H_
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
enum srvm_instr_op : uint8_t {
SRVM_NOP,
SRVM_ADD,
SRVM_SUB,
SRVM_MUL,
SRVM_DIV,
SRVM_MOV,
SRVM_JMP,
SRVM_JLT,
SRVM_JGE,
SRVM_JEQ,
SRVM_JNE,
SRVM_IMM,
SRVM_INP,
SRVM_OUT,
SRVM_HLT,
SRVM_MST,
SRVM_MLD,
};
#define SRVM_NREG 8
static void srvm_disasm(uint8_t *mem, size_t len, FILE *fout, size_t lim) {
static char const *const cc_none[] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "\033[1;31m?\033[m" };
static char const *const cc_this[] = { "ip", "sp", "rv", "t2", "r4", "r5", "r6", "im", "\033[1;31m?\033[m" };
char const *const *cc = cc_this;
for (size_t off = 0, i = 0; off < len && i < lim; ++i) {
size_t a0 = off + 0, a1 = off + 1, a2 = off + 2, a3 = off + 3;
#define RG(a) cc[mem[a] > SRVM_NREG ? SRVM_NREG : mem[a]]
fprintf(fout, "[@0x%04zx] ", a0);
switch (mem[a0]) {
case SRVM_NOP: fprintf(fout, "nop\n"); off += 1; break;
case SRVM_ADD: fprintf(fout, "%s ← %s + %s\n",
RG(a3), RG(a1), RG(a2)); off += 4; break;
case SRVM_SUB: fprintf(fout, "%s ← %s - %s\n",
RG(a3), RG(a1), RG(a2)); off += 4; break;
case SRVM_MUL: fprintf(fout, "%s ← %s * %s\n",
RG(a3), RG(a1), RG(a2)); off += 4; break;
case SRVM_DIV: fprintf(fout, "%s ← %s / %s\n",
RG(a3), RG(a1), RG(a2)); off += 4; break;
case SRVM_MOV: fprintf(fout, "%s ← %s\n",
RG(a2), RG(a1)); off += 3; break;
case SRVM_JMP: fprintf(fout, "→ %s\n", RG(a1)); off += 3; break;
case SRVM_JLT: fprintf(fout, "%s < %s ? → %s\n", RG(a1), RG(a2), RG(a3)); off += 4; break;
case SRVM_JGE: fprintf(fout, "%s ≥ %s ? → %s\n", RG(a1), RG(a2), RG(a3)); off += 4; break;
case SRVM_JEQ: fprintf(fout, "%s = %s ? → %s\n", RG(a1), RG(a2), RG(a3)); off += 4; break;
case SRVM_JNE: fprintf(fout, "%s ≠ %s ? → %s\n", RG(a1), RG(a2), RG(a3)); off += 4; break;
case SRVM_IMM: fprintf(fout, "%s ← %d\n", RG(a3),
((uint16_t)mem[a1] << 8) | (uint16_t)mem[a2]); off += 4; break;
case SRVM_INP: fprintf(fout, "%s ← inp\n", RG(a1)); off += 2; break;
case SRVM_OUT: fprintf(fout, "out %s\n", RG(a1)); off += 2; break;
case SRVM_HLT: fprintf(fout, "hlt\n"); off += 1; break;
case SRVM_MST: fprintf(fout, "(%s) ← %s\n", RG(a2), RG(a1)); off += 3; break;
case SRVM_MLD: fprintf(fout, "%s ← (%s)\n", RG(a2), RG(a1)); off += 3; break;
default: fprintf(fout, "unknown instruction, halting.\n"); off = len; break;
}
#undef RG
}
}
#endif // SRVM_BITCODE_H_
// Copyright 2025 monomere
// MIT License
#include "bitcode.h"
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
enum cell_kind : uint8_t {
CELL_INT,
CELL_KWD,
CELL_REF,
};
struct cell {
union {
uintptr_t v_int;
char *v_kwd;
struct cell *v_ref;
};
struct cell *next;
};
#define HIGH_BIT_MASK \
((uintptr_t)1 << (sizeof(uintptr_t) * 8 - 1))
static inline enum cell_kind cell_kind(
struct cell cell
) {
if ((uintptr_t)cell.next & HIGH_BIT_MASK) {
return CELL_KWD;
}
if (cell.v_int & HIGH_BIT_MASK) {
return CELL_INT;
}
return CELL_REF;
}
static inline uint32_t cell_get_int(
struct cell cell
) { return cell.v_int & ~HIGH_BIT_MASK; }
static inline char *cell_get_kwd(
struct cell cell
) { return cell.v_kwd; }
static inline struct cell *cell_get_ref(
struct cell cell
) {
return (void*)((uintptr_t)cell.v_ref & ~HIGH_BIT_MASK);
}
static inline struct cell *cell_get_next(
struct cell cell
) {
return (void*)((uintptr_t)cell.next & ~HIGH_BIT_MASK);
}
static char *parse(
char *text,
struct cell **res
) {
struct cell *init = NULL;
struct cell *cur = init;
uintptr_t next_bits = 0;
char *zero_ptr = NULL;
while (*text && *text != ')') {
while (*text <= ' ') ++text;
if (!*text || *text == ')') break;
struct cell *cell = calloc(1, sizeof(struct cell));
if (!init) init = cell;
else cur->next = (void*)((uintptr_t)cell | next_bits);
cur = cell;
next_bits = 0;
if (*text == '(') {
++text;
struct cell *r = NULL;
text = parse(text, &r);
while (*text <= ' ') ++text;
if (*text != ')') {
fprintf(stderr, "expected ')', got '%c'\n", *text);
exit(1);
}
++text;
cur->v_ref = r;
} else if (*text >= '0' && *text <= '9') {
int32_t v = 0;
while (*text >= '0' && *text <= '9') {
v = v * 10 + *text - '0';
++text;
}
cur->v_int = (uintptr_t)v | HIGH_BIT_MASK;
} else {
char *start = text;
while (*text > ' ' && *text < 127) ++text;
zero_ptr = text;
cur->v_kwd = start;
next_bits = HIGH_BIT_MASK;
}
if (zero_ptr != NULL && zero_ptr != text)
*zero_ptr = '\0';
}
*res = init;
return text;
}
void print_cell(struct cell cell, FILE *fout) {
switch (cell_kind(cell)) {
case CELL_INT: fprintf(fout, "%d", cell_get_int(cell)); break;
case CELL_KWD: fprintf(fout, "%s", cell_get_kwd(cell)); break;
case CELL_REF: putc('(', fout); print_cell(*cell_get_ref(cell), fout); putc(')', fout); break;
default: fprintf(fout, "<borked>\n"); break;
}
if (cell.next != NULL) {
putc(' ', fout);
print_cell(*cell_get_next(cell), fout);
}
}
struct dyna {
size_t len, cap;
uint8_t *mem;
};
static inline size_t fit_by_halfs(size_t tgt, size_t n) {
if (n < 2) n = 256;
while (n < tgt) n += n / 2;
return n;
}
static void dyna_grow(struct dyna *dyna, size_t n) {
dyna->len = n;
if (dyna->len <= dyna->cap) return;
size_t old_cap = dyna->cap;
dyna->cap = fit_by_halfs(n, old_cap);
if (old_cap == 0) dyna->mem = malloc(dyna->cap);
else dyna->mem = realloc(dyna->mem, dyna->cap);
}
static void dyna_free(struct dyna *dyna) {
dyna->cap = 0;
dyna->len = 0;
free(dyna->mem);
dyna->mem = NULL;
}
[[gnu::alloc_size(2)]]
static uint8_t *dyna_push(struct dyna *dyna, size_t n) {
if (n > 0) dyna_grow(dyna, dyna->len + n);
return dyna->mem + dyna->len - n;
}
struct compiler {
struct dyna out;
};
// the calling convention is like so:
// - R0 is the instruction pointer;
// - R1 is the stack pointer;
// - R2 is the current temporary and return value;
// - R3 is the second temporary;
// - R7 is the immediate value register;
#define CC_IP 0
#define CC_SP 1
#define CC_RV 2
#define CC_T2 3
#define CC_IM 7
static void compile(struct compiler *comp, struct cell cell) {
uint8_t *p = NULL;
struct cell fn = cell;
if (cell_kind(fn) != CELL_KWD) {
fprintf(stderr, "cannot call non-keyword values (got ");
print_cell(cell, stderr);
fprintf(stderr, ")\n");
exit(1);
}
if (fn.next) for (struct cell arg = *cell_get_next(fn);;) {
switch (cell_kind(arg)) {
case CELL_INT:
p = dyna_push(&comp->out, 4);
p[0] = SRVM_IMM;
p[1] = (arg.v_int >> 8) & 0xFF;
p[2] = arg.v_int & 0xFF;
p[3] = CC_RV;
break;
case CELL_KWD:
fprintf(stderr, "variables as arguments are currently not supported.\n");
exit(1);
case CELL_REF:
compile(comp, *cell_get_ref(arg));
break;
}
if ((uintptr_t)arg.next & ~HIGH_BIT_MASK) {
p = dyna_push(&comp->out, 11);
p[0] = SRVM_MST; p[1] = CC_RV; p[2] = CC_SP; // (sp) ← rv
p[3] = SRVM_IMM; p[4] = 0; p[5] = 1; p[6] = CC_IM; // im ← 1
p[7] = SRVM_ADD; p[8] = CC_SP; p[9] = CC_IM; p[10] = CC_SP; // sp ← sp + im
arg = *cell_get_next(arg);
} else break;
}
char *name = cell_get_kwd(fn);
if(strcmp(name, "+") == 0
|| strcmp(name, "-") == 0
|| strcmp(name, "*") == 0
|| strcmp(name, "/") == 0
) {
enum srvm_instr_op op = SRVM_ADD;
switch (name[0]) {
case '+': op = SRVM_ADD; break;
case '-': op = SRVM_SUB; break;
case '*': op = SRVM_MUL; break;
case '/': op = SRVM_DIV; break;
}
p = dyna_push(&comp->out, 18);
p[0] = SRVM_MOV; p[1] = CC_RV; p[2] = CC_T2; // t2 ← rv
p[3] = SRVM_IMM; p[4] = 0; p[5] = 1; p[6] = CC_IM; // im ← 1
p[7] = SRVM_SUB; p[8] = CC_SP; p[9] = CC_IM; p[10] = CC_SP; // sp ← sp - im
p[11] = SRVM_MLD; p[12] = CC_SP; p[13] = CC_RV; // rv ← (sp)
p[14] = op; p[15] = CC_RV; p[16] = CC_T2; p[17] = CC_RV; // rv ← rv OP t2
} else if (strcmp(name, "out") == 0) {
p = dyna_push(&comp->out, 2);
p[0] = SRVM_OUT; p[1] = CC_RV;
} else {
fprintf(stderr, "unknown operator: '%s'\n", name);
exit(1);
}
}
static void free_cell(struct cell cell) {
if (cell_kind(cell) == CELL_REF) {
free_cell(*cell_get_ref(cell));
free(cell_get_ref(cell));
}
struct cell *next = cell_get_next(cell);
if (next) {
free_cell(*next);
free(next);
}
}
int main(int argc, char **argv) {
if (argc < 3) {
fprintf(stderr, "Usage:\n\t%s <program> <file>\nWhere:\n"
"\t<program> is the program source code;\n"
"\t<file> is either a filename or `-` for stdout.\n",
argv[0]
);
exit(1);
}
char *text = argv[1];
struct cell *c;
parse(text, &c);
print_cell(*c, stdout);
putchar('\n');
struct compiler comp = {0};
compile(&comp, *c->v_ref);
*dyna_push(&comp.out, 1) = SRVM_HLT;
srvm_disasm(comp.out.mem, comp.out.len, stderr, -1);
if (strcmp(argv[2], "-") != 0) {
FILE *fout = fopen(argv[2], "wb");
fwrite(comp.out.mem, 1, comp.out.len, fout);
fclose(fout);
} else {
fwrite(comp.out.mem, 1, comp.out.len, stdout);
}
dyna_free(&comp.out);
free_cell(*c);
free(c);
return 0;
}
// Copyright 2025 monomere
// MIT License
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <stdlib.h>
#include <sys/stat.h>
#include "bitcode.h"
// TODO: deal with two's complement in `cell.v_int`.
struct cpu {
int32_t regs[8];
size_t memsz;
uint8_t *mem;
};
void run(struct cpu *cpu) {
while ((uint32_t)cpu->regs[0] < cpu->memsz) {
#define REGAT(a) cpu->regs[cpu->mem[a]]
#define PCREG cpu->regs[0]
size_t a0 = PCREG + 0, a1 = PCREG + 1,
a2 = PCREG + 2, a3 = PCREG + 3;
// if (cpu->mem[a0] != 0) srvm_disasm(cpu->mem + a0, cpu->memsz - a0, stdout, 1);
switch (cpu->mem[a0]) {
case SRVM_NOP: PCREG += 1; break;
case SRVM_ADD:
REGAT(a3) = REGAT(a1) + REGAT(a2);
PCREG += 4; break;
case SRVM_SUB:
REGAT(a3) = REGAT(a1) - REGAT(a2);
PCREG += 4; break;
case SRVM_MUL:
REGAT(a3) = REGAT(a1) * REGAT(a2);
PCREG += 4; break;
case SRVM_DIV:
REGAT(a3) = REGAT(a1) / REGAT(a2);
PCREG += 4; break;
case SRVM_MOV:
REGAT(a2) = REGAT(a1); PCREG += 3; break;
case SRVM_JMP:
PCREG = REGAT(a1); break;
case SRVM_JLT:
if (REGAT(a1) < REGAT(a2))
PCREG = REGAT(a3); else PCREG += 4; break;
case SRVM_JGE:
if (REGAT(a1) >= REGAT(a2))
PCREG = REGAT(a3); else PCREG += 4; break;
case SRVM_JEQ:
if (REGAT(a1) == REGAT(a2))
PCREG = REGAT(a3); else PCREG += 4; break;
case SRVM_JNE:
if (REGAT(a1) != REGAT(a2))
PCREG = REGAT(a3); else PCREG += 4; break;
case SRVM_IMM:
REGAT(a3)
= ((uint16_t)cpu->mem[a1] << 8)
| (uint16_t)cpu->mem[a2];
PCREG += 4;
break;
case SRVM_INP:
printf("[@0x%04x] input: ", PCREG);
fflush(stdout);
scanf("%d", &REGAT(a1));
PCREG += 2;
break;
case SRVM_OUT:
printf("[@0x%04x] output: %d\n", PCREG, REGAT(a1));
PCREG += 2;
break;
case SRVM_HLT:
PCREG = cpu->memsz;
break;
case SRVM_MST:
// printf("[@0x%04x] memory write: %d -> 0x%04x\n", PCREG, REGAT(a1), REGAT(a2));
cpu->mem[REGAT(a2)] = REGAT(a1);
PCREG += 3;
break;
case SRVM_MLD:
// printf("[@0x%04x] memory read: 0x%04x -> %d\n", PCREG, REGAT(a1), REGAT(a2));
REGAT(a2) = cpu->mem[REGAT(a1)];
PCREG += 3;
break;
}
}
}
void create_memory_from_image(char const *fname, size_t bufsz, void *buf) {
struct stat st;
stat(fname, &st);
assert(bufsz >= st.st_size);
FILE *fin = fopen(fname, "rb");
fread(buf, st.st_size, 1, fin);
fclose(fin);
}
int main(int argc, char **argv) {
if (argc < 2) {
fprintf(stderr, "Usage:\n\t%s <file>\nWhere:\n"
"\t<file> is a filename.\n",
argv[0]
);
exit(1);
}
// uint8_t test[] = {
// SRVM_IMM, 0, 20, 1,
// SRVM_INP, 2,
// SRVM_ADD, 1, 2, 3,
// SRVM_OUT, 3,
// SRVM_IMM, 0x01,0x00, 7,
// SRVM_JLT, 3, 1, 7,
// SRVM_IMM, 0,2, 6,
// SRVM_OUT, 6,
// SRVM_IMM, 0x01,0x06, 7,
// SRVM_JMP, 7,
// [0x100]=
// SRVM_IMM, 0,1, 6,
// SRVM_OUT, 6,
// [0x106]=
// SRVM_HLT
// };
uint8_t buf[1024];
size_t bufsz = sizeof buf;
create_memory_from_image(argv[1], bufsz, buf);
struct cpu cpu = {
.regs = { [1] = 0x100, },
.memsz = bufsz,
.mem = buf,
};
run(&cpu);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment