Skip to content

Instantly share code, notes, and snippets.

@msullivan
Created September 14, 2025 00:02
Show Gist options
  • Save msullivan/a0917c1e54136f620e5e6c733ca7686c to your computer and use it in GitHub Desktop.
Save msullivan/a0917c1e54136f620e5e6c733ca7686c to your computer and use it in GitHub Desktop.
UM implementations
/*
* Implementation of a simulator for the Universal Machine (UM) for the 2006
* ICFP Programming Contest.
*
* This is an "honest" version that doesn't rely on sticking pointers
* into 32-bit integers. It is maybe a little slower, but not more
* than 10%? My measurements have all been kind of noisy.
*
* If SAFE is defined, then we do bounds checks and the like.
* That's a decent mount more slow.
*/
#define SAFE 1
#define COLLECT_STATS 0
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>
#include <signal.h>
#include <ctype.h>
#include <assert.h>
#include <malloc.h>
/* Note: ^v is needed in readline to prefix the thinger */
#define ESC 27
#define EM 25 /* ^y */
/* str_rtrim: gets rid of whitespace on the right of a string by
* null terminating */
char *str_rtrim(char *s)
{
char *t = s;
char *last; /* last non-whitespace char */
if (!s) return s;
for (last = s; *s; s++)
if (!isspace(*s))
last = s+1;
if (*last)
*(last) = '\0';
return t;
}
uint32_t *read_file(char *filename)
{
int fd;
struct stat buf;
size_t size;
uint32_t *data;
size_t i;
if ((fd = open(filename, O_RDONLY)) < 0) {
perror("open");
exit(1);
}
fstat(fd, &buf);
size = (buf.st_size + 3)/4 + 1; /* +1 for rounding, +1 for size field */
data = calloc(size + 1, sizeof(uint32_t));
*data = size;
data++;
if (read(fd, data, buf.st_size) < 0) {
perror("read");
exit(1);
}
close(fd);
for (i = 0; i < size; i++)
data[i] = ntohl(data[i]);
return data;
}
typedef enum opcode_t {
CMOV, INDEX, AMEND, ADD, MUL, DIV, NAND, HALT, ALLOC, ABANDON,
OUT, IN, LOADP, ORTHO } opcode_t;
char *names[] = {
"CMOV", "INDEX", "AMEND", "ADD", "MUL", "DIV", "NAND", "HALT", "ALLOC",
"ABAND",
"OUT", "IN", "LOADP", "ORTHO" };
unsigned long long instr_count = 0;
unsigned long long instr_histogram[ORTHO+1];
void dump_histogram() {
for (int i = 0; i < ORTHO+1; i++) {
fprintf(stderr, "%s:\t%llu\n", names[i], instr_histogram[i]);
}
fprintf(stderr, "Total:\t%llu\n", instr_count);
}
#define MAX_SIZE 64
unsigned long long mem_histogram[MAX_SIZE+1];
unsigned long long mem_bigger;
void dump_mem_histogram() {
for (int i = 0; i <= MAX_SIZE; i++) {
if (mem_histogram[i]) {
fprintf(stderr, "%d:\t%llu\n", i, mem_histogram[i]);
}
}
fprintf(stderr, ">%d\t%llu\n", MAX_SIZE, mem_bigger);
uint64_t amount_under = instr_histogram[ALLOC]-mem_bigger;
double frac_under = (double)amount_under/(double)instr_histogram[ALLOC];
fprintf(stderr, "%% <= %d: %lf\n", MAX_SIZE, frac_under*100);
}
void incr_mem_histogram(unsigned size) {
if (size <= MAX_SIZE) {
mem_histogram[size]++;
} else {
//fprintf(stderr, "WOW! %d\n", size);
mem_bigger++;
}
}
#define OP ((instr >> 28) & 0xff)
#define A (regs[((instr >> 6) & 0x7)])
#define B (regs[((instr >> 3) & 0x7)])
#define C (regs[(instr & 0x7)])
#define array(p) ((p) ? arrays[p] : zero)
uint32_t regs[8];
void dump_regs() {
for (int i = 0; i < 8; i++) {
fprintf(stderr, "%d: 0x%08x (%d)\n", i, regs[i], regs[i]);
}
}
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#if SAFE
#define check(x) do { \
if (!(x)) { \
fprintf(stderr, "check failed: %s\n", #x); \
exit(1); \
} \
} while (0)
#else
#define check(x) do { } while (0)
#endif
int main(int argc, char *argv[])
{
uint32_t *zero;
unsigned zero_bounds;
uint32_t instr;
uint32_t IP = 0;
opcode_t op;
uint32_t *ray;
int initial_upload = 1;
FILE *dump = NULL;
FILE *upload = NULL;
unsigned num_arrays = 100; /* The maximum number of arrays */
int num_frees = 100;
int next_unallocated = 1; /* the slot in array not yet allocated. */
int next_free = 0; /* the first thing in free_slots */
uint32_t **arrays;
int *free_slots;
free_slots = calloc(num_frees, sizeof(int *));
check(free_slots);
arrays = calloc(num_arrays, sizeof(int **));
check(arrays);
if (argc < 2 || argc > 4) return 1;
zero = read_file(argv[1]);
zero_bounds = zero[-1];
if (argc >= 3) {
upload = fopen(argv[2], "r");
}
if (argc == 4) {
dump = fopen(argv[3], "w");
setvbuf(dump, NULL, _IOLBF, BUFSIZ);
}
/* ignore SIGINT to protect me from my own stupidity */
if (strstr(argv[1], "umix") != NULL)
signal(SIGINT, SIG_IGN);
#if COLLECT_STATS
int allocs_since = 0, max_allocs_since = 0;
int abandons_since = 0, max_abandons_since = 0;
unsigned memory_use = zero[-1];
#endif
while (1) {
check(IP < zero_bounds);
instr = zero[IP];
op = OP;
IP++;
#if COLLECT_STATS
instr_count++;
instr_histogram[op]++;
#endif
switch(op) {
case CMOV:
if (C)
A = B;
break;
case INDEX:
check(B < num_arrays);
check(B == 0 || arrays[B]-1 != NULL);
check(C < array(B)[-1]);
A = array(B)[C];
break;
case AMEND:
check(A < num_arrays);
check(A == 0 || arrays[A]-1 != NULL);
check(B < array(A)[-1]);
array(A)[B] = C;
break;
case ADD:
A = B + C;
break;
case MUL:
A = B * C;
break;
case DIV:
check(C);
A = B / C;
break;
case NAND:
A = ~(B & C);
break;
case HALT:
#if COLLECT_STATS
dump_histogram();
dump_mem_histogram();
dump_regs();
fprintf(stderr, "Max memory use: %u bytes (%0.2f MB)\n",
memory_use, memory_use/(1024.0*1024.0));
#endif
exit(0);
case ALLOC:
#if COLLECT_STATS
memory_use += C+1;
incr_mem_histogram(C);
if (abandons_since) {
// printf("ABANDONS SINCE: %d\n", abandons_since);
max_abandons_since = MAX(max_abandons_since, abandons_since);
abandons_since = 0;
}
allocs_since++;
#endif
{
uint32_t idx;
if (next_free > 0) {
idx = free_slots[next_free];
next_free--;
} else {
idx = next_unallocated;
next_unallocated++;
if (idx >= num_arrays) {
arrays = realloc(arrays, num_arrays*sizeof(uint32_t **)*2);
check(arrays);
#if SAFE
for (unsigned i = num_arrays; i < num_arrays*2; i++) {
arrays[i] = NULL;
}
#endif
num_arrays *= 2;
}
}
ray = calloc(C+1, sizeof(uint32_t));
*ray = C;
arrays[idx] = ray + 1;
B = idx;
break;
}
case ABANDON:
check(C != 0 && C < num_arrays && arrays[C]-1);
#if COLLECT_STATS
memory_use -= array(C)[-1] + 1;
abandons_since++;
if (allocs_since) {
// printf("ALLOCS SINCE: %d\n", allocs_since);
max_allocs_since = MAX(max_allocs_since, allocs_since);
allocs_since = 0;
}
#endif
next_free++;
if (next_free >= num_frees) {
num_frees *= 2;
free_slots = realloc(free_slots, num_frees*sizeof(int));
check(free_slots);
}
free_slots[next_free] = C;
free(array(C)-1);
arrays[C] = NULL;
break;
case OUT:
check(C < 256);
putchar(C);
if (dump) fputc(C, dump);
break;
case IN:
/* deal with having input overridden for upload */
if (upload) {
C = getc(upload);
if ((int)C == EOF) {
fclose(upload);
upload = NULL;
if (!initial_upload) {
ungetc('M', stdin);
ungetc('O', stdin);
ungetc('E', stdin);
}
} else {
break;
}
}
C = getchar();
/* on ESC, we read the rest of the line as a filename to override
* stdin with */
if (C == ESC) {
initial_upload = 0;
char buf[200];
str_rtrim(fgets(buf, sizeof(buf), stdin));
upload = fopen(buf, "r");
fprintf(stderr, "opening %s\n", buf);
if (!upload)
perror("opening file");
/* hack to get us reinterpreting the same instruction */
IP--;
continue;
}
break;
case LOADP:
if (B) {
check(B < num_arrays && arrays[B]-1);
#if COLLECT_STATS
memory_use -= zero[-1]+1;
memory_use += array(B)[-1]+1;
#endif
free(zero-1);
ray = calloc(array(B)[-1] + 1, sizeof(uint32_t));
memcpy(ray, array(B)-1, (array(B)[-1] + 1)*sizeof(uint32_t));
zero = ray+1;
zero_bounds = ray[0];
}
IP = C;
break;
case ORTHO:
regs[(instr >> 25) & 0x7] = (instr & 0x1ffffff);
break;
default:
#if !SAFE
__builtin_unreachable();
#endif
fprintf(stderr, "got %x %u!\n", instr, op);
exit(2);
}
}
(void)zero_bounds; // quiet a warning
return 0;
}
/*
* Implementation of a simulator for the Universal Machine (UM) for the 2006
* ICFP Programming Contest.
* This is remarkably fragile. The spec says the machine may "fail"
* under certain conditions. Under many of those conditions, the
* machine will indeed fail spectacularly, often in ways that would
* allow the execution of arbitrary code on the real processor.
*
* BUG: Because pointers are stuffed into 32-bit integers, this only
* works in 64-bit mode if malloc() never returns pointers that use
* more than 32-bits. In practice, with my glibc version, if the
* M_MMAP_MAX malloc parameter is set to 0, all allocations are made
* from low memory (until they can't be, of course). This is really,
* really wrong. It is also really, really easy. And, on top of it,
* the 64-bit version runs faster on my machine!
*
* Actually that stuff above isn't true anymore; it seems to have
* broken. But using -mx32 seems to work except I need to declare
* ntohl myeslf because arpa/inet.h is broken. Also apparently -mx32
* might be deprecated.
*
* Wait, nevermind to that too, compiling with -no-pie fixes it.
*/
#define COLLECT_STATS 0
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>
#include <signal.h>
#include <ctype.h>
#include <assert.h>
#include <malloc.h>
/* Note: ^v is needed in readline to prefix the thinger */
#define ESC 27
#define EM 25 /* ^y */
/* str_rtrim: gets rid of whitespace on the right of a string by
* null terminating */
char *str_rtrim(char *s)
{
char *t = s;
char *last; /* last non-whitespace char */
if (!s) return s;
for (last = s; *s; s++)
if (!isspace(*s))
last = s+1;
if (*last)
*(last) = '\0';
return t;
}
uint32_t *read_file(char *filename)
{
int fd;
struct stat buf;
size_t size;
uint32_t *data;
size_t i;
if ((fd = open(filename, O_RDONLY)) < 0) {
perror("open");
exit(1);
}
fstat(fd, &buf);
size = (buf.st_size + 3)/4 + 2; /* +1 for rounding, +1 for size field */
data = calloc(size, sizeof(uint32_t));
*data = size;
data++;
if (read(fd, data, buf.st_size) < 0) {
perror("read");
exit(1);
}
close(fd);
for (i = 0; i < size-1; i++)
data[i] = ntohl(data[i]);
return data;
}
typedef enum opcode_t {
CMOV, INDEX, AMEND, ADD, MUL, DIV, NAND, HALT, ALLOC, ABANDON,
OUT, IN, LOADP, ORTHO } opcode_t;
char *names[] = {
"CMOV", "INDEX", "AMEND", "ADD", "MUL", "DIV", "NAND", "HALT", "ALLOC",
"ABAND",
"OUT", "IN", "LOADP", "ORTHO" };
unsigned long long instr_count = 0;
unsigned long long instr_histogram[ORTHO+1];
void dump_histogram() {
for (int i = 0; i < ORTHO+1; i++) {
fprintf(stderr, "%s:\t%llu\n", names[i], instr_histogram[i]);
}
fprintf(stderr, "Total:\t%llu\n", instr_count);
}
#define MAX_SIZE 64
unsigned long long mem_histogram[MAX_SIZE+1];
unsigned long long mem_bigger;
void dump_mem_histogram() {
for (int i = 0; i <= MAX_SIZE; i++) {
if (mem_histogram[i]) {
fprintf(stderr, "%d:\t%llu\n", i, mem_histogram[i]);
}
}
fprintf(stderr, ">%d\t%llu\n", MAX_SIZE, mem_bigger);
uint64_t amount_under = instr_histogram[ALLOC]-mem_bigger;
double frac_under = (double)amount_under/(double)instr_histogram[ALLOC];
fprintf(stderr, "%% <= %d: %lf\n", MAX_SIZE, frac_under*100);
}
void incr_mem_histogram(unsigned size) {
if (size <= MAX_SIZE) {
mem_histogram[size]++;
} else {
//fprintf(stderr, "WOW! %d\n", size);
mem_bigger++;
}
}
#define OP ((instr >> 28) & 0xff)
#define A (regs[((instr >> 6) & 0x7)])
#define B (regs[((instr >> 3) & 0x7)])
#define C (regs[(instr & 0x7)])
#define array(p) ((p) ? ((uint32_t *)(uintptr_t)(p)) : zero)
uint32_t regs[8];
void dump_regs() {
for (int i = 0; i < 8; i++) {
fprintf(stderr, "%d: 0x%08x (%d)\n", i, regs[i], regs[i]);
}
}
#define MAX(x, y) ((x) > (y) ? (x) : (y))
int main(int argc, char *argv[])
{
uint32_t *zero;
uint32_t instr;
uint32_t IP = 0;
opcode_t op;
uint32_t *ray;
int initial_upload = 1;
FILE *dump = NULL;
FILE *upload = NULL;
if (argc < 2 || argc > 4) return 1;
zero = read_file(argv[1]);
if (argc >= 3) {
upload = fopen(argv[2], "r");
}
if (argc == 4) {
dump = fopen(argv[3], "w");
setvbuf(dump, NULL, _IOLBF, BUFSIZ);
}
#ifdef M_MMAP_MAX
mallopt(M_MMAP_MAX, 0);
#endif
/* ignore SIGINT to protect me from my own stupidity */
if (isatty(0) && strstr(argv[1], "umix") != NULL)
signal(SIGINT, SIG_IGN);
#if COLLECT_STATS
int allocs_since = 0, max_allocs_since = 0;
int abandons_since = 0, max_abandons_since = 0;
unsigned memory_use = zero[-1];
#endif
while (1) {
instr = zero[IP];
op = OP;
IP++;
#if COLLECT_STATS
instr_count++;
instr_histogram[op]++;
#endif
switch(op) {
case CMOV:
if (C)
A = B;
break;
case INDEX:
A = array(B)[C];
break;
case AMEND:
array(A)[B] = C;
break;
case ADD:
A = B + C;
break;
case MUL:
A = B * C;
break;
case DIV:
A = B / C;
break;
case NAND:
A = ~(B & C);
break;
case HALT:
#if COLLECT_STATS
dump_histogram();
dump_mem_histogram();
dump_regs();
fprintf(stderr, "Max memory use: %u bytes (%0.2f MB)\n",
memory_use, memory_use/(1024.0*1024.0));
#endif
exit(0);
case ALLOC:
#if COLLECT_STATS
memory_use += C+1;
incr_mem_histogram(C);
if (abandons_since) {
// printf("ABANDONS SINCE: %d\n", abandons_since);
max_abandons_since = MAX(max_abandons_since, abandons_since);
abandons_since = 0;
}
allocs_since++;
#endif
ray = calloc(C+1, sizeof(uint32_t));
assert((uintptr_t)ray == (uintptr_t)(uint32_t)(uintptr_t)ray);
*ray = C+1;
B = (uint32_t)(uintptr_t)(ray+1);
break;
case ABANDON:
#if COLLECT_STATS
memory_use -= array(C)[-1];
abandons_since++;
if (allocs_since) {
// printf("ALLOCS SINCE: %d\n", allocs_since);
max_allocs_since = MAX(max_allocs_since, allocs_since);
allocs_since = 0;
}
#endif
free(array(C)-1);
break;
case OUT:
putchar(C);
if (dump) fputc(C, dump);
break;
case IN:
/* deal with having input overridden for upload */
if (upload) {
C = getc(upload);
if ((int)C == EOF) {
fclose(upload);
upload = NULL;
if (!initial_upload) {
ungetc('M', stdin);
ungetc('O', stdin);
ungetc('E', stdin);
}
} else {
break;
}
}
C = getchar();
/* on ESC, we read the rest of the line as a filename to override
* stdin with */
if (C == ESC) {
initial_upload = 0;
char buf[200];
str_rtrim(fgets(buf, sizeof(buf), stdin));
upload = fopen(buf, "r");
fprintf(stderr, "opening %s\n", buf);
if (!upload)
perror("opening file");
/* hack to get us reinterpreting the same instruction */
IP--;
continue;
}
break;
case LOADP:
if (B) {
#if COLLECT_STATS
memory_use -= zero[-1];
memory_use += array(B)[-1];
#endif
free(zero-1);
ray = calloc(array(B)[-1], sizeof(uint32_t));
memcpy(ray, array(B)-1, array(B)[-1]*sizeof(uint32_t));
zero = ray+1;
}
IP = C;
break;
case ORTHO:
regs[(instr >> 25) & 0x7] = (instr & 0x1ffffff);
break;
default:
__builtin_unreachable();
fprintf(stderr, "got %x %u!\n", instr, op);
abort();
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment