Created
September 14, 2025 00:02
-
-
Save msullivan/a0917c1e54136f620e5e6c733ca7686c to your computer and use it in GitHub Desktop.
UM implementations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Implementation of a simulator for the Universal Machine (UM) for the 2006 | |
| * ICFP Programming Contest. | |
| * | |
| * This is an "honest" version that doesn't rely on sticking pointers | |
| * into 32-bit integers. It is maybe a little slower, but not more | |
| * than 10%? My measurements have all been kind of noisy. | |
| * | |
| * If SAFE is defined, then we do bounds checks and the like. | |
| * That's a decent mount more slow. | |
| */ | |
| #define SAFE 1 | |
| #define COLLECT_STATS 0 | |
| #include <sys/types.h> | |
| #include <sys/stat.h> | |
| #include <fcntl.h> | |
| #include <unistd.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <stdint.h> | |
| #include <arpa/inet.h> | |
| #include <signal.h> | |
| #include <ctype.h> | |
| #include <assert.h> | |
| #include <malloc.h> | |
| /* Note: ^v is needed in readline to prefix the thinger */ | |
| #define ESC 27 | |
| #define EM 25 /* ^y */ | |
| /* str_rtrim: gets rid of whitespace on the right of a string by | |
| * null terminating */ | |
| char *str_rtrim(char *s) | |
| { | |
| char *t = s; | |
| char *last; /* last non-whitespace char */ | |
| if (!s) return s; | |
| for (last = s; *s; s++) | |
| if (!isspace(*s)) | |
| last = s+1; | |
| if (*last) | |
| *(last) = '\0'; | |
| return t; | |
| } | |
| uint32_t *read_file(char *filename) | |
| { | |
| int fd; | |
| struct stat buf; | |
| size_t size; | |
| uint32_t *data; | |
| size_t i; | |
| if ((fd = open(filename, O_RDONLY)) < 0) { | |
| perror("open"); | |
| exit(1); | |
| } | |
| fstat(fd, &buf); | |
| size = (buf.st_size + 3)/4 + 1; /* +1 for rounding, +1 for size field */ | |
| data = calloc(size + 1, sizeof(uint32_t)); | |
| *data = size; | |
| data++; | |
| if (read(fd, data, buf.st_size) < 0) { | |
| perror("read"); | |
| exit(1); | |
| } | |
| close(fd); | |
| for (i = 0; i < size; i++) | |
| data[i] = ntohl(data[i]); | |
| return data; | |
| } | |
| typedef enum opcode_t { | |
| CMOV, INDEX, AMEND, ADD, MUL, DIV, NAND, HALT, ALLOC, ABANDON, | |
| OUT, IN, LOADP, ORTHO } opcode_t; | |
| char *names[] = { | |
| "CMOV", "INDEX", "AMEND", "ADD", "MUL", "DIV", "NAND", "HALT", "ALLOC", | |
| "ABAND", | |
| "OUT", "IN", "LOADP", "ORTHO" }; | |
| unsigned long long instr_count = 0; | |
| unsigned long long instr_histogram[ORTHO+1]; | |
| void dump_histogram() { | |
| for (int i = 0; i < ORTHO+1; i++) { | |
| fprintf(stderr, "%s:\t%llu\n", names[i], instr_histogram[i]); | |
| } | |
| fprintf(stderr, "Total:\t%llu\n", instr_count); | |
| } | |
| #define MAX_SIZE 64 | |
| unsigned long long mem_histogram[MAX_SIZE+1]; | |
| unsigned long long mem_bigger; | |
| void dump_mem_histogram() { | |
| for (int i = 0; i <= MAX_SIZE; i++) { | |
| if (mem_histogram[i]) { | |
| fprintf(stderr, "%d:\t%llu\n", i, mem_histogram[i]); | |
| } | |
| } | |
| fprintf(stderr, ">%d\t%llu\n", MAX_SIZE, mem_bigger); | |
| uint64_t amount_under = instr_histogram[ALLOC]-mem_bigger; | |
| double frac_under = (double)amount_under/(double)instr_histogram[ALLOC]; | |
| fprintf(stderr, "%% <= %d: %lf\n", MAX_SIZE, frac_under*100); | |
| } | |
| void incr_mem_histogram(unsigned size) { | |
| if (size <= MAX_SIZE) { | |
| mem_histogram[size]++; | |
| } else { | |
| //fprintf(stderr, "WOW! %d\n", size); | |
| mem_bigger++; | |
| } | |
| } | |
| #define OP ((instr >> 28) & 0xff) | |
| #define A (regs[((instr >> 6) & 0x7)]) | |
| #define B (regs[((instr >> 3) & 0x7)]) | |
| #define C (regs[(instr & 0x7)]) | |
| #define array(p) ((p) ? arrays[p] : zero) | |
| uint32_t regs[8]; | |
| void dump_regs() { | |
| for (int i = 0; i < 8; i++) { | |
| fprintf(stderr, "%d: 0x%08x (%d)\n", i, regs[i], regs[i]); | |
| } | |
| } | |
| #define MAX(x, y) ((x) > (y) ? (x) : (y)) | |
| #if SAFE | |
| #define check(x) do { \ | |
| if (!(x)) { \ | |
| fprintf(stderr, "check failed: %s\n", #x); \ | |
| exit(1); \ | |
| } \ | |
| } while (0) | |
| #else | |
| #define check(x) do { } while (0) | |
| #endif | |
| int main(int argc, char *argv[]) | |
| { | |
| uint32_t *zero; | |
| unsigned zero_bounds; | |
| uint32_t instr; | |
| uint32_t IP = 0; | |
| opcode_t op; | |
| uint32_t *ray; | |
| int initial_upload = 1; | |
| FILE *dump = NULL; | |
| FILE *upload = NULL; | |
| unsigned num_arrays = 100; /* The maximum number of arrays */ | |
| int num_frees = 100; | |
| int next_unallocated = 1; /* the slot in array not yet allocated. */ | |
| int next_free = 0; /* the first thing in free_slots */ | |
| uint32_t **arrays; | |
| int *free_slots; | |
| free_slots = calloc(num_frees, sizeof(int *)); | |
| check(free_slots); | |
| arrays = calloc(num_arrays, sizeof(int **)); | |
| check(arrays); | |
| if (argc < 2 || argc > 4) return 1; | |
| zero = read_file(argv[1]); | |
| zero_bounds = zero[-1]; | |
| if (argc >= 3) { | |
| upload = fopen(argv[2], "r"); | |
| } | |
| if (argc == 4) { | |
| dump = fopen(argv[3], "w"); | |
| setvbuf(dump, NULL, _IOLBF, BUFSIZ); | |
| } | |
| /* ignore SIGINT to protect me from my own stupidity */ | |
| if (strstr(argv[1], "umix") != NULL) | |
| signal(SIGINT, SIG_IGN); | |
| #if COLLECT_STATS | |
| int allocs_since = 0, max_allocs_since = 0; | |
| int abandons_since = 0, max_abandons_since = 0; | |
| unsigned memory_use = zero[-1]; | |
| #endif | |
| while (1) { | |
| check(IP < zero_bounds); | |
| instr = zero[IP]; | |
| op = OP; | |
| IP++; | |
| #if COLLECT_STATS | |
| instr_count++; | |
| instr_histogram[op]++; | |
| #endif | |
| switch(op) { | |
| case CMOV: | |
| if (C) | |
| A = B; | |
| break; | |
| case INDEX: | |
| check(B < num_arrays); | |
| check(B == 0 || arrays[B]-1 != NULL); | |
| check(C < array(B)[-1]); | |
| A = array(B)[C]; | |
| break; | |
| case AMEND: | |
| check(A < num_arrays); | |
| check(A == 0 || arrays[A]-1 != NULL); | |
| check(B < array(A)[-1]); | |
| array(A)[B] = C; | |
| break; | |
| case ADD: | |
| A = B + C; | |
| break; | |
| case MUL: | |
| A = B * C; | |
| break; | |
| case DIV: | |
| check(C); | |
| A = B / C; | |
| break; | |
| case NAND: | |
| A = ~(B & C); | |
| break; | |
| case HALT: | |
| #if COLLECT_STATS | |
| dump_histogram(); | |
| dump_mem_histogram(); | |
| dump_regs(); | |
| fprintf(stderr, "Max memory use: %u bytes (%0.2f MB)\n", | |
| memory_use, memory_use/(1024.0*1024.0)); | |
| #endif | |
| exit(0); | |
| case ALLOC: | |
| #if COLLECT_STATS | |
| memory_use += C+1; | |
| incr_mem_histogram(C); | |
| if (abandons_since) { | |
| // printf("ABANDONS SINCE: %d\n", abandons_since); | |
| max_abandons_since = MAX(max_abandons_since, abandons_since); | |
| abandons_since = 0; | |
| } | |
| allocs_since++; | |
| #endif | |
| { | |
| uint32_t idx; | |
| if (next_free > 0) { | |
| idx = free_slots[next_free]; | |
| next_free--; | |
| } else { | |
| idx = next_unallocated; | |
| next_unallocated++; | |
| if (idx >= num_arrays) { | |
| arrays = realloc(arrays, num_arrays*sizeof(uint32_t **)*2); | |
| check(arrays); | |
| #if SAFE | |
| for (unsigned i = num_arrays; i < num_arrays*2; i++) { | |
| arrays[i] = NULL; | |
| } | |
| #endif | |
| num_arrays *= 2; | |
| } | |
| } | |
| ray = calloc(C+1, sizeof(uint32_t)); | |
| *ray = C; | |
| arrays[idx] = ray + 1; | |
| B = idx; | |
| break; | |
| } | |
| case ABANDON: | |
| check(C != 0 && C < num_arrays && arrays[C]-1); | |
| #if COLLECT_STATS | |
| memory_use -= array(C)[-1] + 1; | |
| abandons_since++; | |
| if (allocs_since) { | |
| // printf("ALLOCS SINCE: %d\n", allocs_since); | |
| max_allocs_since = MAX(max_allocs_since, allocs_since); | |
| allocs_since = 0; | |
| } | |
| #endif | |
| next_free++; | |
| if (next_free >= num_frees) { | |
| num_frees *= 2; | |
| free_slots = realloc(free_slots, num_frees*sizeof(int)); | |
| check(free_slots); | |
| } | |
| free_slots[next_free] = C; | |
| free(array(C)-1); | |
| arrays[C] = NULL; | |
| break; | |
| case OUT: | |
| check(C < 256); | |
| putchar(C); | |
| if (dump) fputc(C, dump); | |
| break; | |
| case IN: | |
| /* deal with having input overridden for upload */ | |
| if (upload) { | |
| C = getc(upload); | |
| if ((int)C == EOF) { | |
| fclose(upload); | |
| upload = NULL; | |
| if (!initial_upload) { | |
| ungetc('M', stdin); | |
| ungetc('O', stdin); | |
| ungetc('E', stdin); | |
| } | |
| } else { | |
| break; | |
| } | |
| } | |
| C = getchar(); | |
| /* on ESC, we read the rest of the line as a filename to override | |
| * stdin with */ | |
| if (C == ESC) { | |
| initial_upload = 0; | |
| char buf[200]; | |
| str_rtrim(fgets(buf, sizeof(buf), stdin)); | |
| upload = fopen(buf, "r"); | |
| fprintf(stderr, "opening %s\n", buf); | |
| if (!upload) | |
| perror("opening file"); | |
| /* hack to get us reinterpreting the same instruction */ | |
| IP--; | |
| continue; | |
| } | |
| break; | |
| case LOADP: | |
| if (B) { | |
| check(B < num_arrays && arrays[B]-1); | |
| #if COLLECT_STATS | |
| memory_use -= zero[-1]+1; | |
| memory_use += array(B)[-1]+1; | |
| #endif | |
| free(zero-1); | |
| ray = calloc(array(B)[-1] + 1, sizeof(uint32_t)); | |
| memcpy(ray, array(B)-1, (array(B)[-1] + 1)*sizeof(uint32_t)); | |
| zero = ray+1; | |
| zero_bounds = ray[0]; | |
| } | |
| IP = C; | |
| break; | |
| case ORTHO: | |
| regs[(instr >> 25) & 0x7] = (instr & 0x1ffffff); | |
| break; | |
| default: | |
| #if !SAFE | |
| __builtin_unreachable(); | |
| #endif | |
| fprintf(stderr, "got %x %u!\n", instr, op); | |
| exit(2); | |
| } | |
| } | |
| (void)zero_bounds; // quiet a warning | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Implementation of a simulator for the Universal Machine (UM) for the 2006 | |
| * ICFP Programming Contest. | |
| * This is remarkably fragile. The spec says the machine may "fail" | |
| * under certain conditions. Under many of those conditions, the | |
| * machine will indeed fail spectacularly, often in ways that would | |
| * allow the execution of arbitrary code on the real processor. | |
| * | |
| * BUG: Because pointers are stuffed into 32-bit integers, this only | |
| * works in 64-bit mode if malloc() never returns pointers that use | |
| * more than 32-bits. In practice, with my glibc version, if the | |
| * M_MMAP_MAX malloc parameter is set to 0, all allocations are made | |
| * from low memory (until they can't be, of course). This is really, | |
| * really wrong. It is also really, really easy. And, on top of it, | |
| * the 64-bit version runs faster on my machine! | |
| * | |
| * Actually that stuff above isn't true anymore; it seems to have | |
| * broken. But using -mx32 seems to work except I need to declare | |
| * ntohl myeslf because arpa/inet.h is broken. Also apparently -mx32 | |
| * might be deprecated. | |
| * | |
| * Wait, nevermind to that too, compiling with -no-pie fixes it. | |
| */ | |
| #define COLLECT_STATS 0 | |
| #include <sys/types.h> | |
| #include <sys/stat.h> | |
| #include <fcntl.h> | |
| #include <unistd.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <stdint.h> | |
| #include <arpa/inet.h> | |
| #include <signal.h> | |
| #include <ctype.h> | |
| #include <assert.h> | |
| #include <malloc.h> | |
| /* Note: ^v is needed in readline to prefix the thinger */ | |
| #define ESC 27 | |
| #define EM 25 /* ^y */ | |
| /* str_rtrim: gets rid of whitespace on the right of a string by | |
| * null terminating */ | |
| char *str_rtrim(char *s) | |
| { | |
| char *t = s; | |
| char *last; /* last non-whitespace char */ | |
| if (!s) return s; | |
| for (last = s; *s; s++) | |
| if (!isspace(*s)) | |
| last = s+1; | |
| if (*last) | |
| *(last) = '\0'; | |
| return t; | |
| } | |
| uint32_t *read_file(char *filename) | |
| { | |
| int fd; | |
| struct stat buf; | |
| size_t size; | |
| uint32_t *data; | |
| size_t i; | |
| if ((fd = open(filename, O_RDONLY)) < 0) { | |
| perror("open"); | |
| exit(1); | |
| } | |
| fstat(fd, &buf); | |
| size = (buf.st_size + 3)/4 + 2; /* +1 for rounding, +1 for size field */ | |
| data = calloc(size, sizeof(uint32_t)); | |
| *data = size; | |
| data++; | |
| if (read(fd, data, buf.st_size) < 0) { | |
| perror("read"); | |
| exit(1); | |
| } | |
| close(fd); | |
| for (i = 0; i < size-1; i++) | |
| data[i] = ntohl(data[i]); | |
| return data; | |
| } | |
| typedef enum opcode_t { | |
| CMOV, INDEX, AMEND, ADD, MUL, DIV, NAND, HALT, ALLOC, ABANDON, | |
| OUT, IN, LOADP, ORTHO } opcode_t; | |
| char *names[] = { | |
| "CMOV", "INDEX", "AMEND", "ADD", "MUL", "DIV", "NAND", "HALT", "ALLOC", | |
| "ABAND", | |
| "OUT", "IN", "LOADP", "ORTHO" }; | |
| unsigned long long instr_count = 0; | |
| unsigned long long instr_histogram[ORTHO+1]; | |
| void dump_histogram() { | |
| for (int i = 0; i < ORTHO+1; i++) { | |
| fprintf(stderr, "%s:\t%llu\n", names[i], instr_histogram[i]); | |
| } | |
| fprintf(stderr, "Total:\t%llu\n", instr_count); | |
| } | |
| #define MAX_SIZE 64 | |
| unsigned long long mem_histogram[MAX_SIZE+1]; | |
| unsigned long long mem_bigger; | |
| void dump_mem_histogram() { | |
| for (int i = 0; i <= MAX_SIZE; i++) { | |
| if (mem_histogram[i]) { | |
| fprintf(stderr, "%d:\t%llu\n", i, mem_histogram[i]); | |
| } | |
| } | |
| fprintf(stderr, ">%d\t%llu\n", MAX_SIZE, mem_bigger); | |
| uint64_t amount_under = instr_histogram[ALLOC]-mem_bigger; | |
| double frac_under = (double)amount_under/(double)instr_histogram[ALLOC]; | |
| fprintf(stderr, "%% <= %d: %lf\n", MAX_SIZE, frac_under*100); | |
| } | |
| void incr_mem_histogram(unsigned size) { | |
| if (size <= MAX_SIZE) { | |
| mem_histogram[size]++; | |
| } else { | |
| //fprintf(stderr, "WOW! %d\n", size); | |
| mem_bigger++; | |
| } | |
| } | |
| #define OP ((instr >> 28) & 0xff) | |
| #define A (regs[((instr >> 6) & 0x7)]) | |
| #define B (regs[((instr >> 3) & 0x7)]) | |
| #define C (regs[(instr & 0x7)]) | |
| #define array(p) ((p) ? ((uint32_t *)(uintptr_t)(p)) : zero) | |
| uint32_t regs[8]; | |
| void dump_regs() { | |
| for (int i = 0; i < 8; i++) { | |
| fprintf(stderr, "%d: 0x%08x (%d)\n", i, regs[i], regs[i]); | |
| } | |
| } | |
| #define MAX(x, y) ((x) > (y) ? (x) : (y)) | |
| int main(int argc, char *argv[]) | |
| { | |
| uint32_t *zero; | |
| uint32_t instr; | |
| uint32_t IP = 0; | |
| opcode_t op; | |
| uint32_t *ray; | |
| int initial_upload = 1; | |
| FILE *dump = NULL; | |
| FILE *upload = NULL; | |
| if (argc < 2 || argc > 4) return 1; | |
| zero = read_file(argv[1]); | |
| if (argc >= 3) { | |
| upload = fopen(argv[2], "r"); | |
| } | |
| if (argc == 4) { | |
| dump = fopen(argv[3], "w"); | |
| setvbuf(dump, NULL, _IOLBF, BUFSIZ); | |
| } | |
| #ifdef M_MMAP_MAX | |
| mallopt(M_MMAP_MAX, 0); | |
| #endif | |
| /* ignore SIGINT to protect me from my own stupidity */ | |
| if (isatty(0) && strstr(argv[1], "umix") != NULL) | |
| signal(SIGINT, SIG_IGN); | |
| #if COLLECT_STATS | |
| int allocs_since = 0, max_allocs_since = 0; | |
| int abandons_since = 0, max_abandons_since = 0; | |
| unsigned memory_use = zero[-1]; | |
| #endif | |
| while (1) { | |
| instr = zero[IP]; | |
| op = OP; | |
| IP++; | |
| #if COLLECT_STATS | |
| instr_count++; | |
| instr_histogram[op]++; | |
| #endif | |
| switch(op) { | |
| case CMOV: | |
| if (C) | |
| A = B; | |
| break; | |
| case INDEX: | |
| A = array(B)[C]; | |
| break; | |
| case AMEND: | |
| array(A)[B] = C; | |
| break; | |
| case ADD: | |
| A = B + C; | |
| break; | |
| case MUL: | |
| A = B * C; | |
| break; | |
| case DIV: | |
| A = B / C; | |
| break; | |
| case NAND: | |
| A = ~(B & C); | |
| break; | |
| case HALT: | |
| #if COLLECT_STATS | |
| dump_histogram(); | |
| dump_mem_histogram(); | |
| dump_regs(); | |
| fprintf(stderr, "Max memory use: %u bytes (%0.2f MB)\n", | |
| memory_use, memory_use/(1024.0*1024.0)); | |
| #endif | |
| exit(0); | |
| case ALLOC: | |
| #if COLLECT_STATS | |
| memory_use += C+1; | |
| incr_mem_histogram(C); | |
| if (abandons_since) { | |
| // printf("ABANDONS SINCE: %d\n", abandons_since); | |
| max_abandons_since = MAX(max_abandons_since, abandons_since); | |
| abandons_since = 0; | |
| } | |
| allocs_since++; | |
| #endif | |
| ray = calloc(C+1, sizeof(uint32_t)); | |
| assert((uintptr_t)ray == (uintptr_t)(uint32_t)(uintptr_t)ray); | |
| *ray = C+1; | |
| B = (uint32_t)(uintptr_t)(ray+1); | |
| break; | |
| case ABANDON: | |
| #if COLLECT_STATS | |
| memory_use -= array(C)[-1]; | |
| abandons_since++; | |
| if (allocs_since) { | |
| // printf("ALLOCS SINCE: %d\n", allocs_since); | |
| max_allocs_since = MAX(max_allocs_since, allocs_since); | |
| allocs_since = 0; | |
| } | |
| #endif | |
| free(array(C)-1); | |
| break; | |
| case OUT: | |
| putchar(C); | |
| if (dump) fputc(C, dump); | |
| break; | |
| case IN: | |
| /* deal with having input overridden for upload */ | |
| if (upload) { | |
| C = getc(upload); | |
| if ((int)C == EOF) { | |
| fclose(upload); | |
| upload = NULL; | |
| if (!initial_upload) { | |
| ungetc('M', stdin); | |
| ungetc('O', stdin); | |
| ungetc('E', stdin); | |
| } | |
| } else { | |
| break; | |
| } | |
| } | |
| C = getchar(); | |
| /* on ESC, we read the rest of the line as a filename to override | |
| * stdin with */ | |
| if (C == ESC) { | |
| initial_upload = 0; | |
| char buf[200]; | |
| str_rtrim(fgets(buf, sizeof(buf), stdin)); | |
| upload = fopen(buf, "r"); | |
| fprintf(stderr, "opening %s\n", buf); | |
| if (!upload) | |
| perror("opening file"); | |
| /* hack to get us reinterpreting the same instruction */ | |
| IP--; | |
| continue; | |
| } | |
| break; | |
| case LOADP: | |
| if (B) { | |
| #if COLLECT_STATS | |
| memory_use -= zero[-1]; | |
| memory_use += array(B)[-1]; | |
| #endif | |
| free(zero-1); | |
| ray = calloc(array(B)[-1], sizeof(uint32_t)); | |
| memcpy(ray, array(B)-1, array(B)[-1]*sizeof(uint32_t)); | |
| zero = ray+1; | |
| } | |
| IP = C; | |
| break; | |
| case ORTHO: | |
| regs[(instr >> 25) & 0x7] = (instr & 0x1ffffff); | |
| break; | |
| default: | |
| __builtin_unreachable(); | |
| fprintf(stderr, "got %x %u!\n", instr, op); | |
| abort(); | |
| } | |
| } | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment