Skip to content

Instantly share code, notes, and snippets.

@andersonsp
Last active September 5, 2019 16:48
Show Gist options
  • Save andersonsp/e92a4c39af34f2bb667567fed2592002 to your computer and use it in GitHub Desktop.
Save andersonsp/e92a4c39af34f2bb667567fed2592002 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* Codewords understood by run().  The numeric values are significant: they
 * are stored as plain ints in code[], and main() creates the named builtins
 * by counting from CW_EXIT up to (but not including) CW_LAST. */
enum {
    CW_LIT     = 0,
    CW_COMPILE = 1,
    CW_CALL    = 2,
    CW_JMP     = 3,
    CW_DEF     = 4,
    CW_IMM     = 5,
    CW__TOK    = 6,
    CW__EXEC   = 7,
    CW_EXIT    = 8,
    CW_ECHO    = 9,
    CW_LAST    = 10
};

/* Printable name of every codeword, indexed by its enum value.  Only the
 * (commented-out) trace line in run() looks names up here. */
char *words[] = {
    [CW_LIT]     = "lit",
    [CW_COMPILE] = "compile",
    [CW_CALL]    = "call",
    [CW_JMP]     = "jmp",
    [CW_DEF]     = "def",
    [CW_IMM]     = "imm",
    [CW__TOK]    = "_tok",
    [CW__EXEC]   = "_exec",
    [CW_EXIT]    = "exit",
    [CW_ECHO]    = "echo",
    [CW_LAST]    = "last"
};
// Address compiled in front of every numeric literal. code[] is
// zero-initialised and the dictionary starts at index 32, so code[2] holds 0,
// i.e. CW_LIT. When the interpreter loop meets a 2 in compiled code it calls
// run(2), which looks up the codeword at code[2] and therefore pushes the
// integer stored in the following cell.
#define ADDR_OF_PUSHINT 2
/* String storage.  The first 64 bytes are the scratch area the current input
 * token is read into; word names are stored behind it. */
char str_mem[5000];

/* Cell memory: dictionary entries and compiled code live here. */
int code[20000] = { 0 };

/* Data stack and return stack. */
int d_stack[500];
int r_stack[500];

/* Address of the newest dictionary entry.  It starts at 1, so the very first
 * entry created by def_word() gets 1 as its "previous entry" link; the
 * dictionary lookup uses that value to recognise the end of the chain. */
int dict_top = 1;

/* Next free byte in str_mem; names start at 64 because the first 64 bytes
 * are reserved for reading user input. */
int str_top = 64;

/* pc: program counter into code[]. */
int pc = 0;
/* dp: next free cell of code[]; the first dictionary append is at index 32. */
int dp = 32;
/* sp / rp: number of cells currently on the data / return stack. */
int sp = 0;
int rp = 0;
/*
 * Append one cell to code[] at the dictionary pointer dp and advance dp.
 *
 * val: the value to store (a codeword, an address or a literal).
 *
 * If dp lies outside code[] the program is terminated with an error message
 * instead of writing out of bounds.
 */
void emit(int val) {
    const int capacity = (int)(sizeof code / sizeof code[0]);

    if (dp < 0 || dp >= capacity) {
        fprintf(stderr, "emit: code memory exhausted (dp=%d)\n", dp);
        exit(EXIT_FAILURE);
    }
    code[dp++] = val;
}
/*
 * Create a new dictionary entry whose name is the next whitespace-delimited
 * token on standard input.
 *
 * The entry occupies three cells of code[]:
 *   [0] address of the previous entry (old dict_top)
 *   [1] offset of the NUL-terminated name inside str_mem
 *   [2] codeword
 * dict_top is updated to point at cell [0] of the new entry.
 *
 * codeword: value stored in cell [2] of the entry.
 *
 * The name is read with a field width of 63 so it fits the 64-byte input
 * area that the token lookup compares against; a longer token is split and
 * its remainder stays on the input stream. The program terminates with an
 * error if no name can be read or if str_mem has no room left for it.
 */
void def_word(int codeword) {
    char name[64];

    if (scanf("%63s", name) != 1) {
        fprintf(stderr, "def_word: missing word name\n");
        exit(EXIT_FAILURE);
    }

    size_t len = strlen(name) + 1; /* include the terminating NUL */
    if (str_top < 0 || (size_t)str_top + len > sizeof str_mem) {
        fprintf(stderr, "def_word: string memory exhausted\n");
        exit(EXIT_FAILURE);
    }

    emit(dict_top);        /* link to the previous entry */
    dict_top = dp - 1;     /* the cell just written is the new entry's start */
    emit(str_top);         /* where the name is stored */
    emit(codeword);

    memcpy(str_mem + str_top, name, len);
    str_top += (int)len;
}
// Dictionary entry selected by the most recent CW__TOK lookup in run().
// The value 1 means the token matched no entry; CW__EXEC then treats the
// token as a number.
int entry_addr;
/*
 * Execute the codeword stored at code[word_addr].
 *
 * word_addr: index into code[] of the cell holding the codeword. The cell
 *            right after it (word_addr + 1) is what CW_COMPILE appends and
 *            where CW_CALL / CW_JMP continue execution.
 *
 * Works on the global machine state (pc, dp, sp, rp, both stacks, code[],
 * str_mem, dict_top, entry_addr). CW__TOK exits the program with status 0
 * when no more input can be read. An unknown codeword trips assert(0).
 */
void run(int word_addr) {
    int next_word = word_addr + 1;
    int codeword = code[word_addr];
    // printf("[%3d] %s\n", word_addr, codeword < CW_LAST ? words[codeword] : "");
    switch (codeword) {
    case CW__TOK:
        // The token is read into the first 64 bytes of str_mem. The field
        // width keeps a longer token from spilling into the word names that
        // are stored from offset 64 onwards; the excess characters are read
        // as the next token instead.
        if (scanf("%63s", str_mem) < 1) {
            exit(0);
        }
        // Walk the dictionary from the newest entry, following the link
        // cells. If nothing matches, the walk reaches address 1: code[2] is
        // expected to be 0 there, so the token is compared with itself and
        // the loop stops with entry_addr == 1.
        entry_addr = dict_top;
        while (strcmp(str_mem, &str_mem[code[entry_addr + 1]])) {
            entry_addr = code[entry_addr];
        }
        break;
    case CW__EXEC: // _read
        if (entry_addr != 1) {
            // The token names an existing word: run the codeword cell of its
            // entry, which sits two cells after the entry's start.
            run(entry_addr + 2);
        } else {
            // Not found: assume it is a number and compile the address of
            // the pushint instruction followed by the number itself.
            emit(ADDR_OF_PUSHINT);
            emit(atoi(str_mem));
        }
        break;
    case CW_EXIT: // exit
        // Leave the current function: pop the return stack into pc.
        pc = r_stack[--rp];
        break;
    case CW_COMPILE: // compile code
        // Append a pointer to the cell following this codeword.
        emit(next_word);
        break;
    case CW_LIT: // pushint
        // Push the cell at pc onto the data stack and skip over it.
        d_stack[sp++] = code[pc++];
        break;
    case CW_CALL: // run code
        r_stack[rp++] = pc; // remember where to return to
        pc = next_word;     // continue with the cell after this codeword
        break;
    case CW_JMP: // tail call
        fprintf(stderr, "jmp ");
        pc = next_word; // like CW_CALL, but without saving a return address
        break;
    case CW_IMM: // immediate
        // Drop the last two emitted cells and write CW_CALL in place of the
        // first one. Directly after CW_DEF those cells are the CW_COMPILE
        // codeword and the CW_CALL cell of the word being defined.
        dp -= 2;
        emit(CW_CALL);
        break;
    case CW_DEF: // :
        // New entry with codeword CW_COMPILE, followed by a CW_CALL cell
        // that enters the body compiled after it.
        def_word(CW_COMPILE);
        emit(CW_CALL);
        break;
    case CW_ECHO: // echo
        // Pop the data stack and print the value as a character.
        putchar(d_stack[--sp]);
        break;
    default:
        assert(0);
    }
}
/*
 * Bootstrap the dictionary and enter the interpreter loop.
 *
 * The names of the builtin words are not hard-coded: def_word() reads each
 * one from standard input, in the order the words are created below.
 * CW_LIT, CW_COMPILE, CW_CALL and CW_JMP are internal codewords and get no
 * dictionary entry of their own.
 */
int main() {
    def_word(CW_DEF); // the defining word
    def_word(CW_IMM); // immediate

    // Token reader: an entry with codeword CW_COMPILE whose body is the
    // single cell CW__TOK.
    def_word(CW_COMPILE);
    int tok_cell = dp;
    emit(CW__TOK);

    // Executor: an entry with codeword CW_COMPILE whose body is CW__EXEC
    // followed by a CW_CALL cell.
    def_word(CW_COMPILE);
    int exec_cell = dp;
    emit(CW__EXEC);
    int call_cell = dp;
    emit(CW_CALL);

    // Read loop, placed right behind that CW_CALL cell:
    //   pc + 0: address of the CW__TOK cell   -> read a token and look it up
    //   pc + 1: address of the CW__EXEC cell  -> run it or compile a number
    //   pc + 2: address of the CW_CALL cell   -> jump back to pc + 0
    // Running the CW_CALL cell pushes pc on the return stack and continues
    // at the cell after it, which is the start of this loop. Every pass
    // therefore uses up one return-stack slot, so the loop will eventually
    // trash things.
    pc = dp;
    emit(tok_cell);
    emit(exec_cell);
    emit(call_cell);

    // The remaining builtins (CW_EXIT up to, but not including, CW_LAST) are
    // each an entry with codeword CW_COMPILE followed by their own codeword.
    int op = CW_EXIT;
    while (op < CW_LAST) {
        def_word(CW_COMPILE);
        emit(op);
        op++;
    }

    // pc points at the read loop; fetch the next address, advance pc, run it.
    while (1) {
        int word_addr = code[pc];
        pc++;
        run(word_addr);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment