Created
January 26, 2021 18:19
-
-
Save jstimpfle/f9d534723955fd05ff5a19942c69960d to your computer and use it in GitHub Desktop.
A little stack machine, done in a few hours
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <stdarg.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <inttypes.h> | |
#define NORETURN __attribute__((noreturn)) | |
#define ARRAY_LENGTH(a) ((int) (sizeof (a) / sizeof (a)[0])) | |
void msg_fv(const char *fmt, va_list ap) | |
{ | |
vfprintf(stderr, fmt, ap); | |
fprintf(stderr, "\n"); | |
} | |
void msg_f(const char *fmt, ...) | |
{ | |
va_list ap; | |
va_start(ap, fmt); | |
msg_fv(fmt, ap); | |
va_end(ap); | |
} | |
void NORETURN fatal_fv(const char *fmt, va_list ap) | |
{ | |
msg_fv(fmt, ap); | |
abort(); | |
} | |
void NORETURN fatal_f(const char *fmt, ...) | |
{ | |
va_list ap; | |
va_start(ap, fmt); | |
msg_fv(fmt, ap); | |
va_end(ap); | |
abort(); | |
} | |
void *_xcalloc(size_t num, size_t size) | |
{ | |
void *p = calloc(num, size); | |
if (!p) | |
{ | |
fatal_f("OOM!"); | |
} | |
return p; | |
} | |
#define xcalloc(num, type) ((type *) _xcalloc((num), sizeof (type))) | |
enum | |
{ | |
INSTR_DEBUG, | |
INSTR_CONST, | |
INSTR_LOAD, | |
INSTR_STORE, | |
INSTR_OP, | |
INSTR_JMP, | |
INSTR_JMPIF, | |
}; | |
enum | |
{ | |
OP_ADD, | |
OP_SUB, | |
OP_CMP, | |
}; | |
enum | |
{ | |
CMP_EQ, | |
CMP_NE, | |
CMP_LT, | |
CMP_LE, | |
CMP_GT, | |
CMP_GE, | |
}; | |
enum | |
{ | |
OBJECT_INT, | |
OBJECT_PTR, | |
OBJECT_ARRAY, | |
}; | |
typedef struct _Instr Instr; | |
typedef struct _Object Object; | |
typedef struct _ExecCtx ExecCtx; | |
typedef struct _Frame Frame; | |
struct _Instr | |
{ | |
int kind; | |
union { | |
int t_op; | |
int t_cmp; | |
int64_t t_int; // which cases? | |
struct { int64_t pc; } t_jmp; | |
struct { int64_t pc; int kind; } t_jmpif; | |
}; | |
}; | |
struct _Object | |
{ | |
int kind; | |
union | |
{ | |
int64_t t_int; | |
Object *t_ptr; | |
}; | |
}; | |
struct _Frame | |
{ | |
int64_t pc; | |
int64_t stack_size; | |
}; | |
struct _ExecCtx | |
{ | |
Instr instrs[64]; | |
int num_instrs; | |
Object stack[64]; | |
Frame frames[64]; | |
int num_frames; | |
}; | |
Frame *get_frame(ExecCtx *ctx) | |
{ | |
return ctx->frames + ctx->num_frames - 1; | |
} | |
void push_frame(ExecCtx *ctx) | |
{ | |
ctx->frames[ctx->num_frames] = ctx->frames[ctx->num_frames - 1]; | |
++ ctx->num_frames; | |
} | |
void pop_frame(ExecCtx *ctx) | |
{ | |
-- ctx->num_frames; | |
} | |
void push_object(ExecCtx *ctx, Object object) | |
{ | |
Frame *frame = get_frame(ctx); | |
ctx->stack[frame->stack_size] = object; | |
++ frame->stack_size; | |
} | |
Object pop_object(ExecCtx *ctx) | |
{ | |
Frame *frame = get_frame(ctx); | |
assert(frame->stack_size >= 1); | |
-- frame->stack_size; | |
return ctx->stack[frame->stack_size]; | |
} | |
void push_int(ExecCtx *ctx, int64_t val) | |
{ | |
Object object = {0}; | |
object.kind = OBJECT_INT; | |
object.t_int = val; | |
push_object(ctx, object); | |
} | |
void exec_op(ExecCtx *ctx, int op) | |
{ | |
Object b = pop_object(ctx); | |
Object a = pop_object(ctx); | |
if (op == OP_ADD) | |
{ | |
assert(a.kind == OBJECT_INT); | |
assert(b.kind == OBJECT_INT); | |
Object result = {0}; | |
result.kind = OBJECT_INT; | |
result.t_int = a.t_int + b.t_int; | |
push_object(ctx, result); | |
} | |
else if (op == OP_SUB) | |
{ | |
assert(a.kind == OBJECT_INT); | |
assert(b.kind == OBJECT_INT); | |
Object result = {0}; | |
result.kind = OBJECT_INT; | |
result.t_int = a.t_int - b.t_int; | |
push_object(ctx, result); | |
} | |
} | |
void print_stack(ExecCtx *ctx) | |
{ | |
Frame *frame = get_frame(ctx); | |
printf("Frame: ----------\n"); | |
for (int i = 0; i < frame->stack_size; i++) | |
{ | |
Object *a = ctx->stack + i; | |
if (a->kind == OBJECT_INT) | |
{ | |
printf("(int) %" PRIi64, a->t_int); | |
} | |
else | |
{ | |
printf("(not implemented)"); | |
} | |
printf("\n"); | |
} | |
} | |
void interp(ExecCtx *ctx) | |
{ | |
for (;;) | |
{ | |
Frame *frame = get_frame(ctx); | |
Instr *instr = ctx->instrs + frame->pc; | |
if (frame->pc == ctx->num_instrs) | |
{ | |
printf("Bye.\n"); | |
return; | |
} | |
if (instr->kind == INSTR_CONST) | |
{ | |
push_int(ctx, instr->t_int); | |
++ frame->pc; | |
} | |
else if (instr->kind == INSTR_OP) | |
{ | |
exec_op(ctx, instr->t_op); | |
++ frame->pc; | |
} | |
else if (instr->kind == INSTR_JMP) | |
{ | |
frame->pc = instr->t_jmp.pc; | |
} | |
else if (instr->kind == INSTR_JMPIF) | |
{ | |
Object object = pop_object(ctx); | |
assert(object.kind == OBJECT_INT); | |
int64_t val = object.t_int; | |
int t_cmp = instr->t_jmpif.kind; | |
int ok = 0; | |
if (t_cmp == CMP_EQ && val == 0) | |
ok = 1; | |
else if (t_cmp == CMP_NE && val != 0) | |
ok = 1; | |
else if (t_cmp == CMP_LT && val < 0) | |
ok = 1; | |
else if (t_cmp == CMP_LE && val <= 0) | |
ok = 1; | |
else if (t_cmp == CMP_GT && val > 0) | |
ok = 1; | |
else if (t_cmp == CMP_GE && val >= 0) | |
ok = 1; | |
if (ok) | |
{ | |
int64_t pc = instr->t_jmpif.pc; | |
assert(0 <= pc && pc < ctx->num_instrs); | |
frame->pc = instr->t_jmpif.pc; | |
} | |
else | |
{ | |
++ frame->pc; | |
} | |
} | |
else if (instr->kind == INSTR_DEBUG) | |
{ | |
print_stack(ctx); | |
++ frame->pc; | |
} | |
} | |
} | |
void push_instr(ExecCtx *ctx, Instr instr) | |
{ | |
assert(ctx->num_instrs < ARRAY_LENGTH(ctx->instrs)); | |
ctx->instrs[ctx->num_instrs] = instr; | |
++ ctx->num_instrs; | |
} | |
#define ALL_TOKENS \ | |
DEFINE_TOKEN(T_intlit) \ | |
DEFINE_TOKEN(T_name) \ | |
DEFINE_TOKEN(T_plus) \ | |
DEFINE_TOKEN(T_minus) \ | |
DEFINE_TOKEN(T_if) \ | |
DEFINE_TOKEN(T_braceleft) \ | |
DEFINE_TOKEN(T_braceright) \ | |
DEFINE_TOKEN(T_bracketleft) \ | |
DEFINE_TOKEN(T_bracketright) \ | |
DEFINE_TOKEN(T_parenleft) \ | |
DEFINE_TOKEN(T_parenright) \ | |
DEFINE_TOKEN(T_semicolon) \ | |
DEFINE_TOKEN(T_comma) \ | |
DEFINE_TOKEN(T_dot) \ | |
DEFINE_TOKEN(T_debug) | |
#define ALL_STMTS \ | |
DEFINE_STMT(STMT_if) \ | |
DEFINE_STMT(STMT_expr) | |
#define ALL_EXPRS \ | |
DEFINE_EXPR(EXPR_const) \ | |
DEFINE_EXPR(EXPR_binop) \ | |
DEFINE_EXPR(EXPR_call) \ | |
DEFINE_EXPR(EXPR_name) | |
enum | |
{ | |
#define DEFINE_TOKEN(x) x, | |
ALL_TOKENS | |
#undef DEFINE_TOKEN | |
NUM_TOKEN_KINDS, | |
}; | |
enum | |
{ | |
#define DEFINE_EXPR(x) x, | |
ALL_EXPRS | |
#undef DEFINE_EXPR | |
NUM_EXPR_KINDS, | |
}; | |
enum | |
{ | |
#define DEFINE_STMT(x) x, | |
ALL_STMTS | |
#undef DEFINE_STMT | |
NUM_STMT_KINDS, | |
}; | |
const char *const token_kind_string[] = { | |
#define DEFINE_TOKEN(x) #x, | |
ALL_TOKENS | |
#undef DEFINE_TOKEN | |
}; | |
const char *const expr_kind_string[] = { | |
#define DEFINE_EXPR(x) #x, | |
ALL_EXPRS | |
#undef DEFINE_EXPR | |
}; | |
const char *const stmt_kind_string[] = { | |
#define DEFINE_STMT(x) #x, | |
ALL_STMTS | |
#undef DEFINE_STMT | |
}; | |
typedef struct _Token Token; | |
struct _Token | |
{ | |
int kind; | |
union | |
{ | |
char t_name[32]; | |
int64_t t_intlit; | |
}; | |
}; | |
typedef struct _Expr Expr; | |
struct _Expr | |
{ | |
int kind; | |
union | |
{ | |
Token t_token; | |
struct | |
{ | |
int opkind; // for now a T_? enum | |
Expr *left; | |
Expr *right; | |
} t_binop; | |
struct | |
{ | |
Expr *callee_expr; | |
Expr *arg_expr; | |
} t_call; | |
}; | |
}; | |
typedef struct _Stmt Stmt; | |
struct _Stmt | |
{ | |
int kind; | |
union | |
{ | |
struct | |
{ | |
Expr *cond_expr; | |
Stmt *body_stmt; | |
Stmt *else_stmt; | |
} t_if; | |
Expr *t_expr; | |
}; | |
}; | |
Expr *alloc_expr(void) | |
{ | |
return xcalloc(1, Expr); | |
} | |
Stmt *alloc_stmt(void) | |
{ | |
return xcalloc(1, Stmt); | |
} | |
typedef struct _Parse Parse; | |
struct _Parse | |
{ | |
const char *buffer; | |
int size; | |
int cursor; | |
int have_token; | |
Token token; | |
}; | |
void NORETURN parse_error_f(Parse *p, const char *fmt, ...) | |
{ | |
fprintf(stderr, "At %d: ", p->cursor); | |
va_list ap; | |
va_start(ap, fmt); | |
fatal_fv(fmt, ap); | |
va_end(ap);//XXX | |
} | |
int look_char(Parse *p) | |
{ | |
if (p->cursor == p->size) | |
{ | |
return -1; | |
} | |
return p->buffer[p->cursor]; | |
} | |
void consume_char(Parse *p) | |
{ | |
assert(p->cursor < p->size); | |
++ p->cursor; | |
} | |
int look_token(Parse *p) | |
{ | |
if (p->have_token) | |
{ | |
return 1; | |
} | |
Token *t = &p->token; | |
int c = look_char(p); | |
if (c == -1) | |
{ | |
return 0; | |
} | |
while (c <= 32) | |
{ | |
consume_char(p); | |
c = look_char(p); | |
if (c == -1) | |
{ | |
return 0; | |
} | |
} | |
if (('a' <= c && c <= 'z') | |
|| ('A' <= c && c <= 'Z') | |
|| (c == '_')) | |
{ | |
int i = 0; | |
for (;;) | |
{ | |
t->t_name[i] = c; | |
++ i; | |
consume_char(p); | |
c = look_char(p); | |
if (c == -1) | |
{ | |
break; | |
} | |
if (!('a' <= c && c <= 'z') | |
|| ('A' <= c && c <= 'Z') | |
|| ('0' <= c && c <= '9') | |
|| (c == '_')) | |
{ | |
break; | |
} | |
} | |
t->t_name[i] = 0; | |
if (!strcmp(t->t_name, "if")) | |
{ | |
t->kind = T_if; | |
} | |
else | |
{ | |
t->kind = T_name; | |
} | |
p->have_token = 1; | |
} | |
else if ('0' <= c && c <= '9') | |
{ | |
int64_t n = c - '0'; | |
for (;;) | |
{ | |
consume_char(p); | |
c = look_char(p); | |
if (c == -1) | |
{ | |
break; | |
} | |
if (c < '0' || c > '9') | |
{ | |
break; | |
} | |
n = 10 * n + c - '0'; | |
} | |
t->t_intlit = n; | |
t->kind = T_intlit; | |
p->have_token = 1; | |
} | |
#define CHAR_TOKEN(ch, tkind) \ | |
else if (c == ch) \ | |
{ \ | |
consume_char(p); \ | |
t->kind = tkind; \ | |
p->have_token = 1; \ | |
} | |
CHAR_TOKEN('+', T_plus) | |
CHAR_TOKEN('-', T_minus) | |
CHAR_TOKEN('{', T_braceleft) | |
CHAR_TOKEN('}', T_braceright) | |
CHAR_TOKEN('(', T_parenleft) | |
CHAR_TOKEN(')', T_parenright) | |
CHAR_TOKEN('[', T_bracketleft) | |
CHAR_TOKEN(']', T_bracketright) | |
CHAR_TOKEN(';', T_semicolon) | |
CHAR_TOKEN(',', T_comma) | |
CHAR_TOKEN('.', T_dot) | |
else | |
{ | |
// TODO | |
} | |
return p->have_token; | |
} | |
void consume_token(Parse *p) | |
{ | |
assert(p->have_token); | |
p->have_token = 0; | |
} | |
void print_token(Token *t) | |
{ | |
if (t->kind == T_intlit) | |
{ | |
printf("(int) %" PRIi64, t->t_intlit); | |
} | |
else if (t->kind == T_name) | |
{ | |
printf("(name) %s", t->t_name); | |
} | |
else if (t->kind == T_plus) | |
{ | |
printf("(plus) +"); | |
} | |
else if (t->kind == T_minus) | |
{ | |
printf("(minus) -"); | |
} | |
else if (t->kind == T_if) | |
{ | |
printf("(if) if"); | |
} | |
else if (t->kind == T_debug) | |
{ | |
printf("(debug) debug"); | |
} | |
else | |
{ | |
printf("(unknown)"); | |
} | |
printf("\n"); | |
} | |
void expect_token(Parse *p, int kind) | |
{ | |
if (!look_token(p)) | |
{ | |
parse_error_f(p, "expected %s token, found end of file", | |
token_kind_string[kind]); | |
} | |
Token *t = &p->token; | |
if (t->kind != kind) | |
{ | |
parse_error_f(p, "expected %s token, found %s token.", | |
token_kind_string[kind], | |
token_kind_string[t->kind]); | |
} | |
consume_token(p); | |
} | |
Expr *parse_expr(Parse *p) | |
{ | |
if (!look_token(p)) | |
{ | |
fatal_f("Expected expression, found end of file"); | |
} | |
Token *t = &p->token; | |
Expr *e = NULL; | |
if (t->kind == T_name) | |
{ | |
e = alloc_expr(); | |
e->kind = EXPR_name; | |
e->t_token = *t; | |
consume_token(p); | |
} | |
else if (t->kind == T_intlit) | |
{ | |
e = alloc_expr(); | |
e->kind = EXPR_const; | |
e->t_token = *t; | |
consume_token(p); | |
} | |
else | |
{ | |
parse_error_f(p, "Failed to parse expression (got: %s token)", | |
token_kind_string[t->kind]); | |
} | |
for (;;) | |
{ | |
if (!look_token(p)) | |
{ | |
return e; | |
} | |
if (t->kind == T_plus | |
|| t->kind == T_minus) | |
{ | |
int opkind = (t->kind == T_plus) ? OP_ADD : OP_SUB; | |
consume_token(p); | |
Expr *left = e; | |
Expr *right = parse_expr(p); | |
e = alloc_expr(); | |
e->kind = EXPR_binop; | |
e->t_binop.opkind = opkind; | |
e->t_binop.left = left; | |
e->t_binop.right = right; | |
} | |
else if (t->kind == T_parenleft) | |
{ | |
Expr *callee = e; | |
consume_token(p); | |
// parse arguments. TODO support multiple arguments | |
Expr *arg = parse_expr(p); | |
expect_token(p, T_parenright); | |
e = alloc_expr(); | |
e->kind = EXPR_call; | |
e->t_call.callee_expr = callee; | |
e->t_call.arg_expr = arg; | |
} | |
else | |
{ | |
break; | |
} | |
} | |
return e; | |
} | |
Stmt *parse_stmt(Parse *p) | |
{ | |
if (!look_token(p)) | |
{ | |
assert(0); | |
} | |
Token *t = &p->token; | |
if (t->kind == T_if) | |
{ | |
consume_token(p); | |
expect_token(p, T_parenleft); | |
Expr *e = parse_expr(p); | |
expect_token(p, T_parenright); | |
Stmt *body = parse_stmt(p); | |
Stmt *stmt = alloc_stmt(); | |
stmt->kind = STMT_if; | |
stmt->t_if.cond_expr = e; | |
stmt->t_if.body_stmt = body; | |
stmt->t_if.else_stmt = NULL; //TODO | |
return stmt; | |
} | |
else if (t->kind == T_name | |
|| t->kind == T_intlit) | |
{ | |
Expr *e = parse_expr(p); | |
expect_token(p, T_semicolon); | |
Stmt *stmt = alloc_stmt(); | |
stmt->kind = STMT_expr; | |
stmt->t_expr = e; | |
return stmt; | |
} | |
else | |
{ | |
fatal_f("Failed to parse stmt: %s", | |
token_kind_string[t->kind]); | |
} | |
} | |
const char test_code[] = "if (8 - 3) 5 + 6;"; | |
// returns address where jump location should be stored. So it can be changed | |
// later. | |
int64_t *push_jmpif_instrs(ExecCtx *ctx, int cmp, int64_t jmploc) | |
{ | |
Instr *instr = ctx->instrs + ctx->num_instrs; | |
++ ctx->num_instrs; | |
instr->kind = INSTR_JMPIF; | |
instr->t_jmpif.kind = cmp; | |
instr->t_jmpif.pc = jmploc; | |
return &instr->t_jmp.pc; | |
} | |
void push_const_int_instr(ExecCtx *ctx, int64_t val) | |
{ | |
push_instr(ctx, (Instr) { INSTR_CONST, .t_int = val }); | |
} | |
void push_op_instr(ExecCtx *ctx, int kind) | |
{ | |
push_instr(ctx, (Instr) { INSTR_OP, .t_op = kind }); | |
} | |
void push_debug_instr(ExecCtx *ctx) | |
{ | |
push_instr(ctx, (Instr) { INSTR_DEBUG, }); | |
} | |
void push_expr_instrs(ExecCtx *ctx, Expr *expr) | |
{ | |
if (expr->kind == EXPR_const) | |
{ | |
Token *t = &expr->t_token; | |
assert(t->kind == T_intlit); //XXX | |
push_const_int_instr(ctx, t->t_intlit); | |
} | |
else if (expr->kind == EXPR_binop) | |
{ | |
push_expr_instrs(ctx, expr->t_binop.left); | |
push_expr_instrs(ctx, expr->t_binop.right); | |
push_op_instr(ctx, expr->t_binop.opkind); | |
} | |
else | |
{ | |
fatal_f("Not implemented"); | |
} | |
} | |
void push_stmt_instrs(ExecCtx *ctx, Stmt *stmt) | |
{ | |
if (stmt->kind == STMT_expr) | |
{ | |
push_expr_instrs(ctx, stmt->t_expr); | |
} | |
else if (stmt->kind == STMT_if) | |
{ | |
push_expr_instrs(ctx, stmt->t_if.cond_expr); | |
int64_t *jmploc = push_jmpif_instrs(ctx, CMP_EQ, -666); | |
push_stmt_instrs(ctx, stmt->t_if.body_stmt); | |
*jmploc = ctx->num_instrs; | |
} | |
else | |
{ | |
msg_f("Have a %s stmt", stmt_kind_string[stmt->kind]); | |
fatal_f("Not implemented: compile %s stmt", | |
stmt_kind_string[stmt->kind]); | |
} | |
} | |
int main(void) | |
{ | |
ExecCtx *ctx = &(ExecCtx) {0}; | |
Parse *p = &(Parse) | |
{ .buffer = test_code, .size = sizeof test_code - 1}; | |
Stmt *stmt = parse_stmt(p); | |
msg_f("parsed a %s stmt", stmt_kind_string[stmt->kind]); | |
push_stmt_instrs(ctx, stmt); | |
push_debug_instr(ctx); | |
interp(ctx); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment