Skip to content

Instantly share code, notes, and snippets.

@pervognsen
Last active October 23, 2022 10:54
Show Gist options
  • Save pervognsen/ee929714736745c81f01b9232d932b8d to your computer and use it in GitHub Desktop.
Save pervognsen/ee929714736745c81f01b9232d932b8d to your computer and use it in GitHub Desktop.
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
void error(const char *str) {
MessageBoxA(NULL, str, "Error", MB_OK);
__debugbreak();
}
void errorf(const char *format, ...) {
char buffer[1024];
va_list args;
va_start(args, format);
vsprintf_s(buffer, sizeof(buffer), format, args);
va_end(args);
error(buffer);
}
const char *code;
enum {
MAX_SYMBOLS = 256
};
typedef struct {
const char *string;
int keyword : 1;
int variable : 31;
} symbol_t;
symbol_t symbols[MAX_SYMBOLS];
size_t num_symbols;
size_t num_variables;
symbol_t *intern(const char *start, const char *end) {
for (size_t i = 0; i < num_symbols; i++) {
const char *symbol_string = symbols[i].string;
const char *string = start;
while (*symbol_string != 0 && string != end && *symbol_string == *string) {
symbol_string++;
string++;
}
if (*symbol_string == 0 && string == end) {
return symbols + i;
}
}
if (num_symbols == MAX_SYMBOLS) {
error("symbol table overflow");
}
size_t size = end - start + 1;
char *new_symbol = (char *)malloc(size);
memcpy(new_symbol, start, size);
new_symbol[size - 1] = 0;
num_symbols++;
symbols[num_symbols-1].string = new_symbol;
symbols[num_symbols-1].keyword = 0;
symbols[num_symbols-1].variable = -1;
return symbols + num_symbols - 1;
}
typedef enum {
LIT,
GET,
SET,
POP,
JMP,
BRZ,
ADD,
SUB,
MUL,
DIV,
MOD,
NEG,
RET
} opcode_t;
char *emit_pointer;
void emit(uint8_t data) {
*emit_pointer++ = data;
}
void emit4(uint32_t data) {
*(uint32_t *)emit_pointer = data;
emit_pointer += 4;
}
char *ip;
int *sp;
int *fp;
int execute() {
for (;;) {
int op = *ip++;
switch (op) {
case LIT:
*sp++ = *(uint32_t *)ip;
ip += 4;
break;
case GET:
*sp++ = fp[*ip];
ip++;
break;
case SET:
fp[*ip] = sp[-1];
ip++;
sp--;
break;
case POP:
sp--;
break;
case JMP:
ip += *(int32_t *)ip;
break;
case BRZ:
if (sp[-1] == 0) {
ip += *(int32_t *)ip;
} else {
ip += 4;
}
sp--;
break;
case ADD:
sp[-2] += sp[-1];
sp--;
break;
case SUB:
sp[-2] -= sp[-1];
sp--;
break;
case MUL:
sp[-2] *= sp[-1];
sp--;
break;
case DIV:
sp[-2] /= sp[-1];
sp--;
break;
case MOD:
sp[-2] %= sp[-1];
sp--;
break;
case NEG:
sp[-1] = -sp[-1];
break;
case RET:
return sp[-1];
}
}
}
typedef enum {
TOKEN_NUMBER = 128,
TOKEN_IDENTIFIER,
TOKEN_EQUALS,
TOKEN_KEYWORD
} token_t;
token_t token;
int token_number;
symbol_t *token_symbol;
symbol_t *symbol_if;
symbol_t *symbol_else;
symbol_t *symbol_elseif;
symbol_t *symbol_while;
symbol_t *symbol_for;
symbol_t *symbol_return;
#define KEYWORD(name) \
symbol_##name = intern(#name, #name + strlen(#name)); \
symbol_##name->keyword = 1;
void initialize_keywords() {
KEYWORD(if);
KEYWORD(else);
KEYWORD(elseif);
KEYWORD(while);
KEYWORD(for);
KEYWORD(return);
}
void next_token() {
token_symbol = NULL;
while (isspace(*code)) {
code++;
}
if (isdigit(*code)) {
token_number = 0;
while (isdigit(*code)) {
token_number *= 10;
token_number += *code - '0';
code++;
}
token = TOKEN_NUMBER;
} else if (isalpha(*code)) {
const char *start = code;
while (isalnum(*code)) {
code++;
}
const char *end = code;
token_symbol = intern(start, end);
if (token_symbol->keyword)
token = TOKEN_KEYWORD;
else
token = TOKEN_IDENTIFIER;
} else {
switch (*code) {
case 0:
case '+':
case '-':
case '*':
case '/':
case '%':
case '&':
case '|':
case '^':
case '~':
case '!':
case '(':
case ')':
case '{':
case '}':
case ';':
token = (token_t)*code;
code++;
break;
case '=':
code++;
if (*code == '=') {
code++;
token = TOKEN_EQUALS;
} else {
token = (token_t)'=';
}
break;
default:
errorf("Unexpected initial token character: '%c'", *code);
break;
}
}
}
void expect_token(int expected_token) {
if (token != expected_token) {
errorf("Expected token %d, got %d.", expected_token, token);
}
next_token();
}
void expr();
// expr2 = number | identifier | '(' expr ')' | '-' expr2
void expr2() {
if (token == TOKEN_NUMBER) {
next_token();
emit(LIT);
emit4(token_number);
} else if (token == TOKEN_IDENTIFIER) {
if (token_symbol->variable == -1) {
token_symbol->variable = num_variables++;
}
int variable = token_symbol->variable;
next_token();
if (token == '=') {
next_token();
expr();
emit(SET);
emit((char)variable);
}
emit(GET);
emit((char)variable);
} else if (token == '(') {
next_token();
expr();
expect_token(')');
} else if (token == '-') {
next_token();
expr2();
emit(NEG);
} else {
errorf("Unexpected token at beginning of expression.");
}
}
typedef struct {
char *address;
char *reference;
} label_t;
void emit_label_reference(label_t *label) {
char *reference = emit_pointer;
if (label->address) {
emit4(label->address - reference);
} else {
emit4(label->reference - reference);
label->reference = reference;
}
}
void resolve_label(label_t *label) {
char *address = emit_pointer;
label->address = address;
char *reference = label->reference;
while (reference) {
int32_t offset = *(int32_t *)reference;
*(int32_t *)reference = address - reference;
reference += offset;
}
label->reference = NULL;
}
// expr1 = expr2 {'*' expr2 | '/' expr2 | '%' expr2}
void expr1() {
expr2();
while (token == '*' || token == '/' || token == '%') {
token_t op = token;
next_token();
expr2();
if (op == '*') {
emit(MUL);
} else if (op == '/') {
emit(DIV);
} else {
emit(MOD);
}
}
}
// expr = expr1 {'+' expr1 | '-' expr1}
void expr() {
expr1();
while (token == '+' || token == '-') {
token_t op = token;
next_token();
expr1();
if (op == '+') {
emit(ADD);
} else {
emit(SUB);
}
}
}
void stmt();
// stmt_block = '{' {stmt} '}'
void stmt_block() {
expect_token('{');
while (token && token != '}') {
stmt();
}
expect_token('}');
}
// stmt = 'if' expr stmt_block {'elseif' expr stmt_block} ['else' stmt_block]
// | 'while' expr stmt_block
// | 'return' expr ';'
// | stmt_block
// | expr ';'
void stmt() {
if (token_symbol == symbol_if) {
label_t next = {0};
label_t end = {0};
next_token();
expr();
emit(BRZ);
emit_label_reference(&next);
stmt_block();
while (token_symbol == symbol_elseif) {
next_token();
emit(JMP);
emit_label_reference(&end);
resolve_label(&next);
next.address = NULL;
expr();
emit(BRZ);
emit_label_reference(&next);
stmt_block();
}
if (token_symbol == symbol_else) {
next_token();
emit(JMP);
emit_label_reference(&end);
resolve_label(&next);
stmt_block();
}
resolve_label(&end);
} else if (token_symbol == symbol_while) {
next_token();
label_t start = {0};
resolve_label(&start);
expr();
emit(BRZ);
label_t end = {0};
emit_label_reference(&end);
stmt_block();
emit(JMP);
emit_label_reference(&start);
resolve_label(&end);
} else if (token_symbol == symbol_return) {
next_token();
expr();
expect_token(';');
emit(RET);
} else if (token == '{') {
stmt_block();
} else {
expr();
emit(POP);
expect_token(';');
}
}
int WINAPI WinMain(HINSTANCE instance, HINSTANCE prev_instance, LPSTR cmd_line, int cmd_show) {
char emit_buffer[1024];
emit_pointer = emit_buffer;
initialize_keywords();
// code = "{ x = 42; y = 1 + 3; return x + y; }";
// code = "{ x = 1; if 1 { x = 2; } return x+2; }";
// code = "{ x = 1; if 0 { x = 2; } else { x = 3; } return x+2; }";
code = "{ x = 1; if 0 { x = 2; } elseif 1 { x = 3; } else { x = 4; } return x+2; }";
code = "{ x = 1; n = 3; while n { x = x * 2; n = n - 1; } return x; }";
next_token();
stmt();
int frame[1024];
int stack[1024];
ip = emit_buffer;
fp = frame;
sp = stack;
int val = execute();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment