Last active
October 23, 2022 10:54
-
-
Save pervognsen/ee929714736745c81f01b9232d932b8d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <string.h> | |
#include <stdarg.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#define WIN32_LEAN_AND_MEAN | |
#include <windows.h> | |
void error(const char *str) { | |
MessageBoxA(NULL, str, "Error", MB_OK); | |
__debugbreak(); | |
} | |
void errorf(const char *format, ...) { | |
char buffer[1024]; | |
va_list args; | |
va_start(args, format); | |
vsprintf_s(buffer, sizeof(buffer), format, args); | |
va_end(args); | |
error(buffer); | |
} | |
const char *code; | |
enum { | |
MAX_SYMBOLS = 256 | |
}; | |
typedef struct { | |
const char *string; | |
int keyword : 1; | |
int variable : 31; | |
} symbol_t; | |
symbol_t symbols[MAX_SYMBOLS]; | |
size_t num_symbols; | |
size_t num_variables; | |
symbol_t *intern(const char *start, const char *end) { | |
for (size_t i = 0; i < num_symbols; i++) { | |
const char *symbol_string = symbols[i].string; | |
const char *string = start; | |
while (*symbol_string != 0 && string != end && *symbol_string == *string) { | |
symbol_string++; | |
string++; | |
} | |
if (*symbol_string == 0 && string == end) { | |
return symbols + i; | |
} | |
} | |
if (num_symbols == MAX_SYMBOLS) { | |
error("symbol table overflow"); | |
} | |
size_t size = end - start + 1; | |
char *new_symbol = (char *)malloc(size); | |
memcpy(new_symbol, start, size); | |
new_symbol[size - 1] = 0; | |
num_symbols++; | |
symbols[num_symbols-1].string = new_symbol; | |
symbols[num_symbols-1].keyword = 0; | |
symbols[num_symbols-1].variable = -1; | |
return symbols + num_symbols - 1; | |
} | |
typedef enum { | |
LIT, | |
GET, | |
SET, | |
POP, | |
JMP, | |
BRZ, | |
ADD, | |
SUB, | |
MUL, | |
DIV, | |
MOD, | |
NEG, | |
RET | |
} opcode_t; | |
char *emit_pointer; | |
void emit(uint8_t data) { | |
*emit_pointer++ = data; | |
} | |
void emit4(uint32_t data) { | |
*(uint32_t *)emit_pointer = data; | |
emit_pointer += 4; | |
} | |
char *ip; | |
int *sp; | |
int *fp; | |
int execute() { | |
for (;;) { | |
int op = *ip++; | |
switch (op) { | |
case LIT: | |
*sp++ = *(uint32_t *)ip; | |
ip += 4; | |
break; | |
case GET: | |
*sp++ = fp[*ip]; | |
ip++; | |
break; | |
case SET: | |
fp[*ip] = sp[-1]; | |
ip++; | |
sp--; | |
break; | |
case POP: | |
sp--; | |
break; | |
case JMP: | |
ip += *(int32_t *)ip; | |
break; | |
case BRZ: | |
if (sp[-1] == 0) { | |
ip += *(int32_t *)ip; | |
} else { | |
ip += 4; | |
} | |
sp--; | |
break; | |
case ADD: | |
sp[-2] += sp[-1]; | |
sp--; | |
break; | |
case SUB: | |
sp[-2] -= sp[-1]; | |
sp--; | |
break; | |
case MUL: | |
sp[-2] *= sp[-1]; | |
sp--; | |
break; | |
case DIV: | |
sp[-2] /= sp[-1]; | |
sp--; | |
break; | |
case MOD: | |
sp[-2] %= sp[-1]; | |
sp--; | |
break; | |
case NEG: | |
sp[-1] = -sp[-1]; | |
break; | |
case RET: | |
return sp[-1]; | |
} | |
} | |
} | |
typedef enum { | |
TOKEN_NUMBER = 128, | |
TOKEN_IDENTIFIER, | |
TOKEN_EQUALS, | |
TOKEN_KEYWORD | |
} token_t; | |
token_t token; | |
int token_number; | |
symbol_t *token_symbol; | |
symbol_t *symbol_if; | |
symbol_t *symbol_else; | |
symbol_t *symbol_elseif; | |
symbol_t *symbol_while; | |
symbol_t *symbol_for; | |
symbol_t *symbol_return; | |
#define KEYWORD(name) \ | |
symbol_##name = intern(#name, #name + strlen(#name)); \ | |
symbol_##name->keyword = 1; | |
void initialize_keywords() { | |
KEYWORD(if); | |
KEYWORD(else); | |
KEYWORD(elseif); | |
KEYWORD(while); | |
KEYWORD(for); | |
KEYWORD(return); | |
} | |
void next_token() { | |
token_symbol = NULL; | |
while (isspace(*code)) { | |
code++; | |
} | |
if (isdigit(*code)) { | |
token_number = 0; | |
while (isdigit(*code)) { | |
token_number *= 10; | |
token_number += *code - '0'; | |
code++; | |
} | |
token = TOKEN_NUMBER; | |
} else if (isalpha(*code)) { | |
const char *start = code; | |
while (isalnum(*code)) { | |
code++; | |
} | |
const char *end = code; | |
token_symbol = intern(start, end); | |
if (token_symbol->keyword) | |
token = TOKEN_KEYWORD; | |
else | |
token = TOKEN_IDENTIFIER; | |
} else { | |
switch (*code) { | |
case 0: | |
case '+': | |
case '-': | |
case '*': | |
case '/': | |
case '%': | |
case '&': | |
case '|': | |
case '^': | |
case '~': | |
case '!': | |
case '(': | |
case ')': | |
case '{': | |
case '}': | |
case ';': | |
token = (token_t)*code; | |
code++; | |
break; | |
case '=': | |
code++; | |
if (*code == '=') { | |
code++; | |
token = TOKEN_EQUALS; | |
} else { | |
token = (token_t)'='; | |
} | |
break; | |
default: | |
errorf("Unexpected initial token character: '%c'", *code); | |
break; | |
} | |
} | |
} | |
void expect_token(int expected_token) { | |
if (token != expected_token) { | |
errorf("Expected token %d, got %d.", expected_token, token); | |
} | |
next_token(); | |
} | |
void expr(); | |
// expr2 = number | identifier | '(' expr ')' | '-' expr2 | |
void expr2() { | |
if (token == TOKEN_NUMBER) { | |
next_token(); | |
emit(LIT); | |
emit4(token_number); | |
} else if (token == TOKEN_IDENTIFIER) { | |
if (token_symbol->variable == -1) { | |
token_symbol->variable = num_variables++; | |
} | |
int variable = token_symbol->variable; | |
next_token(); | |
if (token == '=') { | |
next_token(); | |
expr(); | |
emit(SET); | |
emit((char)variable); | |
} | |
emit(GET); | |
emit((char)variable); | |
} else if (token == '(') { | |
next_token(); | |
expr(); | |
expect_token(')'); | |
} else if (token == '-') { | |
next_token(); | |
expr2(); | |
emit(NEG); | |
} else { | |
errorf("Unexpected token at beginning of expression."); | |
} | |
} | |
typedef struct { | |
char *address; | |
char *reference; | |
} label_t; | |
void emit_label_reference(label_t *label) { | |
char *reference = emit_pointer; | |
if (label->address) { | |
emit4(label->address - reference); | |
} else { | |
emit4(label->reference - reference); | |
label->reference = reference; | |
} | |
} | |
void resolve_label(label_t *label) { | |
char *address = emit_pointer; | |
label->address = address; | |
char *reference = label->reference; | |
while (reference) { | |
int32_t offset = *(int32_t *)reference; | |
*(int32_t *)reference = address - reference; | |
reference += offset; | |
} | |
label->reference = NULL; | |
} | |
// expr1 = expr2 {'*' expr2 | '/' expr2 | '%' expr2} | |
void expr1() { | |
expr2(); | |
while (token == '*' || token == '/' || token == '%') { | |
token_t op = token; | |
next_token(); | |
expr2(); | |
if (op == '*') { | |
emit(MUL); | |
} else if (op == '/') { | |
emit(DIV); | |
} else { | |
emit(MOD); | |
} | |
} | |
} | |
// expr = expr1 {'+' expr1 | '-' expr1} | |
void expr() { | |
expr1(); | |
while (token == '+' || token == '-') { | |
token_t op = token; | |
next_token(); | |
expr1(); | |
if (op == '+') { | |
emit(ADD); | |
} else { | |
emit(SUB); | |
} | |
} | |
} | |
void stmt(); | |
// stmt_block = '{' {stmt} '}' | |
void stmt_block() { | |
expect_token('{'); | |
while (token && token != '}') { | |
stmt(); | |
} | |
expect_token('}'); | |
} | |
// stmt = 'if' expr stmt_block {'elseif' expr stmt_block} ['else' stmt_block] | |
// | 'while' expr stmt_block | |
// | 'return' expr ';' | |
// | stmt_block | |
// | expr ';' | |
void stmt() { | |
if (token_symbol == symbol_if) { | |
label_t next = {0}; | |
label_t end = {0}; | |
next_token(); | |
expr(); | |
emit(BRZ); | |
emit_label_reference(&next); | |
stmt_block(); | |
while (token_symbol == symbol_elseif) { | |
next_token(); | |
emit(JMP); | |
emit_label_reference(&end); | |
resolve_label(&next); | |
next.address = NULL; | |
expr(); | |
emit(BRZ); | |
emit_label_reference(&next); | |
stmt_block(); | |
} | |
if (token_symbol == symbol_else) { | |
next_token(); | |
emit(JMP); | |
emit_label_reference(&end); | |
resolve_label(&next); | |
stmt_block(); | |
} | |
resolve_label(&end); | |
} else if (token_symbol == symbol_while) { | |
next_token(); | |
label_t start = {0}; | |
resolve_label(&start); | |
expr(); | |
emit(BRZ); | |
label_t end = {0}; | |
emit_label_reference(&end); | |
stmt_block(); | |
emit(JMP); | |
emit_label_reference(&start); | |
resolve_label(&end); | |
} else if (token_symbol == symbol_return) { | |
next_token(); | |
expr(); | |
expect_token(';'); | |
emit(RET); | |
} else if (token == '{') { | |
stmt_block(); | |
} else { | |
expr(); | |
emit(POP); | |
expect_token(';'); | |
} | |
} | |
int WINAPI WinMain(HINSTANCE instance, HINSTANCE prev_instance, LPSTR cmd_line, int cmd_show) { | |
char emit_buffer[1024]; | |
emit_pointer = emit_buffer; | |
initialize_keywords(); | |
// code = "{ x = 42; y = 1 + 3; return x + y; }"; | |
// code = "{ x = 1; if 1 { x = 2; } return x+2; }"; | |
// code = "{ x = 1; if 0 { x = 2; } else { x = 3; } return x+2; }"; | |
code = "{ x = 1; if 0 { x = 2; } elseif 1 { x = 3; } else { x = 4; } return x+2; }"; | |
code = "{ x = 1; n = 3; while n { x = x * 2; n = n - 1; } return x; }"; | |
next_token(); | |
stmt(); | |
int frame[1024]; | |
int stack[1024]; | |
ip = emit_buffer; | |
fp = frame; | |
sp = stack; | |
int val = execute(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment