Last active
November 29, 2016 21:44
-
-
Save yurapyon/ec73df10ac1b5841c132f610897d7cf3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
// util {{{ | |
typedef int bool; | |
const bool true = (2 == 2); | |
const bool false = (2 != 2); | |
typedef struct node { | |
struct node *next; | |
} node; | |
typedef struct { | |
node *head; | |
node *tail; | |
unsigned int len; | |
} list; | |
void | |
list_init(list *l) | |
{ | |
l->head = NULL; | |
l->tail = NULL; | |
l->len = 0; | |
} | |
void | |
list_push_t(list *l, node *n) | |
{ | |
if (l->len == 0) { | |
l->head = l->tail = n; | |
} else { | |
l->tail->next = n; | |
} | |
n->next = NULL; | |
l->tail = n; | |
l->len++; | |
} | |
node * | |
list_pop_h(list *l) | |
{ | |
if (l->len == 0) return NULL; | |
node *ret = l->head; | |
l->head = ret->next; | |
ret->next = NULL; | |
l->len--; | |
return ret; | |
} | |
char * | |
load_file(const char *filepath) | |
{ | |
FILE *f = fopen(filepath, "r"); | |
if (!f) return NULL; | |
fseek(f, 0, SEEK_END); | |
int len = ftell(f); | |
fseek(f, 0, SEEK_SET); | |
char *ret = malloc(sizeof(char) * len + 1); | |
if (!ret) { | |
fclose(f); | |
return NULL; | |
} | |
int status = fread(ret, sizeof(char), len, f); | |
if (!status) { | |
free(ret); | |
fclose(f); | |
return NULL; | |
} | |
ret[len] = '\0'; | |
fclose(f); | |
return ret; | |
} | |
// }}} | |
// tokens {{{ | |
enum { TOKEN_LPAREN | |
, TOKEN_RPAREN | |
, TOKEN_ATOM }; | |
typedef struct { | |
node n; | |
int type; | |
char *kw; | |
} token; | |
token * | |
token_alloc(int type, unsigned int atom_len) | |
{ | |
token *ret = malloc(sizeof(token)); | |
if (!ret) return NULL; | |
if (type == TOKEN_ATOM) { | |
ret->kw = malloc(sizeof(char) * atom_len); | |
if (!ret->kw) { | |
free(ret); | |
return NULL; | |
} | |
} else { | |
ret->kw = NULL; | |
} | |
ret->type = type; | |
return ret; | |
} | |
void | |
token_free(token *t) | |
{ | |
if (t->type == TOKEN_ATOM && t->kw) | |
free(t->kw); | |
free(t); | |
} | |
void | |
token_print(token *t) | |
{ | |
printf("token: "); | |
switch (t->type) { | |
case TOKEN_LPAREN: | |
printf("l paren\n"); | |
break; | |
case TOKEN_RPAREN: | |
printf("r paren\n"); | |
break; | |
case TOKEN_ATOM: | |
printf("atom %s\n", t->kw); | |
break; | |
default: | |
printf("bad token\n"); | |
break; | |
}; | |
} | |
// }}} | |
// token list {{{ | |
typedef list token_list; | |
// deinit also frees all tokens | |
void | |
token_list_deinit(token_list *tl) | |
{ | |
node *n = list_pop_h(tl); | |
for (; n;) { | |
token_free((token *)n); | |
n = list_pop_h(tl); | |
} | |
} | |
#define MAX_ATOM_LEN 65 | |
void | |
tokenize_lisp_string(char *str, token_list *tl) | |
{ | |
int kw_at = 0; | |
char temp_kw[MAX_ATOM_LEN]; | |
memset(temp_kw, '\0', MAX_ATOM_LEN); | |
for (int i = 0; ; ++i) { | |
token *t; | |
switch (str[i]) { | |
case '\0': | |
return; | |
break; | |
case ' ': case '\n': case '\t': | |
break; | |
case '(': | |
t = token_alloc(TOKEN_LPAREN, 0); | |
if (!t) return; | |
list_push_t((list *)tl, (node *)t); | |
break; | |
case ')': | |
t = token_alloc(TOKEN_RPAREN, 0); | |
if (!t) return; | |
list_push_t((list *)tl, (node *)t); | |
break; | |
default: { | |
for(;;) { | |
if ( str[i] == '(' | |
|| str[i] == ')' | |
|| str[i] == '\0' | |
|| str[i] == ' ' | |
|| str[i] == '\n' | |
|| str[i] == '\t') { | |
temp_kw[kw_at] = '\0'; | |
t = token_alloc(TOKEN_ATOM, kw_at + 1); | |
if (!t) return; | |
strncpy(t->kw, temp_kw, kw_at + 1); | |
list_push_t((list *)tl, (node *)t); | |
kw_at = 0; | |
memset(temp_kw, '\0', MAX_ATOM_LEN); | |
i--; // i is now "on top of" char after end of kw, so move it back | |
break; | |
} | |
if (kw_at < MAX_ATOM_LEN - 1) { | |
temp_kw[kw_at] = str[i]; | |
kw_at++; | |
} | |
i++; | |
} | |
} break; | |
} | |
} | |
} | |
// }}} | |
// lvals {{{ | |
typedef list lval_list; | |
void lval_list_deinit(lval_list *); | |
enum { LVAL_LIST | |
, LVAL_INT | |
, LVAL_KEYWORD }; | |
typedef struct { | |
node n; | |
int type; | |
union { | |
lval_list ll; | |
int i; | |
char *kw; | |
}; | |
} lval; | |
lval * | |
lval_alloc(int type) { | |
lval *ret = malloc(sizeof(lval)); | |
if (!ret) return NULL; | |
if (type == LVAL_KEYWORD) | |
ret->kw = NULL; | |
if (type == LVAL_LIST) | |
list_init(&ret->ll); | |
ret->type = type; | |
return ret; | |
} | |
void | |
lval_free(lval *lv) | |
{ | |
switch (lv->type) { | |
case LVAL_INT: | |
break; | |
case LVAL_KEYWORD: | |
if (lv->kw) { | |
free(lv->kw); | |
} | |
break; | |
case LVAL_LIST: | |
lval_list_deinit(&lv->ll); | |
break; | |
} | |
free(lv); | |
} | |
void | |
lval_print(lval *lv) | |
{ | |
switch (lv->type) { | |
case LVAL_LIST: | |
printf("list: ct %d\n", lv->ll.len); | |
break; | |
case LVAL_INT: | |
printf("int %d\n", lv->i); | |
break; | |
case LVAL_KEYWORD: | |
printf("kw %s\n", lv->kw); | |
break; | |
default: | |
printf("bad lval\n"); | |
break; | |
} | |
} | |
void | |
lval_list_deinit(lval_list *ll) | |
{ | |
node *n = list_pop_h(ll); | |
for (; n;) { | |
lval_free((lval *)n); | |
n = list_pop_h(ll); | |
} | |
} | |
bool | |
try_parse_int(char *str, int *out) | |
{ | |
char *temp = NULL; | |
long val = strtol(str, &temp, 0); | |
if (temp == str || *temp != '\0') { | |
*out = 0; | |
return false; | |
} | |
// TODO make sure it isnt over max | |
*out = (int)val; | |
return true; | |
} | |
lval * | |
token_to_lval(token_list *tl) | |
{ | |
lval *ret; | |
token *t = (token *)list_pop_h(tl); | |
if (!t) { | |
printf("unexpected EOF\n"); | |
return NULL; | |
} | |
switch (t->type) { | |
case TOKEN_LPAREN: | |
ret = lval_alloc(LVAL_LIST); | |
for (;;) { | |
if (tl->head && ((token *)tl->head)->type == TOKEN_RPAREN) break; | |
lval *temp = token_to_lval(tl); | |
if (!temp) { | |
lval_free(ret); | |
token_free(t); | |
return NULL; | |
} | |
list_push_t(&ret->ll, (node *)temp); | |
} | |
token_free((token *)list_pop_h(tl)); | |
token_free(t); | |
return ret; | |
break; | |
case TOKEN_RPAREN: | |
printf("unexpected rparen\n"); | |
token_free(t); | |
return NULL; | |
break; | |
case TOKEN_ATOM: { | |
int i; | |
bool is_int = try_parse_int(t->kw, &i); | |
if (!is_int) { | |
ret = lval_alloc(LVAL_KEYWORD); | |
// transfer token's str to ret | |
ret->kw = t->kw; | |
t->kw = NULL; | |
} else { | |
ret = lval_alloc(LVAL_INT); | |
ret->i = i; | |
} | |
token_free(t); | |
return ret; | |
} break; | |
default: | |
printf("bad token\n"); | |
token_free(t); | |
return NULL; | |
break; | |
} | |
// cant happen | |
printf("crazy unreal error!!!!! AAAHH\n"); | |
token_free(t); | |
return NULL; | |
} | |
// }}} | |
// ast {{{ | |
void | |
construct_ast(token_list *tl, lval_list *out) | |
{ | |
for (;;) { | |
if (tl->len == 0) break; | |
lval *temp = token_to_lval(tl); | |
if (!temp) { | |
lval_list_deinit(out); | |
return; | |
} | |
list_push_t(out, (node *)temp); | |
} | |
} | |
void | |
print_ast(lval_list *ll, int *indent) | |
{ | |
lval *lv; | |
for (node *n = ll->head; n; n = n -> next) { | |
lv = (lval *)n; | |
// indent! | |
int sz = *indent * 2 + 1; | |
char *spaces = malloc(sizeof(char) * sz); | |
if (!spaces) return; | |
memset(spaces, ' ', sz); | |
spaces[sz - 1] = '\0'; | |
printf("%s| ", spaces); | |
free(spaces); | |
lval_print(lv); | |
if (lv->type == LVAL_LIST) { | |
*indent += 1; | |
print_ast(&lv->ll, indent); | |
*indent -= 1; | |
} | |
} | |
} | |
// }}} | |
typedef int (* builtin_fn_ptr) (int, int); | |
typedef struct { | |
char *name; | |
builtin_fn_ptr fn; | |
} builtin; | |
int builtin_add(int a, int b) { return a + b; } | |
int builtin_sub(int a, int b) { return a - b; } | |
int builtin_mul(int a, int b) { return a * b; } | |
int builtin_div(int a, int b) { return a / b; } | |
builtin builtins[] = { | |
{"+", builtin_add} | |
, {"-", builtin_sub} | |
, {"*", builtin_mul} | |
, {"/", builtin_div} | |
, { NULL, NULL } | |
}; | |
builtin_fn_ptr | |
get_builtin(char *name) | |
{ | |
builtin_fn_ptr ret = NULL; | |
for (int i = 0; builtins[i].name != NULL; ++i) { | |
ret = builtins[i].fn; | |
if (!strcmp(builtins[i].name, name)) | |
goto found; | |
} | |
return NULL; | |
found: | |
return ret; | |
} | |
int | |
eval_lval(lval *l) | |
{ | |
switch(l->type) { | |
case LVAL_LIST: { | |
char *func = ((lval *)list_pop_h(&l->ll))->kw; | |
builtin_fn_ptr fn = get_builtin(func); | |
if (!fn) { | |
printf("err: fn \"%s\" not found\n", func); | |
return 0; | |
} | |
int to_eval_at = 0; | |
int to_eval[l->ll.len]; | |
for (node *n = l->ll.head; n; n = n->next) { | |
lval *lv = (lval *)n; | |
to_eval[to_eval_at] = eval_lval(lv); | |
to_eval_at += 1; | |
} | |
printf("evaluating func %s\n", func); | |
int answer = to_eval[0]; | |
for (unsigned int i = 1; i < l->ll.len; ++i) { | |
answer = fn(answer, to_eval[i]); | |
} | |
return answer; | |
} break; | |
case LVAL_INT: | |
return l->i; | |
break; | |
case LVAL_KEYWORD: | |
return 0; | |
break; | |
default: | |
printf("should not reach\n"); | |
break; | |
} | |
return 0; | |
} | |
int | |
main(void) | |
{ | |
token_list tl; | |
list_init(&tl); | |
char *file = load_file("test.l"); | |
if (file) { | |
tokenize_lisp_string(file, &tl); | |
free(file); | |
} else { | |
tokenize_lisp_string("(+ 1 2)", &tl); | |
} | |
// print tokens | |
token *t; | |
printf("tokens: count %d\n", tl.len); | |
for (node *n = tl.head; n; n = n->next) { | |
t = (token *)n; | |
token_print(t); | |
} | |
lval_list ast; | |
list_init(&ast); | |
printf("\nbuilding ast\n"); | |
construct_ast(&tl, &ast); | |
token_list_deinit(&tl); | |
if (ast.len == 0) { | |
printf("ast error\n"); | |
return 1; | |
} | |
printf("ast:\n"); | |
int indent = 0; | |
print_ast(&ast, &indent); | |
printf("\neval\n"); | |
int ans = eval_lval((lval *)ast.head); | |
printf("ans %d\n", ans); | |
lval_list_deinit(&ast); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment