yurapyon · November 29, 2016 21:44
diff --git a/lisp_parser.c b/lisp_parser.c
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 // util {{{

 typedef int bool;
 const bool true = (2 == 2);
 const bool false = (2 != 2);

 typedef struct node {
    struct node *next;
 } node;

 typedef struct {
    node *head;
    node *tail;
    unsigned int len;
 } list;

 void
 list_init(list *l)
 {
    l->head = NULL;
    l->tail = NULL;
    l->len = 0;
 }

 void
 list_push_t(list *l, node *n)
 {
    if (l->len == 0) {
        l->head = l->tail = n;
    } else {
        l->tail->next = n;
    }
    n->next = NULL;
    l->tail = n;
    l->len++;
 }

 node *
 list_pop_h(list *l)
 {
    if (l->len == 0) return NULL;
    node *ret = l->head;
    l->head = ret->next;
    ret->next = NULL;
    l->len--;
    return ret;
 }

 char *
 load_file(const char *filepath)
 {
    FILE *f = fopen(filepath, "r");
    if (!f) return NULL;

    fseek(f, 0, SEEK_END);
    int len = ftell(f);
    fseek(f, 0, SEEK_SET);

    char *ret = malloc(sizeof(char) * len + 1);
    if (!ret) {
        fclose(f);
        return NULL;
    }

    int status = fread(ret, sizeof(char), len, f);
    if (!status) {
        free(ret);
        fclose(f);
        return NULL;
    }

    ret[len] = '\0';

    fclose(f);
    return ret;
 }

 // }}}

 // tokens {{{

 enum { TOKEN_LPAREN
     , TOKEN_RPAREN
     , TOKEN_ATOM };

 typedef struct {
    node n;
    int type;
    char *kw;
 } token;

 token *
 token_alloc(int type, unsigned int atom_len)
 {
    token *ret = malloc(sizeof(token));
    if (!ret) return NULL;

    if (type == TOKEN_ATOM) {
        ret->kw = malloc(sizeof(char) * atom_len);
        if (!ret->kw) {
            free(ret);
            return NULL;
        }
    } else {
        ret->kw = NULL;
    }

    ret->type = type;
    return ret;
 }

 void
 token_free(token *t)
 {
    if (t->type == TOKEN_ATOM && t->kw)
        free(t->kw);
    free(t);
 }

 void
 token_print(token *t)
 {
    printf("token: ");
    switch (t->type) {
    case TOKEN_LPAREN:
        printf("l paren\n");
        break;
    case TOKEN_RPAREN:
        printf("r paren\n");
        break;
    case TOKEN_ATOM:
        printf("atom %s\n", t->kw);
        break;
    default:
        printf("bad token\n");
        break;
    };
 }

 // }}}

 // token list {{{

 typedef list token_list;

 // deinit also frees all tokens
 void
 token_list_deinit(token_list *tl)
 {
    node *n = list_pop_h(tl);
    for (; n;) {
        token_free((token *)n);
        n = list_pop_h(tl);
    }
 }

 #define MAX_ATOM_LEN 65

 void
 tokenize_lisp_string(char *str, token_list *tl)
 {
    int kw_at = 0;
    char temp_kw[MAX_ATOM_LEN];
    memset(temp_kw, '\0', MAX_ATOM_LEN);
    for (int i = 0; ; ++i) {
        token *t;
        switch (str[i]) {

        case '\0':
            return;
            break;

        case ' ': case '\n': case '\t':
            break;

        case '(':
            t = token_alloc(TOKEN_LPAREN, 0);
            if (!t) return;
            list_push_t((list *)tl, (node *)t);
            break;

        case ')':
            t = token_alloc(TOKEN_RPAREN, 0);
            if (!t) return;
            list_push_t((list *)tl, (node *)t);
            break;

        default: {
                for(;;) {
                    if (   str[i] == '('
                        || str[i] == ')'
                        || str[i] == '\0'
                        || str[i] == ' '
                        || str[i] == '\n'
                        || str[i] == '\t') {

                        temp_kw[kw_at] = '\0';

                        t = token_alloc(TOKEN_ATOM, kw_at + 1);
                        if (!t) return;
                        strncpy(t->kw, temp_kw, kw_at + 1);
                        list_push_t((list *)tl, (node *)t);

                        kw_at = 0;
                        memset(temp_kw, '\0', MAX_ATOM_LEN);
                        i--; // i is now "on top of" char after end of kw, so move it back
                        break;
                    }
                    if (kw_at < MAX_ATOM_LEN - 1) {
                        temp_kw[kw_at] = str[i];
                        kw_at++;
                    }
                    i++;
                }
            } break;
        }
    }
 }

 // }}}

 // lvals {{{

 typedef list lval_list;
 void lval_list_deinit(lval_list *);

 enum { LVAL_LIST
     , LVAL_INT
     , LVAL_KEYWORD };

 typedef struct {
    node n;
    int type;
    union {
        lval_list ll;
        int i;
        char *kw;
    };
 } lval;

 lval *
 lval_alloc(int type) {
    lval *ret = malloc(sizeof(lval));
    if (!ret) return NULL;

    if (type == LVAL_KEYWORD)
        ret->kw = NULL;

    if (type == LVAL_LIST)
        list_init(&ret->ll);

    ret->type = type;
    return ret;
 }

 void
 lval_free(lval *lv)
 {
    switch (lv->type) {
    case LVAL_INT:
        break;
    case LVAL_KEYWORD:
        if (lv->kw) {
            free(lv->kw);
        }
        break;
    case LVAL_LIST: 
        lval_list_deinit(&lv->ll);
        break;
    }
    free(lv);
 }

 void
 lval_print(lval *lv)
 {
    switch (lv->type) {
    case LVAL_LIST:
        printf("list: ct %d\n", lv->ll.len);
        break;
    case LVAL_INT:
        printf("int %d\n", lv->i);
        break;
    case LVAL_KEYWORD:
        printf("kw  %s\n", lv->kw);
        break;
    default:
        printf("bad lval\n");
        break;
    }
 }

 void
 lval_list_deinit(lval_list *ll)
 {
    node *n = list_pop_h(ll);
    for (; n;) {
        lval_free((lval *)n);
        n = list_pop_h(ll);
    }
 }

 bool
 try_parse_int(char *str, int *out)
 {
    char *temp = NULL;
    long val = strtol(str, &temp, 0);
    if (temp == str || *temp != '\0') {
        *out = 0;
        return false;
    }
    // TODO make sure it isnt over max
    *out = (int)val;
    return true;
 }

 lval *
 token_to_lval(token_list *tl)
 {
    lval *ret;
    token *t = (token *)list_pop_h(tl);
    if (!t) {
        printf("unexpected EOF\n");
        return NULL;
    }

    switch (t->type) {
    case TOKEN_LPAREN:
        ret = lval_alloc(LVAL_LIST);
        for (;;) {
            if (tl->head && ((token *)tl->head)->type == TOKEN_RPAREN) break;

            lval *temp = token_to_lval(tl);
            if (!temp) { 
                lval_free(ret);
                token_free(t);
                return NULL;
            }

            list_push_t(&ret->ll, (node *)temp);
        }
        token_free((token *)list_pop_h(tl));
        token_free(t);
        return ret;
        break;
    case TOKEN_RPAREN:
        printf("unexpected rparen\n");
        token_free(t);
        return NULL;
        break;
    case TOKEN_ATOM: {
            int i;
            bool is_int = try_parse_int(t->kw, &i);

            if (!is_int) {
                ret = lval_alloc(LVAL_KEYWORD);
                // transfer token's str to ret
                ret->kw = t->kw;
                t->kw = NULL;
            } else {
                ret = lval_alloc(LVAL_INT);
                ret->i = i;
            }

            token_free(t);
            return ret;
        } break;
    default:
        printf("bad token\n");
        token_free(t);
        return NULL;
        break;
    }

    // cant happen
    printf("crazy unreal error!!!!! AAAHH\n");
    token_free(t);
    return NULL;
 }

 // }}}

 // ast {{{

 void
 construct_ast(token_list *tl, lval_list *out)
 {
    for (;;) {
        if (tl->len == 0) break;
        lval *temp = token_to_lval(tl);
        if (!temp) { 
            lval_list_deinit(out);
            return;
        }

        list_push_t(out, (node *)temp);
    }
 }

 void
 print_ast(lval_list *ll, int *indent)
 {
    lval *lv;
    for (node *n = ll->head; n; n = n -> next) {
        lv = (lval *)n;

        // indent!
        int sz = *indent * 2 + 1;
        char *spaces = malloc(sizeof(char) * sz);
        if (!spaces) return;
        memset(spaces, ' ', sz);
        spaces[sz - 1] = '\0';
        printf("%s| ", spaces);
        free(spaces);

        lval_print(lv);

        if (lv->type == LVAL_LIST) {
            *indent += 1;
            print_ast(&lv->ll, indent);
            *indent -= 1;
        }
    }
 }

 // }}}

 typedef int (* builtin_fn_ptr) (int, int);
 typedef struct {
    char *name;
    builtin_fn_ptr fn;
 } builtin;

 int builtin_add(int a, int b) { return a + b; }
 int builtin_sub(int a, int b) { return a - b; }
 int builtin_mul(int a, int b) { return a * b; }
 int builtin_div(int a, int b) { return a / b; }

 builtin builtins[] = {
      {"+", builtin_add}
    , {"-", builtin_sub}
    , {"*", builtin_mul}
    , {"/", builtin_div}

    , { NULL, NULL }
 };

 builtin_fn_ptr
 get_builtin(char *name)
 {
    builtin_fn_ptr ret = NULL;
    for (int i = 0; builtins[i].name != NULL; ++i) {
        ret = builtins[i].fn;
        if (!strcmp(builtins[i].name, name))
            goto found;
    }
    return NULL;
 found:
    return ret;
 }

 int
 eval_lval(lval *l)
 {
    switch(l->type) {
    case LVAL_LIST: {

        char *func = ((lval *)list_pop_h(&l->ll))->kw;
        builtin_fn_ptr fn = get_builtin(func);
        if (!fn) {
            printf("err: fn \"%s\" not found\n", func);
            return 0;
        }

        int to_eval_at = 0;
        int to_eval[l->ll.len];
        for (node *n = l->ll.head; n; n = n->next) {
            lval *lv = (lval *)n;
            to_eval[to_eval_at] = eval_lval(lv);
            to_eval_at += 1;
        }

        printf("evaluating func %s\n", func);

        int answer = to_eval[0];
        for (unsigned int i = 1; i < l->ll.len; ++i) {
            answer = fn(answer, to_eval[i]);
        }

         return answer;

        } break;
    case LVAL_INT:
        return l->i;
        break;
    case LVAL_KEYWORD:
        return 0;
        break;
    default:
        printf("should not reach\n");
        break;
    }

    return 0;
 }

 int
 main(void)
 {
    token_list tl;
    list_init(&tl);

    char *file = load_file("test.l");
    if (file) {
        tokenize_lisp_string(file, &tl);
        free(file);
    } else {
        tokenize_lisp_string("(+ 1 2)", &tl);
    }

    // print tokens
    token *t;
    printf("tokens: count %d\n", tl.len);
    for (node *n = tl.head; n; n = n->next) {
        t = (token *)n;
        token_print(t);
    }

    lval_list ast;
    list_init(&ast);
    
    printf("\nbuilding ast\n");
    construct_ast(&tl, &ast);

    token_list_deinit(&tl);

    if (ast.len == 0) {
        printf("ast error\n");
        return 1;
    }

    printf("ast:\n");

    int indent = 0;
    print_ast(&ast, &indent);

    printf("\neval\n");
    int ans = eval_lval((lval *)ast.head);
    printf("ans %d\n", ans);

    lval_list_deinit(&ast);
    
    return 0;
 }
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	// util {{{

	typedef int bool;
	const bool true = (2 == 2);
	const bool false = (2 != 2);

	typedef struct node {
	struct node *next;
	} node;

	typedef struct {
	node *head;
	node *tail;
	unsigned int len;
	} list;

	void
	list_init(list *l)
	{
	l->head = NULL;
	l->tail = NULL;
	l->len = 0;
	}

	void
	list_push_t(list l, node n)
	{
	if (l->len == 0) {
	l->head = l->tail = n;
	} else {
	l->tail->next = n;
	}
	n->next = NULL;
	l->tail = n;
	l->len++;
	}

	node *
	list_pop_h(list *l)
	{
	if (l->len == 0) return NULL;
	node *ret = l->head;
	l->head = ret->next;
	ret->next = NULL;
	l->len--;
	return ret;
	}

	char *
	load_file(const char *filepath)
	{
	FILE *f = fopen(filepath, "r");
	if (!f) return NULL;

	fseek(f, 0, SEEK_END);
	int len = ftell(f);
	fseek(f, 0, SEEK_SET);

	char ret = malloc(sizeof(char) len + 1);
	if (!ret) {
	fclose(f);
	return NULL;
	}

	int status = fread(ret, sizeof(char), len, f);
	if (!status) {
	free(ret);
	fclose(f);
	return NULL;
	}

	ret[len] = '\0';

	fclose(f);
	return ret;
	}

	// }}}

	// tokens {{{

	enum { TOKEN_LPAREN
	, TOKEN_RPAREN
	, TOKEN_ATOM };

	typedef struct {
	node n;
	int type;
	char *kw;
	} token;

	token *
	token_alloc(int type, unsigned int atom_len)
	{
	token *ret = malloc(sizeof(token));
	if (!ret) return NULL;

	if (type == TOKEN_ATOM) {
	ret->kw = malloc(sizeof(char) * atom_len);
	if (!ret->kw) {
	free(ret);
	return NULL;
	}
	} else {
	ret->kw = NULL;
	}

	ret->type = type;
	return ret;
	}

	void
	token_free(token *t)
	{
	if (t->type == TOKEN_ATOM && t->kw)
	free(t->kw);
	free(t);
	}

	void
	token_print(token *t)
	{
	printf("token: ");
	switch (t->type) {
	case TOKEN_LPAREN:
	printf("l paren\n");
	break;
	case TOKEN_RPAREN:
	printf("r paren\n");
	break;
	case TOKEN_ATOM:
	printf("atom %s\n", t->kw);
	break;
	default:
	printf("bad token\n");
	break;
	};
	}

	// }}}

	// token list {{{

	typedef list token_list;

	// deinit also frees all tokens
	void
	token_list_deinit(token_list *tl)
	{
	node *n = list_pop_h(tl);
	for (; n;) {
	token_free((token *)n);
	n = list_pop_h(tl);
	}
	}

	#define MAX_ATOM_LEN 65

	void
	tokenize_lisp_string(char str, token_list tl)
	{
	int kw_at = 0;
	char temp_kw[MAX_ATOM_LEN];
	memset(temp_kw, '\0', MAX_ATOM_LEN);
	for (int i = 0; ; ++i) {
	token *t;
	switch (str[i]) {

	case '\0':
	return;
	break;

	case ' ': case '\n': case '\t':
	break;

	case '(':
	t = token_alloc(TOKEN_LPAREN, 0);
	if (!t) return;
	list_push_t((list )tl, (node )t);
	break;

	case ')':
	t = token_alloc(TOKEN_RPAREN, 0);
	if (!t) return;
	list_push_t((list )tl, (node )t);
	break;

	default: {
	for(;;) {
	if ( str[i] == '('
	\|\| str[i] == ')'
	\|\| str[i] == '\0'
	\|\| str[i] == ' '
	\|\| str[i] == '\n'
	\|\| str[i] == '\t') {

	temp_kw[kw_at] = '\0';

	t = token_alloc(TOKEN_ATOM, kw_at + 1);
	if (!t) return;
	strncpy(t->kw, temp_kw, kw_at + 1);
	list_push_t((list )tl, (node )t);

	kw_at = 0;
	memset(temp_kw, '\0', MAX_ATOM_LEN);
	i--; // i is now "on top of" char after end of kw, so move it back
	break;
	}
	if (kw_at < MAX_ATOM_LEN - 1) {
	temp_kw[kw_at] = str[i];
	kw_at++;
	}
	i++;
	}
	} break;
	}
	}
	}

	// }}}

	// lvals {{{

	typedef list lval_list;
	void lval_list_deinit(lval_list *);

	enum { LVAL_LIST
	, LVAL_INT
	, LVAL_KEYWORD };

	typedef struct {
	node n;
	int type;
	union {
	lval_list ll;
	int i;
	char *kw;
	};
	} lval;

	lval *
	lval_alloc(int type) {
	lval *ret = malloc(sizeof(lval));
	if (!ret) return NULL;

	if (type == LVAL_KEYWORD)
	ret->kw = NULL;

	if (type == LVAL_LIST)
	list_init(&ret->ll);

	ret->type = type;
	return ret;
	}

	void
	lval_free(lval *lv)
	{
	switch (lv->type) {
	case LVAL_INT:
	break;
	case LVAL_KEYWORD:
	if (lv->kw) {
	free(lv->kw);
	}
	break;
	case LVAL_LIST:
	lval_list_deinit(&lv->ll);
	break;
	}
	free(lv);
	}

	void
	lval_print(lval *lv)
	{
	switch (lv->type) {
	case LVAL_LIST:
	printf("list: ct %d\n", lv->ll.len);
	break;
	case LVAL_INT:
	printf("int %d\n", lv->i);
	break;
	case LVAL_KEYWORD:
	printf("kw %s\n", lv->kw);
	break;
	default:
	printf("bad lval\n");
	break;
	}
	}

	void
	lval_list_deinit(lval_list *ll)
	{
	node *n = list_pop_h(ll);
	for (; n;) {
	lval_free((lval *)n);
	n = list_pop_h(ll);
	}
	}

	bool
	try_parse_int(char str, int out)
	{
	char *temp = NULL;
	long val = strtol(str, &temp, 0);
	if (temp == str \|\| *temp != '\0') {
	*out = 0;
	return false;
	}
	// TODO make sure it isnt over max
	*out = (int)val;
	return true;
	}

	lval *
	token_to_lval(token_list *tl)
	{
	lval *ret;
	token t = (token )list_pop_h(tl);
	if (!t) {
	printf("unexpected EOF\n");
	return NULL;
	}

	switch (t->type) {
	case TOKEN_LPAREN:
	ret = lval_alloc(LVAL_LIST);
	for (;;) {
	if (tl->head && ((token *)tl->head)->type == TOKEN_RPAREN) break;

	lval *temp = token_to_lval(tl);
	if (!temp) {
	lval_free(ret);
	token_free(t);
	return NULL;
	}

	list_push_t(&ret->ll, (node *)temp);
	}
	token_free((token *)list_pop_h(tl));
	token_free(t);
	return ret;
	break;
	case TOKEN_RPAREN:
	printf("unexpected rparen\n");
	token_free(t);
	return NULL;
	break;
	case TOKEN_ATOM: {
	int i;
	bool is_int = try_parse_int(t->kw, &i);

	if (!is_int) {
	ret = lval_alloc(LVAL_KEYWORD);
	// transfer token's str to ret
	ret->kw = t->kw;
	t->kw = NULL;
	} else {
	ret = lval_alloc(LVAL_INT);
	ret->i = i;
	}

	token_free(t);
	return ret;
	} break;
	default:
	printf("bad token\n");
	token_free(t);
	return NULL;
	break;
	}

	// cant happen
	printf("crazy unreal error!!!!! AAAHH\n");
	token_free(t);
	return NULL;
	}

	// }}}

	// ast {{{

	void
	construct_ast(token_list tl, lval_list out)
	{
	for (;;) {
	if (tl->len == 0) break;
	lval *temp = token_to_lval(tl);
	if (!temp) {
	lval_list_deinit(out);
	return;
	}

	list_push_t(out, (node *)temp);
	}
	}

	void
	print_ast(lval_list ll, int indent)
	{
	lval *lv;
	for (node *n = ll->head; n; n = n -> next) {
	lv = (lval *)n;

	// indent!
	int sz = indent 2 + 1;
	char spaces = malloc(sizeof(char) sz);
	if (!spaces) return;
	memset(spaces, ' ', sz);
	spaces[sz - 1] = '\0';
	printf("%s\| ", spaces);
	free(spaces);

	lval_print(lv);

	if (lv->type == LVAL_LIST) {
	*indent += 1;
	print_ast(&lv->ll, indent);
	*indent -= 1;
	}
	}
	}

	// }}}

	typedef int (* builtin_fn_ptr) (int, int);
	typedef struct {
	char *name;
	builtin_fn_ptr fn;
	} builtin;

	int builtin_add(int a, int b) { return a + b; }
	int builtin_sub(int a, int b) { return a - b; }
	int builtin_mul(int a, int b) { return a * b; }
	int builtin_div(int a, int b) { return a / b; }

	builtin builtins[] = {
	{"+", builtin_add}
	, {"-", builtin_sub}
	, {"*", builtin_mul}
	, {"/", builtin_div}

	, { NULL, NULL }
	};

	builtin_fn_ptr
	get_builtin(char *name)
	{
	builtin_fn_ptr ret = NULL;
	for (int i = 0; builtins[i].name != NULL; ++i) {
	ret = builtins[i].fn;
	if (!strcmp(builtins[i].name, name))
	goto found;
	}
	return NULL;
	found:
	return ret;
	}

	int
	eval_lval(lval *l)
	{
	switch(l->type) {
	case LVAL_LIST: {

	char func = ((lval )list_pop_h(&l->ll))->kw;
	builtin_fn_ptr fn = get_builtin(func);
	if (!fn) {
	printf("err: fn \"%s\" not found\n", func);
	return 0;
	}

	int to_eval_at = 0;
	int to_eval[l->ll.len];
	for (node *n = l->ll.head; n; n = n->next) {
	lval lv = (lval )n;
	to_eval[to_eval_at] = eval_lval(lv);
	to_eval_at += 1;
	}

	printf("evaluating func %s\n", func);

	int answer = to_eval[0];
	for (unsigned int i = 1; i < l->ll.len; ++i) {
	answer = fn(answer, to_eval[i]);
	}

	return answer;

	} break;
	case LVAL_INT:
	return l->i;
	break;
	case LVAL_KEYWORD:
	return 0;
	break;
	default:
	printf("should not reach\n");
	break;
	}

	return 0;
	}

	int
	main(void)
	{
	token_list tl;
	list_init(&tl);

	char *file = load_file("test.l");
	if (file) {
	tokenize_lisp_string(file, &tl);
	free(file);
	} else {
	tokenize_lisp_string("(+ 1 2)", &tl);
	}

	// print tokens
	token *t;
	printf("tokens: count %d\n", tl.len);
	for (node *n = tl.head; n; n = n->next) {
	t = (token *)n;
	token_print(t);
	}

	lval_list ast;
	list_init(&ast);

	printf("\nbuilding ast\n");
	construct_ast(&tl, &ast);

	token_list_deinit(&tl);

	if (ast.len == 0) {
	printf("ast error\n");
	return 1;
	}

	printf("ast:\n");

	int indent = 0;
	print_ast(&ast, &indent);

	printf("\neval\n");
	int ans = eval_lval((lval *)ast.head);
	printf("ans %d\n", ans);

	lval_list_deinit(&ast);

	return 0;
	}
No results found