riicchhaarrd · May 24, 2021 23:41
diff --git a/lex.c b/lex.c
 #include <stdio.h>
 #include <stdlib.h>

 #include "token.h"
 #include "rhd/heap_string.h"
 #include "rhd/linked_list.h"

 /* Scanner state passed to next(), next_match() and token(); set up by parse(). */
 struct lexer
 {
    char *buf;                  /* input text being scanned */
    int bufsz;                  /* parse() sets this to strlen(buf) + 1 */
    int pos;                    /* index into buf of the character next() returns next */
    struct token tk;            /* not referenced by the functions visible in this file */
    int lineno;                 /* initialised to 0 by parse(); never updated by the visible code */
    struct linked_list *tokens; /* list of struct token, created and destroyed in parse() */
 };

 /*
  * Fetches the character at lex->pos and advances the position.
  *
  * Returns the character as a non-negative int, or -1 when only the last
  * slot of the buffer is left (parse() sizes the buffer as strlen + 1, so
  * that slot is the NUL terminator). The position is not advanced when -1
  * is returned.
  */
 int next(struct lexer *lex)
 {
    if(lex->pos + 1 >= lex->bufsz)
        return -1;
    //convert through unsigned char (like getc) so a byte >= 0x80, e.g. 0xFF,
    //cannot be sign-extended into -1 and be mistaken for the end marker
    return (unsigned char)lex->buf[lex->pos++];
 }

 /*
  * Collects the run of consecutive characters accepted by cmp, beginning
  * with the character most recently returned by next().
  *
  * Precondition: the caller has just fetched one character with next() and
  * that character satisfies cmp; the fetch is undone here so the character
  * becomes the first one of the run.
  *
  * lex: lexer to read from; on return its position is just past the run.
  * cmp: predicate returning non-zero for characters that belong to the run.
  *
  * Returns the collected text (NULL when nothing was pushed). token() releases
  * it with heap_string_free().
  */
 heap_string next_match(struct lexer *lex, int (*cmp)(int))
 {
    //undo the fetch from before
    --lex->pos;

    heap_string s = NULL;
    for(;;)
    {
        int ch = next(lex);
        if(ch == -1)
            break; //end of input: next() did not advance, nothing to put back
        if(!cmp(ch))
        {
            //put the terminating character back, otherwise it is consumed
            //here and never seen by the following token() call
            //(e.g. the '(' in "ab(" or the '1' in "a1")
            --lex->pos;
            break;
        }
        heap_string_push(&s, ch);
    }
    return s;
 }

 /*
  * Predicate for identifier characters: ASCII letters and underscore.
  * Returns 1 when ch qualifies, 0 otherwise (digits are not accepted).
  */
 static int match_test_ident(int ch)
 {
    if(ch == '_')
        return 1;
    if(ch >= 'A' && ch <= 'Z')
        return 1;
    return ch >= 'a' && ch <= 'z';
 }

 /*
  * Predicate for decimal digits.
  * Returns 1 when ch is '0'..'9', 0 otherwise.
  */
 static int match_test_integer(int ch)
 {
    if(ch < '0')
        return 0;
    return ch <= '9';
 }

 /*
  * Reads the next token from lex into *tk.
  *
  * Whitespace (space, tab, CR, LF) is skipped. A digit starts a TK_INTEGER
  * whose value is stored in tk->integer. A letter or underscore starts an
  * identifier whose text is copied into tk->string; the words "loop" and "if"
  * are reported as TK_LOOP and TK_IF, everything else as TK_IDENT. The
  * punctuation characters listed below are reported with the character itself
  * as the token type.
  *
  * Returns 0 when a token was produced, 1 at end of input or when an
  * unhandled character was met (tk->type is TK_INVALID in both cases).
  */
 int token(struct lexer *lex, struct token *tk)
 {
    int ch;

    //skip whitespace; the type is reset before every fetch
    do
    {
        tk->type = TK_INVALID;
        ch = next(lex);
        if(ch == -1)
            return 1;
    } while(ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n');

    if(match_test_integer(ch))
    {
        heap_string digits = next_match(lex, match_test_integer);
        tk->type = TK_INTEGER;
        tk->integer = atoi(digits);
        heap_string_free(&digits);
        return 0;
    }

    if(match_test_ident(ch))
    {
        heap_string word = next_match(lex, match_test_ident);
        //check whether this ident is a special ident
        if(!strcmp(word, "loop"))
            tk->type = TK_LOOP;
        else if(!strcmp(word, "if"))
            tk->type = TK_IF;
        else
            tk->type = TK_IDENT;
        //assumes tk->string is a char array, so sizeof bounds the copy - confirm in token.h
        snprintf(tk->string, sizeof(tk->string), "%s", word);
        heap_string_free(&word);
        return 0;
    }

    switch(ch)
    {
        case '#': case '<': case '>': case '"': case '\'':
        case '{': case '}': case '/': case '*': case '[':
        case ']': case '&': case '^': case '|': case '!':
        case '-': case '+': case '(': case ')': case '=':
        case ';': case ':': case '\\': case ',': case '%':
        case '.':
            //single-character tokens use the character code as their type
            tk->type = ch;
            return 0;

        default:
            break;
    }

    printf("got %c, unhandled error\n", ch);
    return 1; //error
 }

 /*
  * Tokenizes data and prints the text of every TK_IDENT token, one per line,
  * each prefixed with ']'.
  *
  * Tokens are collected into a linked list until token() reports end of input
  * or an error, then the list is walked with linked_list_reversed_foreach
  * (presumably last to first - confirm against rhd/linked_list.h) and
  * destroyed before returning.
  *
  * data: NUL-terminated text to scan; not freed here.
  */
 void parse(heap_string data)
 {
    int len = strlen(data);

    //every member not assigned below starts out zero / NULL
    struct lexer lex = {0};
    lex.buf = data;
    lex.bufsz = strlen(data) + 1;
    lex.tokens = linked_list_create(struct token);

    struct token tk = {0};

    //there can never be more tokens than input characters
    int budget = len;
    while(budget-- > 0)
    {
        if(token(&lex, &tk) != 0)
            break;
        linked_list_append(lex.tokens, tk);
    }

    linked_list_reversed_foreach(lex.tokens, struct token*, it,
    {
        if(it->type == TK_IDENT)
            printf("]%s\n", it->string);
    });

    linked_list_destroy(&lex.tokens);
 }
 /*
 int main(int argc, char **argv)
 {
    if(argc < 2)
 	return 0;
    heap_string data = read_file(argv[1]);

    if(!data)
    {
 	printf("failed to read file '%s'\n", argv[1]);
 	return 1;
    }

    //printf("data = %s\n", data);
    parse(data);
    heap_string_free(&data);
    return 0;
 }
 */
	#include <stdio.h>
	#include <stdlib.h>

	#include "token.h"
	#include "rhd/heap_string.h"
	#include "rhd/linked_list.h"

	/* Scanner state passed to next(), next_match() and token(); set up by parse(). */
	struct lexer
	{
	char *buf;                  /* input text being scanned */
	int bufsz;                  /* parse() sets this to strlen(buf) + 1 */
	int pos;                    /* index into buf of the character next() returns next */
	struct token tk;            /* not referenced by the functions visible in this file */
	int lineno;                 /* initialised to 0 by parse(); never updated by the visible code */
	struct linked_list *tokens; /* list of struct token, created and destroyed in parse() */
	};

	/*
	 * Fetches the character at lex->pos and advances the position.
	 *
	 * Returns the character as a non-negative int, or -1 when only the last
	 * slot of the buffer is left (parse() sizes the buffer as strlen + 1, so
	 * that slot is the NUL terminator). The position is not advanced when -1
	 * is returned.
	 */
	int next(struct lexer *lex)
	{
	    if(lex->pos + 1 >= lex->bufsz)
	        return -1;
	    //convert through unsigned char (like getc) so a byte >= 0x80, e.g. 0xFF,
	    //cannot be sign-extended into -1 and be mistaken for the end marker
	    return (unsigned char)lex->buf[lex->pos++];
	}

	heap_string next_match(struct lexer lex, int (cmp)(int))
	{
	//undo the fetch from before
	--lex->pos;

	heap_string s = NULL;
	while(1)
	{
	int ch = next(lex);
	if(ch == -1 \|\| !cmp(ch))
	return s;
	heap_string_push(&s, ch);
	}
	return s;
	}

	/*
	 * Predicate for identifier characters: ASCII letters and underscore.
	 * Returns 1 when ch qualifies, 0 otherwise (digits are not accepted).
	 */
	static int match_test_ident(int ch)
	{
	    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
	}

	/*
	 * Predicate for decimal digits.
	 * Returns 1 when ch is '0'..'9', 0 otherwise.
	 */
	static int match_test_integer(int ch)
	{
	    if(ch < '0')
	        return 0;
	    return ch <= '9';
	}

	int token(struct lexer lex, struct token tk)
	{
	int ch;
	retry:
	tk->type = TK_INVALID;
	ch = next(lex);
	if(ch == -1)
	return 1;

	switch(ch)
	{
	case ' ':
	case '\t':
	case '\r':
	case '\n':
	goto retry;

	case '#':
	case '<':
	case '>':
	case '"':
	case '\'':
	case '{':
	case '}':
	case '/':
	case '*':
	case '[':
	case ']':
	case '&':
	case '^':
	case '\|':
	case '!':
	case '-':
	case '+':
	case '(':
	case ')':
	case '=':
	case ';':
	case ':':
	case '\\':
	case ',':
	case '%':
	case '.':
	tk->type = ch;
	break;

	default:
	if(match_test_integer(ch))
	{
	tk->type = TK_INTEGER;
	heap_string s = next_match(lex, match_test_integer);
	tk->integer = atoi(s);
	heap_string_free(&s);
	} else if(match_test_ident(ch))
	{
	tk->type = TK_IDENT;
	heap_string s = next_match(lex, match_test_ident);
	//check whether this ident is a special ident
	if(!strcmp(s, "loop"))
	tk->type = TK_LOOP;
	else if(!strcmp(s, "if"))
	tk->type = TK_IF;
	snprintf(tk->string, sizeof(tk->string), "%s", s);
	heap_string_free(&s);
	} else
	{
	printf("got %c, unhandled error\n", ch);
	return 1; //error
	}
	break;
	}
	return 0;
	}

	/*
	 * Tokenizes data and prints the text of every TK_IDENT token, one per line,
	 * each prefixed with ']'.
	 *
	 * Tokens are collected into a linked list until token() reports end of input
	 * or an error, then the list is walked with linked_list_reversed_foreach
	 * (presumably last to first - confirm against rhd/linked_list.h) and
	 * destroyed before returning.
	 *
	 * data: NUL-terminated text to scan; not freed here.
	 */
	void parse(heap_string data)
	{
	    int len = strlen(data);

	    //every member not assigned below starts out zero / NULL
	    struct lexer lex = {0};
	    lex.buf = data;
	    lex.bufsz = strlen(data) + 1;
	    lex.tokens = linked_list_create(struct token);

	    struct token tk = {0};

	    //there can never be more tokens than input characters
	    int budget = len;
	    while(budget-- > 0)
	    {
	        if(token(&lex, &tk) != 0)
	            break;
	        linked_list_append(lex.tokens, tk);
	    }

	    linked_list_reversed_foreach(lex.tokens, struct token*, it,
	    {
	        if(it->type == TK_IDENT)
	            printf("]%s\n", it->string);
	    });

	    linked_list_destroy(&lex.tokens);
	}
	/*
	int main(int argc, char **argv)
	{
	if(argc < 2)
	return 0;
	heap_string data = read_file(argv[1]);

	if(!data)
	{
	printf("failed to read file '%s'\n", argv[1]);
	return 1;
	}

	//printf("data = %s\n", data);
	parse(data);
	heap_string_free(&data);
	return 0;
	}
	*/
No results found