Created
May 24, 2021 23:41
-
-
Save riicchhaarrd/ac6937dfcd0149b6994d9fccf5da3d1f to your computer and use it in GitHub Desktop.
lex example test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "token.h"
#include "rhd/heap_string.h"
#include "rhd/linked_list.h"
| struct lexer | |
| { | |
| char *buf; | |
| int bufsz; | |
| int pos; | |
| struct token tk; | |
| int lineno; | |
| struct linked_list *tokens; | |
| }; | |
| int next(struct lexer *lex) | |
| { | |
| if(lex->pos + 1 >= lex->bufsz) | |
| return -1; | |
| return lex->buf[lex->pos++]; | |
| } | |
| heap_string next_match(struct lexer *lex, int (*cmp)(int)) | |
| { | |
| //undo the fetch from before | |
| --lex->pos; | |
| heap_string s = NULL; | |
| while(1) | |
| { | |
| int ch = next(lex); | |
| if(ch == -1 || !cmp(ch)) | |
| return s; | |
| heap_string_push(&s, ch); | |
| } | |
| return s; | |
| } | |
// Returns 1 when ch may appear in an identifier (ASCII letter or '_'),
// 0 otherwise. Digits are not accepted.
static int match_test_ident(int ch)
{
    if(ch == '_')
        return 1;
    if(ch >= 'A' && ch <= 'Z')
        return 1;
    return ch >= 'a' && ch <= 'z';
}
// Returns 1 when ch is an ASCII decimal digit, 0 otherwise.
static int match_test_integer(int ch)
{
    if(ch < '0')
        return 0;
    return ch <= '9';
}
| int token(struct lexer *lex, struct token *tk) | |
| { | |
| int ch; | |
| retry: | |
| tk->type = TK_INVALID; | |
| ch = next(lex); | |
| if(ch == -1) | |
| return 1; | |
| switch(ch) | |
| { | |
| case ' ': | |
| case '\t': | |
| case '\r': | |
| case '\n': | |
| goto retry; | |
| case '#': | |
| case '<': | |
| case '>': | |
| case '"': | |
| case '\'': | |
| case '{': | |
| case '}': | |
| case '/': | |
| case '*': | |
| case '[': | |
| case ']': | |
| case '&': | |
| case '^': | |
| case '|': | |
| case '!': | |
| case '-': | |
| case '+': | |
| case '(': | |
| case ')': | |
| case '=': | |
| case ';': | |
| case ':': | |
| case '\\': | |
| case ',': | |
| case '%': | |
| case '.': | |
| tk->type = ch; | |
| break; | |
| default: | |
| if(match_test_integer(ch)) | |
| { | |
| tk->type = TK_INTEGER; | |
| heap_string s = next_match(lex, match_test_integer); | |
| tk->integer = atoi(s); | |
| heap_string_free(&s); | |
| } else if(match_test_ident(ch)) | |
| { | |
| tk->type = TK_IDENT; | |
| heap_string s = next_match(lex, match_test_ident); | |
| //check whether this ident is a special ident | |
| if(!strcmp(s, "loop")) | |
| tk->type = TK_LOOP; | |
| else if(!strcmp(s, "if")) | |
| tk->type = TK_IF; | |
| snprintf(tk->string, sizeof(tk->string), "%s", s); | |
| heap_string_free(&s); | |
| } else | |
| { | |
| printf("got %c, unhandled error\n", ch); | |
| return 1; //error | |
| } | |
| break; | |
| } | |
| return 0; | |
| } | |
| void parse(heap_string data) | |
| { | |
| int len = strlen(data); | |
| struct lexer lex = { | |
| .buf = data, | |
| .bufsz = strlen(data) + 1, | |
| .pos = 0, | |
| .lineno = 0, | |
| .tokens = NULL | |
| }; | |
| lex.tokens = linked_list_create(struct token); | |
| struct token tk = {0}; | |
| for(int i = 0; i < len; ++i) | |
| { | |
| int ret = token(&lex, &tk); | |
| if(ret) | |
| { | |
| break; | |
| } | |
| //if(tk.type == TK_IDENT) | |
| //printf("token = %d (%s)\n", tk.type, tk.string); | |
| linked_list_append(lex.tokens, tk); | |
| } | |
| linked_list_reversed_foreach(lex.tokens, struct token*, it, | |
| { | |
| if(it->type == TK_IDENT) | |
| printf("]%s\n", it->string); | |
| }); | |
| linked_list_destroy(&lex.tokens); | |
| } | |
| /* | |
| int main(int argc, char **argv) | |
| { | |
| if(argc < 2) | |
| return 0; | |
| heap_string data = read_file(argv[1]); | |
| if(!data) | |
| { | |
| printf("failed to read file '%s'\n", argv[1]); | |
| return 1; | |
| } | |
| //printf("data = %s\n", data); | |
| parse(data); | |
| heap_string_free(&data); | |
| return 0; | |
| } | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment