Last active
October 28, 2022 18:14
-
-
Save RealNeGate/bc7b5f9d0a69aa50b9f2b6a6e162bb5a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #define _CRT_SECURE_NO_WARNINGS | |
| #include <stdio.h> | |
| #define SWEETIE_IMPL | |
| #include "sweetie.h" | |
| static char* read_entire_file(const char* filepath) { | |
| FILE* file = fopen(filepath, "rb"); | |
| if (file == NULL) { | |
| fprintf(stderr, "error: could not open '%s'\n", filepath); | |
| return NULL; | |
| } | |
| // go to the end of the file and figure out how far that is | |
| fseek(file, 0L, SEEK_END); | |
| long file_length = ftell(file); | |
| char* buffer = malloc(file_length + 1); | |
| // go back to the start | |
| fseek(file, 0L, SEEK_SET); | |
| fread(buffer, sizeof(char), file_length, file); | |
| buffer[file_length] = '\0'; | |
| fclose(file); | |
| return buffer; | |
| } | |
| int main(void) { | |
| Swt_String str; | |
| str.data = read_entire_file("test.txt"); | |
| str.length = strlen(str.data); | |
| Swt_Unit unit = { 0 }; | |
| unit.node_capacity = 1024; | |
| unit.nodes = malloc(1024 * sizeof(Swt_Node)); | |
| unit.string_table_capacity = 4096; | |
| unit.string_table = malloc(4096); | |
| swt_parse(&unit, str); | |
| swt_print(&unit, &unit.nodes[0], 0); | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #pragma once | |
| #ifndef SWEETIE_H | |
| #define SWEETIE_H | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <stdbool.h> | |
| #include <string.h> | |
| #include <assert.h> | |
| #define SWEETIE_API extern | |
| //////////////////////////////// | |
| // API | |
| //////////////////////////////// | |
| typedef struct { | |
| size_t length; | |
| const char* data; | |
| } Swt_String; | |
| // intrusive linked list | |
| typedef struct Swt_Node { | |
| enum Swt_NodeTag { | |
| SWT_NODE_NIL, | |
| SWT_NODE_LIST, | |
| SWT_NODE_INT, | |
| SWT_NODE_WORD, | |
| } tag; | |
| struct Swt_Node* next; | |
| union { | |
| struct Swt_Node* kid; | |
| const char* str; | |
| int num; | |
| }; | |
| } Swt_Node; | |
| typedef struct Swt_Unit Swt_Unit; | |
| #define SWT_FOR_KIDS(it, node) \ | |
| if ((node)->tag == SWT_NODE_LIST) for (Swt_Node *it = (node)->kid; it != NULL; it = it->next) | |
| inline static bool swt_match_word(Swt_Node* n, const char* str) { | |
| return n->tag == SWT_NODE_WORD && strcmp(n->str, str) == 0; | |
| } | |
| SWEETIE_API void swt_print(Swt_Unit* unit, Swt_Node* n, int depth); | |
| SWEETIE_API Swt_Node* swt_parse(Swt_Unit* unit, Swt_String contents); | |
| //////////////////////////////// | |
| // Private structures | |
| //////////////////////////////// | |
| struct Swt_Unit { | |
| size_t node_capacity; | |
| size_t node_count; | |
| Swt_Node* nodes; | |
| size_t string_table_length; | |
| size_t string_table_capacity; | |
| char* string_table; | |
| }; | |
| #endif /* SWEETIE_H */ | |
| //////////////////////////////// | |
| // Implementation | |
| //////////////////////////////// | |
| #ifdef SWEETIE_IMPL | |
| static Swt_Node* swt__parse_infix(Swt_Unit* unit, Swt_String* line); | |
| static bool swt__isnormal(char ch) { | |
| return ch != ' ' && ch != '\n' && ch != '\r' | |
| && ch != '{' && ch != '}' | |
| && ch != '(' && ch != ')' | |
| && ch != '[' && ch != ']'; | |
| } | |
| static bool swt__iswhitespace(char ch) { | |
| return ch == ' ' || ch == '\t'; | |
| } | |
| // returns the region it just advanced away from | |
| static Swt_String swt__advance(Swt_String* contents, size_t adv) { | |
| assert(contents->length >= adv); | |
| Swt_String s = { adv, contents->data }; | |
| contents->data += adv; | |
| contents->length -= adv; | |
| return s; | |
| } | |
| static bool swt__equal(Swt_String contents, size_t length, const char* data) { | |
| return contents.length == length && memcmp(contents.data, data, length) == 0; | |
| } | |
| static Swt_String swt__eat_whitespace(Swt_String* contents) { | |
| size_t i = 0; | |
| while (i < contents->length && swt__iswhitespace(contents->data[i])) { | |
| i += 1; | |
| } | |
| return swt__advance(contents, i); | |
| } | |
| static Swt_String swt__eat_line(Swt_String* contents) { | |
| size_t i = 0; | |
| while (i < contents->length && (contents->data[i] == '\r' || contents->data[i] == '\n')) { | |
| i += 1; | |
| } | |
| return swt__advance(contents, i); | |
| } | |
| static Swt_String swt__next_token(Swt_String* contents) { | |
| retry: | |
| if (contents->length == 0) { | |
| return (Swt_String){ 0 }; | |
| } | |
| switch (contents->data[0]) { | |
| // special tokens | |
| case '(': case ')': | |
| case '{': case '}': | |
| case '[': case ']': | |
| return swt__advance(contents, 1); | |
| case ';': | |
| size_t i = 0; | |
| while (i < contents->length && contents->data[i] != '\n') { | |
| i += 1; | |
| } | |
| swt__advance(contents, i); | |
| goto retry; | |
| default: { | |
| Swt_String token = { 0 }; | |
| size_t i = 0; | |
| while (i < contents->length && swt__isnormal(contents->data[i])) { | |
| i += 1; | |
| } | |
| return swt__advance(contents, i); | |
| } | |
| } | |
| } | |
| static const char* swt__alloc_str(Swt_Unit* unit, Swt_String str) { | |
| assert(unit->string_table_length + str.length < unit->string_table_capacity); | |
| ptrdiff_t pos = unit->string_table_length; | |
| memcpy(&unit->string_table[pos], str.data, str.length); | |
| unit->string_table[pos + str.length] = 0; | |
| unit->string_table_length += str.length + 1; | |
| return &unit->string_table[pos]; | |
| } | |
| static Swt_Node* swt__push(Swt_Unit* unit) { | |
| assert(unit->node_count < unit->node_capacity); | |
| return &unit->nodes[unit->node_count++]; | |
| } | |
| static Swt_Node* swt__insert(Swt_Node* a, Swt_Node* b) { | |
| Swt_Node* a_next = a->next; | |
| a->next = b; | |
| b->next = a_next; | |
| return b; | |
| } | |
| static void swt__append(Swt_Node* list, Swt_Node* kid) { | |
| assert(list->tag == SWT_NODE_LIST); | |
| if (list->kid == NULL) { | |
| list->kid = kid; | |
| } else { | |
| Swt_Node* tail = list->kid; | |
| if (tail != NULL) { | |
| while (tail->next != NULL) tail = tail->next; | |
| } | |
| tail->next = kid; | |
| } | |
| } | |
| static Swt_Node* swt__create_word(Swt_Unit* unit, Swt_String s) { | |
| Swt_Node* n = swt__push(unit); | |
| n->tag = SWT_NODE_WORD; | |
| n->str = swt__alloc_str(unit, s); | |
| n->next = 0; | |
| return n; | |
| } | |
| static Swt_Node* swt__parse_node(Swt_Unit* unit, Swt_String* line) { | |
| Swt_String token = swt__next_token(line); | |
| if (token.length == 0) { | |
| return NULL; | |
| } else if (token.length == 1 && token.data[0] == '{') { | |
| // infix exprs | |
| swt__eat_whitespace(line); | |
| Swt_Node* list = swt__push(unit); | |
| list->tag = SWT_NODE_LIST; | |
| list->kid = swt__parse_infix(unit, line); | |
| list->next = NULL; | |
| swt__eat_whitespace(line); | |
| return list; | |
| } else if (token.length == 1 && token.data[0] == '(') { | |
| // normal S-exprs | |
| swt__eat_whitespace(line); | |
| Swt_Node* list = swt__push(unit); | |
| list->tag = SWT_NODE_LIST; | |
| list->kid = NULL; | |
| list->next = NULL; | |
| while (line->length && *line->data != ')') { | |
| swt__append(list, swt__parse_node(unit, line)); | |
| } | |
| swt__advance(line, 1); | |
| swt__eat_whitespace(line); | |
| return list; | |
| } | |
| swt__eat_whitespace(line); | |
| return swt__create_word(unit, token); | |
| } | |
| // { a + b } => (+ a b) | |
| // { a * b * 3 } => (* * a b 3) | |
| static Swt_Node* swt__parse_infix(Swt_Unit* unit, Swt_String* line) { | |
| Swt_Node* lhs = swt__parse_node(unit, line); | |
| Swt_Node *head = NULL, *tail = NULL; | |
| Swt_String actual_op = { 0 }; // we can only bind one per infix level | |
| for (;;) { | |
| swt__eat_whitespace(line); | |
| Swt_String op = swt__next_token(line); | |
| if (op.length == 0 || (op.length == 1 && op.data[0] == '}')) { | |
| // if we have no head (lmao) then we just defer to the left hand expression | |
| if (head == NULL) head = lhs; | |
| break; | |
| } | |
| swt__eat_whitespace(line); | |
| if (actual_op.length == 0) { | |
| actual_op = op; | |
| } else if (!swt__equal(actual_op, op.length, op.data)) { | |
| fprintf(stderr, "Operators in infix expr do not match (got '%.*s', expected '%.*s')", (int) op.length, op.data, (int) actual_op.length, actual_op.data); | |
| abort(); | |
| } | |
| Swt_Node* rhs = swt__parse_node(unit, line); | |
| if (head == NULL) { | |
| Swt_Node* op_node = swt__create_word(unit, op); | |
| head = op_node; | |
| op_node->next = lhs; | |
| lhs->next = rhs; | |
| tail = rhs; | |
| } else { | |
| tail->next = rhs; | |
| tail = rhs; | |
| } | |
| } | |
| return head; | |
| } | |
| SWEETIE_API void swt_parse_indent_level(Swt_Unit* unit, Swt_String* contents, Swt_Node* parent, int level) { | |
| while (contents->length > 0) { | |
| // same level we just parse everything in that line, if there's more | |
| // than one element then we wrap it in a list | |
| Swt_Node *head = NULL, *tail = NULL; | |
| for (Swt_Node* n; (n = swt__parse_node(unit, contents)) != NULL;) { | |
| if (head == NULL) { | |
| head = tail = n; | |
| } else { | |
| tail->next = n; | |
| tail = n; | |
| } | |
| } | |
| Swt_Node* new_parent = NULL; | |
| if (head != tail) { | |
| // make the old slot into a list | |
| new_parent = swt__push(unit); | |
| new_parent->tag = SWT_NODE_LIST; | |
| new_parent->kid = head; | |
| new_parent->next = NULL; | |
| } else { | |
| new_parent = head; | |
| } | |
| swt__append(parent, new_parent); | |
| swt__eat_line(contents); | |
| // read current identation | |
| size_t current_indent = 0; | |
| while (current_indent < contents->length && contents->data[current_indent] == ' ') { | |
| current_indent += 1; | |
| } | |
| // EOF | |
| if (contents->length == current_indent) return; | |
| if (current_indent < level) return; | |
| swt__advance(contents, current_indent); | |
| if (current_indent > level) { | |
| swt_parse_indent_level(unit, contents, new_parent, current_indent); | |
| } | |
| } | |
| } | |
| SWEETIE_API Swt_Node* swt_parse(Swt_Unit* unit, Swt_String contents) { | |
| // initialize string table (slap a null slot) | |
| unit->string_table[0] = '\0'; | |
| unit->string_table_length = 1; | |
| Swt_Node* n = swt__push(unit); | |
| n->tag = SWT_NODE_LIST; | |
| n->next = NULL; | |
| n->kid = NULL; | |
| swt_parse_indent_level(unit, &contents, n, 0); | |
| return n; | |
| } | |
| SWEETIE_API void swt_print(Swt_Unit* unit, Swt_Node* n, int depth) { | |
| if (n->tag == SWT_NODE_LIST) { | |
| printf("(\n"); | |
| bool first = true; | |
| SWT_FOR_KIDS(it, n) { | |
| for (int i = 0; i < depth+1; i++) printf(" "); | |
| swt_print(unit, it, depth + 1); | |
| printf("\n"); | |
| } | |
| for (int i = 0; i < depth; i++) printf(" "); | |
| printf(")"); | |
| } else if (n->tag == SWT_NODE_WORD) { | |
| printf("%s", n->str); | |
| } | |
| } | |
| #endif /* SWEETIE_IMPL */ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| a | |
| b | |
| c | |
| d e | |
| f | |
| + 1 (* 2 3) 4 | |
| { a * b * 3 } ; this is an infix expr |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment