Skip to content

Instantly share code, notes, and snippets.

@RealNeGate
Last active October 28, 2022 18:14
Show Gist options
  • Select an option

  • Save RealNeGate/bc7b5f9d0a69aa50b9f2b6a6e162bb5a to your computer and use it in GitHub Desktop.

Select an option

Save RealNeGate/bc7b5f9d0a69aa50b9f2b6a6e162bb5a to your computer and use it in GitHub Desktop.
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#define SWEETIE_IMPL
#include "sweetie.h"
static char* read_entire_file(const char* filepath) {
FILE* file = fopen(filepath, "rb");
if (file == NULL) {
fprintf(stderr, "error: could not open '%s'\n", filepath);
return NULL;
}
// go to the end of the file and figure out how far that is
fseek(file, 0L, SEEK_END);
long file_length = ftell(file);
char* buffer = malloc(file_length + 1);
// go back to the start
fseek(file, 0L, SEEK_SET);
fread(buffer, sizeof(char), file_length, file);
buffer[file_length] = '\0';
fclose(file);
return buffer;
}
int main(void) {
Swt_String str;
str.data = read_entire_file("test.txt");
str.length = strlen(str.data);
Swt_Unit unit = { 0 };
unit.node_capacity = 1024;
unit.nodes = malloc(1024 * sizeof(Swt_Node));
unit.string_table_capacity = 4096;
unit.string_table = malloc(4096);
swt_parse(&unit, str);
swt_print(&unit, &unit.nodes[0], 0);
return 0;
}
#pragma once
#ifndef SWEETIE_H
#define SWEETIE_H
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#define SWEETIE_API extern
////////////////////////////////
// API
////////////////////////////////
typedef struct {
size_t length;
const char* data;
} Swt_String;
// intrusive linked list
typedef struct Swt_Node {
enum Swt_NodeTag {
SWT_NODE_NIL,
SWT_NODE_LIST,
SWT_NODE_INT,
SWT_NODE_WORD,
} tag;
struct Swt_Node* next;
union {
struct Swt_Node* kid;
const char* str;
int num;
};
} Swt_Node;
typedef struct Swt_Unit Swt_Unit;
#define SWT_FOR_KIDS(it, node) \
if ((node)->tag == SWT_NODE_LIST) for (Swt_Node *it = (node)->kid; it != NULL; it = it->next)
inline static bool swt_match_word(Swt_Node* n, const char* str) {
return n->tag == SWT_NODE_WORD && strcmp(n->str, str) == 0;
}
SWEETIE_API void swt_print(Swt_Unit* unit, Swt_Node* n, int depth);
SWEETIE_API Swt_Node* swt_parse(Swt_Unit* unit, Swt_String contents);
////////////////////////////////
// Private structures
////////////////////////////////
struct Swt_Unit {
size_t node_capacity;
size_t node_count;
Swt_Node* nodes;
size_t string_table_length;
size_t string_table_capacity;
char* string_table;
};
#endif /* SWEETIE_H */
////////////////////////////////
// Implementation
////////////////////////////////
#ifdef SWEETIE_IMPL
static Swt_Node* swt__parse_infix(Swt_Unit* unit, Swt_String* line);
static bool swt__isnormal(char ch) {
return ch != ' ' && ch != '\n' && ch != '\r'
&& ch != '{' && ch != '}'
&& ch != '(' && ch != ')'
&& ch != '[' && ch != ']';
}
static bool swt__iswhitespace(char ch) {
return ch == ' ' || ch == '\t';
}
// returns the region it just advanced away from
static Swt_String swt__advance(Swt_String* contents, size_t adv) {
assert(contents->length >= adv);
Swt_String s = { adv, contents->data };
contents->data += adv;
contents->length -= adv;
return s;
}
static bool swt__equal(Swt_String contents, size_t length, const char* data) {
return contents.length == length && memcmp(contents.data, data, length) == 0;
}
static Swt_String swt__eat_whitespace(Swt_String* contents) {
size_t i = 0;
while (i < contents->length && swt__iswhitespace(contents->data[i])) {
i += 1;
}
return swt__advance(contents, i);
}
static Swt_String swt__eat_line(Swt_String* contents) {
size_t i = 0;
while (i < contents->length && (contents->data[i] == '\r' || contents->data[i] == '\n')) {
i += 1;
}
return swt__advance(contents, i);
}
static Swt_String swt__next_token(Swt_String* contents) {
retry:
if (contents->length == 0) {
return (Swt_String){ 0 };
}
switch (contents->data[0]) {
// special tokens
case '(': case ')':
case '{': case '}':
case '[': case ']':
return swt__advance(contents, 1);
case ';':
size_t i = 0;
while (i < contents->length && contents->data[i] != '\n') {
i += 1;
}
swt__advance(contents, i);
goto retry;
default: {
Swt_String token = { 0 };
size_t i = 0;
while (i < contents->length && swt__isnormal(contents->data[i])) {
i += 1;
}
return swt__advance(contents, i);
}
}
}
static const char* swt__alloc_str(Swt_Unit* unit, Swt_String str) {
assert(unit->string_table_length + str.length < unit->string_table_capacity);
ptrdiff_t pos = unit->string_table_length;
memcpy(&unit->string_table[pos], str.data, str.length);
unit->string_table[pos + str.length] = 0;
unit->string_table_length += str.length + 1;
return &unit->string_table[pos];
}
static Swt_Node* swt__push(Swt_Unit* unit) {
assert(unit->node_count < unit->node_capacity);
return &unit->nodes[unit->node_count++];
}
static Swt_Node* swt__insert(Swt_Node* a, Swt_Node* b) {
Swt_Node* a_next = a->next;
a->next = b;
b->next = a_next;
return b;
}
static void swt__append(Swt_Node* list, Swt_Node* kid) {
assert(list->tag == SWT_NODE_LIST);
if (list->kid == NULL) {
list->kid = kid;
} else {
Swt_Node* tail = list->kid;
if (tail != NULL) {
while (tail->next != NULL) tail = tail->next;
}
tail->next = kid;
}
}
static Swt_Node* swt__create_word(Swt_Unit* unit, Swt_String s) {
Swt_Node* n = swt__push(unit);
n->tag = SWT_NODE_WORD;
n->str = swt__alloc_str(unit, s);
n->next = 0;
return n;
}
static Swt_Node* swt__parse_node(Swt_Unit* unit, Swt_String* line) {
Swt_String token = swt__next_token(line);
if (token.length == 0) {
return NULL;
} else if (token.length == 1 && token.data[0] == '{') {
// infix exprs
swt__eat_whitespace(line);
Swt_Node* list = swt__push(unit);
list->tag = SWT_NODE_LIST;
list->kid = swt__parse_infix(unit, line);
list->next = NULL;
swt__eat_whitespace(line);
return list;
} else if (token.length == 1 && token.data[0] == '(') {
// normal S-exprs
swt__eat_whitespace(line);
Swt_Node* list = swt__push(unit);
list->tag = SWT_NODE_LIST;
list->kid = NULL;
list->next = NULL;
while (line->length && *line->data != ')') {
swt__append(list, swt__parse_node(unit, line));
}
swt__advance(line, 1);
swt__eat_whitespace(line);
return list;
}
swt__eat_whitespace(line);
return swt__create_word(unit, token);
}
// { a + b } => (+ a b)
// { a * b * 3 } => (* * a b 3)
static Swt_Node* swt__parse_infix(Swt_Unit* unit, Swt_String* line) {
Swt_Node* lhs = swt__parse_node(unit, line);
Swt_Node *head = NULL, *tail = NULL;
Swt_String actual_op = { 0 }; // we can only bind one per infix level
for (;;) {
swt__eat_whitespace(line);
Swt_String op = swt__next_token(line);
if (op.length == 0 || (op.length == 1 && op.data[0] == '}')) {
// if we have no head (lmao) then we just defer to the left hand expression
if (head == NULL) head = lhs;
break;
}
swt__eat_whitespace(line);
if (actual_op.length == 0) {
actual_op = op;
} else if (!swt__equal(actual_op, op.length, op.data)) {
fprintf(stderr, "Operators in infix expr do not match (got '%.*s', expected '%.*s')", (int) op.length, op.data, (int) actual_op.length, actual_op.data);
abort();
}
Swt_Node* rhs = swt__parse_node(unit, line);
if (head == NULL) {
Swt_Node* op_node = swt__create_word(unit, op);
head = op_node;
op_node->next = lhs;
lhs->next = rhs;
tail = rhs;
} else {
tail->next = rhs;
tail = rhs;
}
}
return head;
}
SWEETIE_API void swt_parse_indent_level(Swt_Unit* unit, Swt_String* contents, Swt_Node* parent, int level) {
while (contents->length > 0) {
// same level we just parse everything in that line, if there's more
// than one element then we wrap it in a list
Swt_Node *head = NULL, *tail = NULL;
for (Swt_Node* n; (n = swt__parse_node(unit, contents)) != NULL;) {
if (head == NULL) {
head = tail = n;
} else {
tail->next = n;
tail = n;
}
}
Swt_Node* new_parent = NULL;
if (head != tail) {
// make the old slot into a list
new_parent = swt__push(unit);
new_parent->tag = SWT_NODE_LIST;
new_parent->kid = head;
new_parent->next = NULL;
} else {
new_parent = head;
}
swt__append(parent, new_parent);
swt__eat_line(contents);
// read current identation
size_t current_indent = 0;
while (current_indent < contents->length && contents->data[current_indent] == ' ') {
current_indent += 1;
}
// EOF
if (contents->length == current_indent) return;
if (current_indent < level) return;
swt__advance(contents, current_indent);
if (current_indent > level) {
swt_parse_indent_level(unit, contents, new_parent, current_indent);
}
}
}
SWEETIE_API Swt_Node* swt_parse(Swt_Unit* unit, Swt_String contents) {
// initialize string table (slap a null slot)
unit->string_table[0] = '\0';
unit->string_table_length = 1;
Swt_Node* n = swt__push(unit);
n->tag = SWT_NODE_LIST;
n->next = NULL;
n->kid = NULL;
swt_parse_indent_level(unit, &contents, n, 0);
return n;
}
SWEETIE_API void swt_print(Swt_Unit* unit, Swt_Node* n, int depth) {
if (n->tag == SWT_NODE_LIST) {
printf("(\n");
bool first = true;
SWT_FOR_KIDS(it, n) {
for (int i = 0; i < depth+1; i++) printf(" ");
swt_print(unit, it, depth + 1);
printf("\n");
}
for (int i = 0; i < depth; i++) printf(" ");
printf(")");
} else if (n->tag == SWT_NODE_WORD) {
printf("%s", n->str);
}
}
#endif /* SWEETIE_IMPL */
a
b
c
d e
f
+ 1 (* 2 3) 4
{ a * b * 3 } ; this is an infix expr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment