Last active
May 4, 2025 12:01
-
-
Save jdmichaud/28d01163fae964ec3fe7fa4f1867843f to your computer and use it in GitHub Desktop.
Expression Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ast.c | |
#include "ast.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> // For strdup, strcmp | |
#include <assert.h> | |
// --- Node Creation Functions --- | |
size_t strnlen(const char *s, size_t maxlen) { | |
if (s == NULL) { | |
return 0; | |
} | |
size_t len = 0; | |
while (len < maxlen && s[len] != '\0') { | |
len++; | |
} | |
return len; | |
} | |
char *strndup(const char *s, size_t n) { | |
if (s == NULL) { | |
return NULL; | |
} | |
size_t len = strnlen(s, n); | |
char *new_str = (char *)malloc(len + 1); | |
if (new_str == NULL) { | |
return NULL; | |
} | |
memcpy(new_str, s, len); | |
new_str[len] = '\0'; | |
return new_str; | |
} | |
Node *create_number_node(double value) { | |
Node *node = (Node *)malloc(sizeof(Node)); | |
if (!node) return NULL; | |
node->type = NODE_NUMBER; | |
node->data.number_value = value; | |
return node; | |
} | |
Node *create_identifier_node(const char *name) { | |
Node *node = (Node *)malloc(sizeof(Node)); | |
if (!node) return NULL; | |
node->type = NODE_IDENTIFIER; | |
node->data.identifier_name = strndup(name, 32); // Duplicate the string | |
if (!node->data.identifier_name) { | |
free(node); | |
return NULL; | |
} | |
return node; | |
} | |
Node *create_binary_op_node(char op, Node *left, Node *right) { | |
Node *node = (Node *)malloc(sizeof(Node)); | |
if (!node) return NULL; | |
node->type = NODE_BINARY_OP; | |
node->data.binary_op.op = op; | |
node->data.binary_op.left = left; | |
node->data.binary_op.right = right; | |
return node; | |
} | |
Node *create_unary_op_node(char op, Node *operand) { | |
Node *node = (Node *)malloc(sizeof(Node)); | |
if (!node) return NULL; | |
node->type = NODE_UNARY_OP; | |
node->data.unary_op.op = op; | |
node->data.unary_op.operand = operand; | |
return node; | |
} | |
Node *create_func_call_node(const char *func_name) { | |
assert(func_name != NULL); | |
Node *node = (Node *)malloc(sizeof(Node)); | |
if (!node) return NULL; | |
node->type = NODE_FUNC_CALL; | |
char *func_name_dup = strndup(func_name, 32); | |
node->data.func_call.func_name = func_name_dup; | |
if (!node->data.func_call.func_name) { | |
free(node); | |
return NULL; | |
} | |
node->data.func_call.args = NULL; // Initialize args array | |
node->data.func_call.arg_count = 0; | |
return node; | |
} | |
// Helper to add an argument to a function call node | |
void add_func_arg(Node *func_call_node, Node *arg_node) { | |
if (!func_call_node || func_call_node->type != NODE_FUNC_CALL || !arg_node) { | |
// Or handle error appropriately, maybe return an error code | |
fprintf(stderr, "Error: Invalid call to add_func_arg.\n"); | |
return; | |
} | |
FuncCallNode *fc = &func_call_node->data.func_call; | |
fc->arg_count++; | |
// Reallocate args array - inefficient for many args, but simple | |
Node **new_args = (Node **)realloc(fc->args, fc->arg_count * sizeof(Node *)); | |
if (!new_args) { | |
// Handle realloc failure | |
fprintf(stderr, "Error: Failed to reallocate memory for function arguments.\n"); | |
fc->arg_count--; // Revert count | |
// The caller should ideally handle the freeing of arg_node in case of error | |
// Or we decide ownership transfer fails here. Let's assume caller manages arg_node on failure. | |
// Alternatively, we could free arg_node here: free_ast(arg_node); | |
// For simplicity now, we just report error. A robust system needs clear ownership rules. | |
return; // Indicate failure (maybe change func signature to return int) | |
} | |
fc->args = new_args; | |
fc->args[fc->arg_count - 1] = arg_node; // Add the new argument | |
} | |
// --- Memory Management --- | |
void free_ast(Node *node) { | |
if (!node) return; | |
switch (node->type) { | |
case NODE_NUMBER: | |
// No dynamic memory inside number node data itself | |
break; | |
case NODE_IDENTIFIER: | |
free(node->data.identifier_name); // Free duplicated string | |
break; | |
case NODE_BINARY_OP: | |
free_ast(node->data.binary_op.left); | |
free_ast(node->data.binary_op.right); | |
break; | |
case NODE_UNARY_OP: | |
free_ast(node->data.unary_op.operand); | |
break; | |
case NODE_FUNC_CALL: | |
free(node->data.func_call.func_name); // Free duplicated string | |
for (int i = 0; i < node->data.func_call.arg_count; ++i) { | |
free_ast(node->data.func_call.args[i]); | |
} | |
free(node->data.func_call.args); // Free the args array itself | |
break; | |
} | |
free(node); // Free the node struct itself | |
} | |
// --- AST Printing (for Debugging) --- | |
void print_ast(Node *node, int indent) { | |
if (!node) return; | |
for (int i = 0; i < indent; ++i) printf(" "); // Indentation | |
switch (node->type) { | |
case NODE_NUMBER: | |
printf("NUMBER: %g\n", node->data.number_value); // Use %g for cleaner float output | |
break; | |
case NODE_IDENTIFIER: | |
printf("IDENTIFIER: %s\n", node->data.identifier_name); | |
break; | |
case NODE_BINARY_OP: | |
// Special case for printing '%' character using '%%' | |
if (node->data.binary_op.op == '%') { | |
printf("BINARY_OP: '%%'\n"); | |
} else { | |
printf("BINARY_OP: '%c'\n", node->data.binary_op.op); | |
} | |
print_ast(node->data.binary_op.left, indent + 1); | |
print_ast(node->data.binary_op.right, indent + 1); | |
break; | |
case NODE_UNARY_OP: | |
printf("UNARY_OP: '%c'\n", node->data.unary_op.op); | |
print_ast(node->data.unary_op.operand, indent + 1); | |
break; | |
case NODE_FUNC_CALL: | |
printf("FUNC_CALL: %s (%d args)\n", node->data.func_call.func_name, node->data.func_call.arg_count); | |
for (int i = 0; i < node->data.func_call.arg_count; ++i) { | |
print_ast(node->data.func_call.args[i], indent + 1); | |
} | |
break; | |
default: | |
printf("UNKNOWN NODE TYPE\n"); | |
break; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ast.h | |
#ifndef AST_H | |
#define AST_H | |
#include <stdlib.h> // For size_t | |
// Enum to define the type of AST node | |
typedef enum { | |
NODE_NUMBER, // Constant number | |
NODE_IDENTIFIER, // Variable or function name (before resolution) | |
NODE_BINARY_OP, // Binary operation (+, -, *, /, %, ^) | |
NODE_UNARY_OP, // Unary operation (negation '-') | |
NODE_FUNC_CALL // Function call | |
} NodeType; | |
// Forward declaration for recursive structures | |
struct Node; | |
// Structure for binary operations | |
typedef struct { | |
struct Node *left; | |
struct Node *right; | |
char op; // '+', '-', '*', '/', '%', '^' | |
} BinaryOpNode; | |
// Structure for unary operations | |
typedef struct { | |
struct Node *operand; | |
char op; // Currently just '-' for negation | |
} UnaryOpNode; | |
// Structure for function calls | |
typedef struct { | |
char *func_name; | |
struct Node **args; // Dynamically allocated array of argument nodes | |
int arg_count; | |
} FuncCallNode; | |
// The main AST Node structure using a tagged union | |
typedef struct Node { | |
NodeType type; | |
union { | |
double number_value; // For NODE_NUMBER | |
char *identifier_name; // For NODE_IDENTIFIER | |
BinaryOpNode binary_op; // For NODE_BINARY_OP | |
UnaryOpNode unary_op; // For NODE_UNARY_OP | |
FuncCallNode func_call; // For NODE_FUNC_CALL | |
} data; | |
} Node; | |
// --- Function Prototypes for AST manipulation --- | |
// Create node functions | |
Node *create_number_node(double value); | |
Node *create_identifier_node(const char *name); | |
Node *create_binary_op_node(char op, Node *left, Node *right); | |
Node *create_unary_op_node(char op, Node *operand); | |
Node *create_func_call_node(const char *func_name); | |
void add_func_arg(Node *func_call_node, Node *arg_node); // Helper | |
// Free the entire AST | |
void free_ast(Node *node); | |
// Print the AST (for debugging/visualization) | |
void print_ast(Node *node, int indent); | |
#endif // AST_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// main.c | |
#include <stdio.h> | |
#include <stdlib.h> // For exit, EXIT_FAILURE | |
#include <string.h> // For strcmp, strcspn, strlen | |
#include "parser.h" // Includes ast.h implicitly via parser.h | |
int main() { | |
char input_buffer[1024]; | |
printf("Simple Expression Parser\n"); | |
printf("Handles: +, -, *, /, %%, ^, (), functions e.g., func(a, b+c)\n"); | |
printf("Enter expression (or 'quit' to exit):\n"); | |
while (1) { | |
printf("> "); | |
if (!fgets(input_buffer, sizeof(input_buffer), stdin)) { | |
printf("\nEOF detected. Goodbye!\n"); | |
break; // End of file or read error | |
} | |
// Remove trailing newline potentially added by fgets | |
input_buffer[strcspn(input_buffer, "\n")] = 0; | |
if (strcmp(input_buffer, "quit") == 0) { | |
printf("Goodbye!\n"); | |
break; | |
} | |
// Skip empty lines silently | |
if (strlen(input_buffer) == 0) { | |
continue; | |
} | |
Parser parser; | |
init_parser(&parser, input_buffer); | |
printf("\nParsing: \"%s\"\n", input_buffer); | |
Node *ast = parse(&parser); | |
if (ast) { | |
printf("--- AST ---\n"); | |
print_ast(ast, 0); | |
printf("-----------\n"); | |
free_ast(ast); // CRUCIAL: Free the memory allocated for the AST | |
} else { | |
fprintf(stderr, "Parse Error: %s\n", parser.error_msg); | |
// Provide context for the error location | |
if (parser.error_pos) { | |
int error_offset = parser.error_pos - parser.input; | |
fprintf(stderr, "Input: %s\n", parser.input); | |
// Print a caret pointing to the error position | |
fprintf(stderr, " "); | |
for (int i = 0; i < error_offset; ++i) { | |
// Handle tabs correctly if they were allowed (they are skipped now) | |
fprintf(stderr, "%c", (parser.input[i] == '\t' ? '\t' : ' ')); | |
} | |
fprintf(stderr, "^\n"); | |
// Show a snippet of the input near the error | |
fprintf(stderr, "Near: '%.*s'\n", | |
(int)strlen(parser.error_pos) > 15 ? 15 : (int)strlen(parser.error_pos), | |
parser.error_pos); | |
} | |
printf("-----------\n"); | |
} | |
printf("\n"); // Add space before the next prompt | |
} | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compiler | |
CC = gcc | |
SANITIZEFLAGS=-fsanitize=address -fsanitize=undefined | |
# Compiler Flags | |
# -Wall -Wextra: Enable comprehensive warnings (recommended) | |
# -g: Include debugging symbols (for GDB) | |
# -std=c99: Specify C standard (or -std=c11, -std=gnu99 etc.) | |
# -O2: Optimization level (optional, use for release builds) | |
# CFLAGS = -Wall -Wextra -g -std=c99 -O2 | |
CFLAGS = -Wall -Wextra -g -std=c99 -ggdb3 $(SANITIZEFLAGS) | |
# Linker Flags (usually empty unless linking specific directories) | |
LDFLAGS = $(SANITIZEFLAGS) | |
# Libraries to Link | |
# -lm: Required for math functions like strtod used in parser.c | |
LDLIBS = -lm | |
# The final executable name | |
TARGET = expression_parser | |
# Source files (.c files) | |
SOURCES = main.c parser.c ast.c | |
# Object files (.o files) - Automatically generated from SOURCES | |
# This uses substitution: replaces every '.c' at the end of a word in SOURCES with '.o' | |
OBJECTS = $(SOURCES:.c=.o) | |
# Header files - used for dependency tracking | |
# If any of these change, relevant source files will be recompiled | |
HEADERS = parser.h ast.h | |
# --- Rules --- | |
# Default Target: The first target in the file is the default executed when 'make' is run. | |
# Builds the final executable. Depends on all object files. | |
all: $(TARGET) | |
# Rule to link the final executable | |
# $@: The target name (expression_parser) | |
# $^: The names of all prerequisites (the .o files) | |
$(TARGET): $(OBJECTS) | |
@echo "Linking $@..." | |
$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS) | |
@echo "$@ built successfully." | |
# Generic rule to compile a .c file into a .o file | |
# %.o: Target pattern - matches any file ending in .o | |
# %.c: Prerequisite pattern - matches the corresponding .c file | |
# $(HEADERS): Make rule dependent on all header files. If any header changes, | |
# object files depending on it will be rebuilt. | |
# $<: The name of the *first* prerequisite (the .c file) | |
%.o: %.c $(HEADERS) | |
@echo "Compiling $<..." | |
$(CC) $(CFLAGS) -c $< -o $@ | |
# Target to remove generated files (object files and the executable) | |
clean: | |
@echo "Cleaning up build files..." | |
rm -f $(OBJECTS) $(TARGET) # -f suppresses errors if files don't exist | |
@echo "Cleanup complete." | |
re: clean all | |
# Phony Targets: Declare targets that are not actual files. | |
# This prevents 'make' from getting confused if a file named 'clean' or 'all' exists, | |
# and ensures the commands always run when these targets are invoked. | |
.PHONY: all clean re | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// parser.c | |
#include "parser.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <ctype.h> // For isspace, isdigit, isalpha, isalnum | |
#include <math.h> // For strtod | |
// --- Forward Declarations for Parsing Functions (Grammar Hierarchy) --- | |
// expression ::= term ( ( '+' | '-' ) term )* | |
// term ::= factor ( ( '*' | '/' | '%' ) factor )* | |
// factor ::= unary ( '^' factor )? // Right-associative power | |
// unary ::= '-' unary | primary | |
// primary ::= NUMBER | IDENTIFIER | IDENTIFIER '(' [arguments] ')' | '(' expression ')' | |
static Node *parse_expression(Parser *parser); | |
static Node *parse_term(Parser *parser); | |
static Node *parse_factor(Parser *parser); | |
static Node *parse_unary(Parser *parser); | |
static Node *parse_primary(Parser *parser); | |
// --- Helper Functions --- | |
static void skip_whitespace(Parser *parser) { | |
while (*parser->current && isspace((unsigned char)*parser->current)) { | |
parser->current++; | |
} | |
} | |
static char peek(Parser *parser) { | |
skip_whitespace(parser); | |
return *parser->current; | |
} | |
static char advance(Parser *parser) { | |
skip_whitespace(parser); | |
if (*parser->current) { | |
return *parser->current++; | |
} | |
return '\0'; // End of input | |
} | |
static int match(Parser *parser, char expected) { | |
skip_whitespace(parser); | |
if (*parser->current == expected) { | |
parser->current++; | |
return 1; | |
} | |
return 0; | |
} | |
static int is_at_end(Parser *parser) { | |
skip_whitespace(parser); | |
return *parser->current == '\0'; | |
} | |
// Error reporting - sets message and returns NULL | |
Node *report_error(Parser *parser, const char *message) { | |
parser->error_pos = parser->current; // Mark position *before* advancing past error token generally | |
snprintf(parser->error_msg, sizeof(parser->error_msg), "%s", message); | |
// Optional: Print error immediately | |
// fprintf(stderr, "Parse Error near '%.*s': %s\n", 10, parser->error_pos, message); | |
return NULL; // Indicate failure consistently | |
} | |
// --- Parsing Functions Implementation --- | |
// primary ::= NUMBER | IDENTIFIER | IDENTIFIER '(' [arguments] ')' | '(' expression ')' | |
static Node *parse_primary(Parser *parser) { | |
skip_whitespace(parser); | |
char current_char = peek(parser); | |
// Number (handles simple decimals, strtod handles more) | |
if (isdigit((unsigned char)current_char) || (current_char == '.' && isdigit((unsigned char)parser->current[1]))) { | |
const char *start = parser->current; | |
// More robust number scanning could be added here if needed (e.g., scientific notation) | |
while (isdigit((unsigned char)*parser->current) || *parser->current == '.') { | |
parser->current++; | |
} | |
// Use strtod for robust conversion | |
char *end; | |
double value = strtod(start, &end); | |
if (end != parser->current) { // Check if strtod consumed exactly what we scanned | |
parser->current = start; // Reset on failure | |
return report_error(parser, "Invalid number format"); | |
} | |
return create_number_node(value); | |
} | |
// Identifier (Variable or Function Call) | |
if (isalpha((unsigned char)current_char) || current_char == '_') { | |
const char *start = parser->current; | |
while (isalnum((unsigned char)*parser->current) || *parser->current == '_') { | |
parser->current++; | |
} | |
size_t length = parser->current - start; | |
char *name = (char *)malloc(length + 1); | |
if (!name) return report_error(parser, "Memory allocation failed for identifier"); | |
memcpy(name, start, length); | |
name[length] = '\0'; | |
skip_whitespace(parser); | |
if (peek(parser) == '(') { // Function Call | |
advance(parser); // Consume '(' | |
Node *func_node = create_func_call_node(name); | |
free(name); // Name is copied inside create_func_call_node | |
if (!func_node) return report_error(parser, "Memory allocation failed for function call"); | |
skip_whitespace(parser); | |
if (peek(parser) != ')') { // Check if there are arguments | |
do { | |
// Recursively parse each argument as a full expression | |
Node *arg = parse_expression(parser); | |
if (!arg) { | |
free_ast(func_node); // Clean up partially built node | |
// Error already reported by parse_expression | |
return NULL; | |
} | |
add_func_arg(func_node, arg); // Add argument to the list | |
skip_whitespace(parser); | |
} while (match(parser, ',')); // Continue if comma is found | |
} | |
if (!match(parser, ')')) { | |
free_ast(func_node); | |
return report_error(parser, "Expected ')' after function arguments"); | |
} | |
return func_node; | |
} else { // Variable Identifier | |
Node* id_node = create_identifier_node(name); | |
free(name); // Name is copied inside create_identifier_node | |
if (!id_node) return report_error(parser, "Memory allocation failed for identifier node"); | |
return id_node; | |
} | |
} | |
// Parentheses | |
if (current_char == '(') { | |
advance(parser); // Consume '(' | |
Node *expr = parse_expression(parser); // Parse the inner expression | |
if (!expr) return NULL; // Error reported deeper | |
if (!match(parser, ')')) { | |
free_ast(expr); // Clean up the parsed sub-expression | |
return report_error(parser, "Expected ')' after expression"); | |
} | |
return expr; // Return the parsed sub-expression node | |
} | |
// If nothing matches | |
char err_buf[50]; | |
snprintf(err_buf, sizeof(err_buf), "Unexpected token '%c'", current_char); | |
return report_error(parser, err_buf); | |
} | |
// unary ::= '-' unary | primary | |
static Node *parse_unary(Parser *parser) { | |
skip_whitespace(parser); | |
if (match(parser, '-')) { | |
// Recursively call parse_unary to handle multiple unary minuses like '--a' | |
Node *operand = parse_unary(parser); | |
if (!operand) return NULL; // Error propagated | |
Node* unary_node = create_unary_op_node('-', operand); | |
if (!unary_node) { | |
free_ast(operand); | |
return report_error(parser, "Memory allocation failed for unary operation"); | |
} | |
return unary_node; | |
} | |
// If no unary operator, parse the next higher precedence level (primary) | |
return parse_primary(parser); | |
} | |
// factor ::= unary ( '^' factor )? -- Handles right-associativity for power | |
static Node *parse_factor(Parser *parser) { | |
Node *left = parse_unary(parser); // Parse the base (left operand) | |
if (!left) return NULL; | |
skip_whitespace(parser); | |
if (peek(parser) == '^') { | |
advance(parser); // Consume '^' | |
// CRITICAL: For right-associativity, the right operand is parsed by | |
// calling the *same* function (parse_factor) recursively. | |
Node *right = parse_factor(parser); | |
if (!right) { | |
free_ast(left); | |
return NULL; // Error propagated | |
} | |
Node* binary_node = create_binary_op_node('^', left, right); | |
if (!binary_node) { | |
free_ast(left); | |
free_ast(right); | |
return report_error(parser, "Memory allocation failed for power operation"); | |
} | |
return binary_node; // Return the single binary node for this power op | |
} | |
// If no '^' operator found, just return the left operand (result of parse_unary) | |
return left; | |
} | |
// term ::= factor ( ( '*' | '/' | '%' ) factor )* -- Handles *, /, % (left-associative) | |
static Node *parse_term(Parser *parser) { | |
Node *left = parse_factor(parser); // Operands are factors (handles power/unary) | |
if (!left) return NULL; | |
skip_whitespace(parser); | |
// Left-associative loop for *, /, % | |
while (peek(parser) == '*' || peek(parser) == '/' || peek(parser) == '%') { | |
char op = advance(parser); // Consume the operator | |
Node *right = parse_factor(parser); // Parse the right operand | |
if (!right) { | |
free_ast(left); // Clean up left side on error | |
return NULL; // Propagate error | |
} | |
// Create a new binary node, making the previous 'left' the left child | |
Node* binary_node = create_binary_op_node(op, left, right); | |
if (!binary_node) { | |
free_ast(left); | |
free_ast(right); | |
return report_error(parser, "Memory allocation failed for binary operation"); | |
} | |
left = binary_node; // The result becomes the new left operand for the next iteration | |
skip_whitespace(parser); // Prepare for potential next operator | |
} | |
return left; // Return the final root of this term's subtree | |
} | |
// expression ::= term ( ( '+' | '-' ) term )* -- Handles +, - (left-associative) | |
static Node *parse_expression(Parser *parser) { | |
Node *left = parse_term(parser); // Operands are terms (handles mul/div/mod/pow/...) | |
if (!left) return NULL; | |
skip_whitespace(parser); | |
// Left-associative loop for + and - | |
while (peek(parser) == '+' || peek(parser) == '-') { | |
char op = advance(parser); // Consume the operator | |
Node *right = parse_term(parser); // Parse the right operand | |
if (!right) { | |
free_ast(left); // Clean up left side on error | |
return NULL; // Propagate error | |
} | |
// Create a new binary node | |
Node* binary_node = create_binary_op_node(op, left, right); | |
if (!binary_node) { | |
free_ast(left); | |
free_ast(right); | |
return report_error(parser, "Memory allocation failed for binary operation"); | |
} | |
left = binary_node; // Result becomes the new left operand | |
skip_whitespace(parser); // Prepare for potential next operator | |
} | |
return left; // Return the final root of the expression's AST | |
} | |
// --- Public Parser Functions --- | |
void init_parser(Parser *parser, const char *input) { | |
parser->input = input; | |
parser->current = input; | |
parser->error_pos = NULL; | |
parser->error_msg[0] = '\0'; | |
} | |
// Main entry point for parsing | |
Node *parse(Parser *parser) { | |
// Basic validation | |
if (!parser || !parser->input) { | |
if(parser) report_error(parser, "Invalid parser state or input"); | |
return NULL; | |
} | |
if (*(parser->input) == '\0') { | |
report_error(parser, "Empty input string"); | |
return NULL; // Handle empty input string | |
} | |
// Start parsing from the lowest precedence level | |
Node *ast = parse_expression(parser); | |
// After parsing, check for errors OR trailing characters | |
if (!ast) { | |
// An error occurred during parsing and should have been reported | |
// by one of the sub-parsing functions. Just return NULL. | |
return NULL; | |
} else if (!is_at_end(parser)) { | |
// Successfully parsed *something*, but didn't consume the whole string. | |
// This means there's extra unexpected input. | |
const char* current_pos_before_error = parser->current; // Save current pos for error | |
report_error(parser, "Unexpected characters after expression"); // Set error message | |
parser->error_pos = current_pos_before_error; // Ensure error points to the garbage | |
free_ast(ast); // Clean up the partially valid AST | |
return NULL; // Indicate failure | |
} | |
// Success! The entire input was consumed and formed a valid AST. | |
return ast; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// parser.h | |
#ifndef PARSER_H | |
#define PARSER_H | |
#include "ast.h" // Needs AST definitions | |
// Structure to hold the parser state | |
typedef struct { | |
const char *input; // The input expression string | |
const char *current; // Pointer to the current character being parsed | |
const char *error_pos;// Position where an error occurred (if any) | |
char error_msg[100]; // Buffer for error message | |
} Parser; | |
// --- Function Prototypes for Parser --- | |
// Initialize the parser | |
void init_parser(Parser *parser, const char *input); | |
// Main parsing function - returns root of AST or NULL on error | |
Node *parse(Parser *parser); | |
// Error reporting helper (exposed in case needed, but primarily used internally) | |
// Returns NULL to allow convenient `return report_error(...)` usage. | |
Node *report_error(Parser *parser, const char *message); | |
#endif // PARSER_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// zig test parser.zig | |
const std = @import("std"); | |
const stdout = std.io.getStdOut().writer(); | |
const stderr = std.io.getStdOut().writer(); | |
fn cast(comptime T: type, integer: anytype) T { | |
return @as(T, @intCast(integer)); | |
} | |
// Forward Declarations for Parsing Functions (Grammar Hierarchy) | |
// expression ::= term ( ( '+' | '-' ) term )* | |
// term ::= factor ( ( '*' | '/' | '%' ) factor )* | |
// factor ::= '-' factor | power // Unary minus applied here | |
// power ::= primary ( '^' power )? // Exponentiation (right-assoc) applied here | |
// primary ::= NUMBER | IDENTIFIER | IDENTIFIER '(' [expression] ')' | '(' expression ')' | |
const Operator = enum { | |
plus, | |
minus, | |
mul, | |
div, | |
modulo, | |
power, | |
fn fromChar(c: u8) !Operator { | |
return switch (c) { | |
'+' => .plus, | |
'-' => .minus, | |
'*' => .mul, | |
'/' => .div, | |
'%' => .modulo, | |
'^' => .power, | |
else => error.UnknownOperator, | |
}; | |
} | |
fn toChar(o: Operator) !u8 { | |
return switch (o) { | |
.plus => '+', | |
.minus => '-', | |
.mul => '*', | |
.div => '/', | |
.modulo => '%', | |
.power => '^', | |
}; | |
} | |
}; | |
const NodeType = enum { | |
number, | |
unary_op, | |
binary_op, | |
function_call, | |
variable, | |
}; | |
const Node = union(NodeType) { | |
number: Number, | |
unary_op: UnaryOpNode, | |
binary_op: BinaryOpNode, | |
function_call: FunctionCallNode, | |
variable: VariableNode, | |
// Nodes will be stored in an array. A pointer to a node is an index in that | |
// array. | |
const Index = u16; | |
const Self = @This(); | |
}; | |
const NumberType = enum { | |
integer, | |
float, | |
}; | |
const Number = union(NumberType) { | |
integer: usize, | |
float: f32, | |
}; | |
const UnaryOpNode = struct { | |
operator: Operator, | |
operand: Node.Index, | |
}; | |
const BinaryOpNode = struct { | |
operator: Operator, | |
lhs: Node.Index, | |
rhs: Node.Index, | |
}; | |
const FunctionCallNode = struct { | |
const ArgumentArrayType = std.BoundedArray(Node.Index, 16); | |
identifier: []const u8, | |
arguments: ArgumentArrayType, | |
}; | |
const VariableNode = struct { | |
identifier: []const u8, | |
}; | |
const ParseError = error { | |
MissingCloseParenthesis, | |
NotEnoughInput, | |
Overflow, | |
InvalidCharacter, | |
UnknownOperator, | |
UnexpectedToken, | |
NodeArrayTooSmall, | |
UnexpectedEnd, | |
}; | |
const ErrorState = struct { | |
code: ParseError, | |
location: usize, | |
message_buf: [255:0]u8, | |
message: [:0]const u8, | |
}; | |
const Parser = struct { | |
input: []const u8, | |
current: usize, | |
nodes: Ast, | |
error_state: *ErrorState, | |
// The AST (and its Nodes) are stored in a BoundedArray. | |
const Ast = std.ArrayListUnmanaged(Node); | |
const Self = @This(); | |
fn init(input: []const u8, nodes: []Node, error_state: *ErrorState) !Self { | |
error_state.location = 0; | |
return Self{ | |
.input = input, | |
.current = 0, | |
.error_state = error_state, | |
.nodes = .initBuffer(nodes), | |
}; | |
} | |
fn skipWhitespace(self: *Self) void { | |
while (self.current < self.input.len and std.ascii.isWhitespace(self.input[self.current])) : ({ | |
self.current += 1; | |
}) { | |
// nothing | |
} | |
} | |
fn peek(self: *Self) u8 { | |
self.skipWhitespace(); | |
return if (self.current < self.input.len) | |
self.input[self.current] | |
else | |
0; | |
} | |
fn peekAt(self: *Self, index: usize) u8 { | |
self.skipWhitespace(); | |
return if (self.current + index < self.input.len) | |
self.input[self.current + index] | |
else | |
0; | |
} | |
// +1 | |
fn accept(self: *Self) u8 { | |
self.skipWhitespace(); | |
self.current += 1; | |
return self.input[self.current - 1]; | |
} | |
// +len | |
fn advance(self: *Self, len: usize) ParseError!void { | |
if (self.current + len > self.input.len) { | |
return ParseError.NotEnoughInput; | |
} | |
self.current += len; | |
} | |
fn match(self: *Self, c: u8) bool { | |
if (self.peek() == c) { | |
_ = self.accept(); | |
return true; | |
} | |
return false; | |
} | |
fn span(self: *Self, len: usize) ParseError![]const u8 { | |
if (self.current + len <= self.input.len) { | |
return self.input[self.current..self.current + len]; | |
} | |
return ParseError.NotEnoughInput; | |
} | |
fn createNode(self: *Self, node: Node) !Node.Index { | |
std.log.debug("createNode len {} capacity {}", .{ self.nodes.items.len, self.nodes.capacity }); | |
if (self.nodes.items.len >= self.nodes.capacity) { | |
return self.setErrorState(error.NodeArrayTooSmall, | |
"the provided node slice is too small (len: {})", .{ self.nodes.capacity }); | |
} | |
self.nodes.appendAssumeCapacity(node); | |
return cast(u16, self.nodes.items.len - 1); | |
} | |
fn setErrorState(self: *Self, comptime code: ParseError, comptime msg: []const u8, | |
args: anytype) !Node.Index { | |
self.error_state.code = code; | |
self.error_state.location = self.current; | |
self.error_state.message = | |
std.fmt.bufPrintZ(self.error_state.message_buf[0..], msg, args) catch { @panic(""); }; | |
return code; | |
} | |
// primary ::= NUMBER | IDENTIFIER | IDENTIFIER '(' [expression] ')' | '(' expression ')' | |
fn parse_primary(self: *Self) !Node.Index { | |
std.log.debug("parse_primary begin {}", .{ self.current }); | |
self.skipWhitespace(); | |
// Match a number | |
if (std.ascii.isDigit(self.peek()) or (self.peek() == '.' and std.ascii.isDigit(self.peekAt(1)))) { | |
var i: usize = 0; | |
while (std.ascii.isDigit(self.peekAt(i)) or self.peekAt(i) == '.') : ({ i += 1; }) {} | |
if (self.peekAt(i) != 0 and self.peekAt(i) != ')' and self.peekAt(i) != ',' | |
and !std.ascii.isWhitespace(self.peekAt(i))) { | |
return self.setErrorState(error.InvalidCharacter, | |
"Unexpected character in a number {c}", .{ self.peekAt(i) }); | |
} | |
const strnumber = try self.span(i); | |
if (std.mem.indexOfScalar(u8, strnumber, '.')) |_| { | |
const number = try std.fmt.parseFloat(f32, strnumber); | |
try self.advance(i); | |
std.log.debug("parse_primary end1 {}", .{ self.current }); | |
return self.createNode(Node{ .number = Number{ .float = number } }); | |
} | |
else { | |
const number = try std.fmt.parseInt(usize, strnumber, 10); | |
try self.advance(i); | |
std.log.debug("parse_primary end2 {}", .{ self.current }); | |
return self.createNode(Node{ .number = Number{ .integer = number } }); | |
} | |
} | |
// Match an identifier | |
if (std.ascii.isAlphabetic(self.peek()) or self.peek() == '_') { | |
std.log.debug("identifier {c}", .{ self.peek() }); | |
var i: usize = 0; | |
while (std.ascii.isAlphanumeric(self.peekAt(i)) or self.peek() == '_') : ({ i += 1; }) {} | |
const identifier = try self.span(i); | |
try self.advance(i); | |
self.skipWhitespace(); | |
// Match a function call | |
if (self.peek() == '(') { | |
var arguments: FunctionCallNode.ArgumentArrayType = .{}; | |
_ = self.accept(); | |
self.skipWhitespace(); | |
std.log.debug("parse_primary found function current {}", .{ self.current }); | |
if (self.peek() != ')') { | |
std.log.debug("parse_primary parsing args current {}", .{ self.current }); | |
try arguments.append(try self.parse_expression()); | |
self.skipWhitespace(); | |
std.log.debug("parse_primary function self.peek() {c}", .{ self.peek() }); | |
while (self.peek() == ',') { | |
_ = self.accept(); | |
try arguments.append(try self.parse_expression()); | |
self.skipWhitespace(); | |
} | |
} | |
std.log.debug("parse_primary function self.peek() {c}", .{ self.peek() }); | |
if (self.peek() != ')') { | |
return self.setErrorState(error.MissingCloseParenthesis, | |
"missing closing parenthesis on call to {s}", .{ identifier }); | |
} | |
_ = self.accept(); | |
std.log.debug("parse_primary end3 {}", .{ self.current }); | |
return self.createNode(Node{ .function_call = FunctionCallNode{ | |
.identifier = identifier, | |
.arguments = arguments, | |
}}); | |
} else { | |
// Match a variable | |
std.log.debug("parse_primary end4 {} variable {s}", .{ self.current, identifier }); | |
return self.createNode(Node{ .variable = VariableNode{ | |
.identifier = identifier, | |
}}); | |
} | |
} | |
// Match a parenthesis | |
if (self.peek() == '(') { | |
_ = self.accept(); | |
const expr = try self.parse_expression(); | |
if (self.peek() != ')') { | |
return self.setErrorState(error.MissingCloseParenthesis, | |
"missing closing parenthesis", .{}); | |
} | |
_ = self.accept(); | |
std.log.debug("parse_primary end5 {}", .{ self.current }); | |
return expr; | |
} | |
std.log.debug("parse_primary end6 {}", .{ self.current }); | |
if (self.current >= self.input.len) { | |
return self.setErrorState(error.UnexpectedEnd, | |
"Unexpected end of stream", .{}); | |
} | |
return self.setErrorState(error.UnexpectedToken, | |
"Unexpected token {c}", .{ self.input[self.current] }); | |
} | |
// power ::= primary ( '^' power )? // Exponentiation (right-assoc) applied here | |
fn parse_power(self: *Self) !Node.Index { | |
std.log.debug("parse_power begin {}", .{ self.current }); | |
const primary = try self.parse_primary(); | |
self.skipWhitespace(); | |
if (self.peek() == '^') { | |
const operator = self.accept(); | |
const rhs = try self.parse_factor(); | |
const binary_op = try self.createNode(Node{ .binary_op = BinaryOpNode { | |
.operator = try Operator.fromChar(operator), | |
.lhs = primary, | |
.rhs = rhs, | |
}}); | |
std.log.debug("parse_power end {}", .{ self.current }); | |
return binary_op; | |
} | |
std.log.debug("parse_power end {}", .{ self.current }); | |
return primary; | |
} | |
// factor ::= '-' factor | power // Unary minus applied here | |
fn parse_factor(self: *Self) ParseError!Node.Index { | |
std.log.debug("parse_factor begin {}", .{ self.current }); | |
if (self.match('-')) { | |
const factor = try self.parse_factor(); | |
return self.createNode(Node{ .unary_op = UnaryOpNode { | |
.operator = Operator.minus, | |
.operand = factor, | |
}}); | |
} | |
std.log.debug("parse_factor end {}", .{ self.current }); | |
return self.parse_power(); | |
} | |
// term ::= factor ( ( '*' | '/' | '%' ) factor )* | |
fn parse_term(self: *Self) !Node.Index { | |
std.log.debug("parse_term begin {}", .{ self.current }); | |
var lhs = try self.parse_factor(); | |
self.skipWhitespace(); | |
while (self.peek() == '*' or self.peek() == '/' or self.peek() == '%') { | |
const operator = self.accept(); | |
const rhs = try self.parse_factor(); | |
const binary_op = try self.createNode(Node{ .binary_op = BinaryOpNode{ | |
.operator = try Operator.fromChar(operator), | |
.lhs = lhs, | |
.rhs = rhs, | |
}}); | |
lhs = binary_op; | |
} | |
std.log.debug("parse_term end {}", .{ self.current }); | |
return lhs; | |
} | |
// expression ::= term ( ( '+' | '-' ) term )* | |
fn parse_expression(self: *Self) ParseError!Node.Index { | |
std.log.debug("parse_expression begin {}", .{ self.current }); | |
var lhs = try self.parse_term(); | |
self.skipWhitespace(); | |
std.log.debug("parse_expression peek {c}", .{ self.peek() }); | |
while (self.peek() == '+' or self.peek() == '-') { | |
const operator = self.accept(); | |
const rhs = try self.parse_term(); | |
const binary_op = try self.createNode(Node{ .binary_op = BinaryOpNode{ | |
.operator = try Operator.fromChar(operator), | |
.lhs = lhs, | |
.rhs = rhs, | |
}}); | |
lhs = binary_op; | |
} | |
std.log.debug("parse_expression end {}", .{ self.current }); | |
return lhs; | |
} | |
}; | |
fn parse(expr: []const u8, ast: []Node, error_state: *ErrorState) !Node.Index { | |
var parser = try Parser.init(expr, ast, error_state); | |
const root = try parser.parse_expression(); | |
return root; | |
} | |
fn print_rpn(buffer: []u8, nodes: []Node, nodeIndex: Node.Index) ![]const u8 { | |
const node = nodes[nodeIndex]; | |
switch (node) { | |
.number => |number| { | |
// std.log.debug("number {}", .{ number }); | |
switch (number) { | |
.integer => |i| return try std.fmt.bufPrint(buffer, "{}", .{ i }), | |
.float => |f| return try std.fmt.bufPrint(buffer, "{d}", .{ f }), | |
} | |
}, | |
.unary_op => |unary_op| { | |
var printed_len: usize = 0; | |
{ | |
const printed = try print_rpn(buffer[printed_len..], nodes, unary_op.operand); | |
printed_len += printed.len; | |
} | |
{ | |
const printed = try std.fmt.bufPrint(buffer[printed_len..], " u{c}", .{ try Operator.toChar(unary_op.operator) }); | |
printed_len += printed.len; | |
} | |
return buffer[0..printed_len]; | |
}, | |
.binary_op => |binary_op| { | |
var printed_len: usize = 0; | |
{ | |
const printed = try print_rpn(buffer[printed_len..], nodes, binary_op.lhs); | |
printed_len += printed.len; | |
} | |
{ | |
const printed = try std.fmt.bufPrint(buffer[printed_len..], " ", .{}); | |
printed_len += printed.len; | |
} | |
{ | |
const printed = try print_rpn(buffer[printed_len..], nodes, binary_op.rhs); | |
printed_len += printed.len; | |
} | |
{ | |
const printed = try std.fmt.bufPrint(buffer[printed_len..], " ", .{}); | |
printed_len += printed.len; | |
} | |
{ | |
const printed = try std.fmt.bufPrint(buffer[printed_len..], "{c}", .{ try Operator.toChar(binary_op.operator) }); | |
printed_len += printed.len; | |
} | |
return buffer[0..printed_len]; | |
}, | |
.function_call => |function_call| { | |
var printed_len: usize = 0; | |
const args = function_call.arguments.slice(); | |
for (args) |arg| { | |
const printed_arg = try print_rpn(buffer[printed_len..], nodes, arg); | |
printed_len += printed_arg.len; | |
const printed = try std.fmt.bufPrint(buffer[printed_len..], " ", .{}); | |
printed_len += printed.len; | |
} | |
const printed_identifier = try std.fmt.bufPrint(buffer[printed_len..], "{s}{}", .{ | |
function_call.identifier, | |
function_call.arguments.len, | |
}); | |
printed_len += printed_identifier.len; | |
return buffer[0..printed_len]; | |
}, | |
.variable => |variable| { | |
return try std.fmt.bufPrint(buffer, "{s}", .{ variable.identifier }); | |
}, | |
} | |
} | |
fn print_expr(buffer: []u8, nodes: []Node, nodeIndex: Node.Index) ![]const u8 { | |
const node = nodes[nodeIndex]; | |
switch (node) { | |
.number => |number| { | |
// std.log.debug("number {}", .{ number }); | |
return try std.fmt.bufPrint(buffer, "{}", .{ @as(usize, @intFromFloat(number)) }); | |
}, | |
.unary_op => |unary_op| { | |
var printed_len: usize = 0; | |
{ | |
const printed = try std.fmt.bufPrint(buffer, "{}", .{ try Operator.toChar(unary_op.operator) }); | |
printed_len += printed.len; | |
} | |
{ | |
const printed = try print_rpn(buffer[printed_len..], nodes, unary_op.operand); | |
printed_len += printed.len; | |
} | |
return buffer[0..printed_len]; | |
}, | |
.binary_op => |binary_op| { | |
// std.log.debug("binary_op", .{}); | |
var printed_len: usize = 0; | |
{ | |
const printed = try print_rpn(buffer, nodes, binary_op.lhs); | |
printed_len += printed.len; | |
} | |
{ | |
const printed = try std.fmt.bufPrint(buffer[printed_len..], " {c} ", .{ try Operator.toChar(binary_op.operator) }); | |
printed_len += printed.len; | |
} | |
{ | |
const printed = try print_rpn(buffer[printed_len..], nodes, binary_op.rhs); | |
printed_len += printed.len; | |
} | |
return buffer[0..printed_len]; | |
}, | |
.function_call => |function_call| { | |
const printed_identifier = try std.fmt.bufPrint(buffer, "{s}(", .{ function_call.identifier }); | |
var printed_len: usize = printed_identifier.len; | |
const args = function_call.arguments.slice(); | |
for (args, 0..) |arg, i| { | |
if (i < args.len) { | |
const printed_arg = try print_rpn(buffer, nodes, arg); | |
printed_len += printed_arg.len; | |
const printed = try std.fmt.bufPrint(buffer[printed_len..], ", ", .{}); | |
printed_len += printed.len; | |
} else { | |
const printed = try print_rpn(buffer, nodes, arg); | |
printed_len += printed.len; | |
} | |
} | |
const printed_parenthesis = try std.fmt.bufPrint(buffer, ")", .{}); | |
printed_len += printed_parenthesis.len; | |
return buffer[0..printed_len]; | |
}, | |
.variable => |variable| { | |
return try std.fmt.bufPrint(buffer, "{s}", .{ variable.identifier }); | |
}, | |
} | |
} | |
test "Parser" { | |
std.testing.log_level = .debug; | |
var nodes: [8]Node = undefined; | |
var error_state: ErrorState = undefined; | |
var parser = try Parser.init(" toto", &nodes, &error_state); | |
parser.skipWhitespace(); | |
try std.testing.expectEqual(parser.current, 2); | |
} | |
test "parse" { | |
// Prompt: The purpose is to test a expression parser. The expression parser | |
// takes an expression as input and returns the PRN expression. Give me a list | |
// of expressions and the expected output to test the parser. The parser | |
// supports operator precedence of +, -, *, /, % and ^. It supports all those | |
// operator as binary and also a unary - operator. It supports parenthesis, | |
// variables, nested function calls with arbitrary number of arguments, | |
// integer and decimal numbers. | |
const test_cases: []const struct { input: []const u8, expected_outcome: []const u8 } = &.{ | |
// Basic Binary Operations & Numbers | |
.{ .input = "3 + 4", .expected_outcome = "3 4 +" }, | |
.{ .input = "3 + 4 + 5", .expected_outcome = "3 4 + 5 +" }, | |
.{ .input = "5 - 2", .expected_outcome = "5 2 -" }, | |
.{ .input = "6 * 7", .expected_outcome = "6 7 *" }, | |
.{ .input = "8 / 2", .expected_outcome = "8 2 /" }, | |
.{ .input = "9 % 4", .expected_outcome = "9 4 %" }, | |
.{ .input = "2 ^ 3", .expected_outcome = "2 3 ^" }, | |
.{ .input = "3.14 + 2.71", .expected_outcome = "3.14 2.71 +" }, | |
.{ .input = "10 / 4.0", .expected_outcome = "10 4 /" }, | |
// Variables | |
.{ .input = "x + y", .expected_outcome = "x y +" }, | |
.{ .input = "radius * 2", .expected_outcome = "radius 2 *" }, | |
.{ .input = "count % limit", .expected_outcome = "count limit %" }, | |
// Operator Precedence (*, /, % higher than +, -) | |
.{ .input = "a + b * c", .expected_outcome = "a b c * +" }, | |
.{ .input = "a * b + c", .expected_outcome = "a b * c +" }, | |
.{ .input = "a - b / c", .expected_outcome = "a b c / -" }, | |
.{ .input = "a / b - c", .expected_outcome = "a b / c -" }, | |
.{ .input = "a + b % c - d", .expected_outcome = "a b c % + d -" }, | |
.{ .input = "a * b / c % d", .expected_outcome = "a b * c / d %" }, | |
// Operator Precedence (^ highest) | |
.{ .input = "a ^ b * c", .expected_outcome = "a b ^ c *" }, | |
.{ .input = "a * b ^ c", .expected_outcome = "a b c ^ *" }, | |
.{ .input = "a + b ^ c - d", .expected_outcome = "a b c ^ + d -" }, | |
// Associativity (Left for +, -, , /, %; Right for ^) | |
// (Parsed as (a - b) + c) | |
.{ .input = "a - b + c", .expected_outcome = "a b - c +" }, | |
// (Parsed as (a / b) * c) | |
.{ .input = "a / b * c", .expected_outcome = "a b / c *" }, | |
// (Parsed as a ^ (b ^ c)) | |
.{ .input = "a ^ b ^ c", .expected_outcome = "a b c ^ ^" }, | |
// Parentheses | |
.{ .input = "(a + b) * c", .expected_outcome = "a b + c *" }, | |
.{ .input = "a * (b + c)", .expected_outcome = "a b c + *" }, | |
.{ .input = "a / (b - c)", .expected_outcome = "a b c - /" }, | |
.{ .input = "((a + b))", .expected_outcome = "a b +" }, | |
.{ .input = "(a + b) ^ c", .expected_outcome = "a b + c ^" }, | |
.{ .input = "a ^ (b + c)", .expected_outcome = "a b c + ^" }, | |
.{ .input = "(a * (b + c) / d) ^ e", .expected_outcome = "a b c + * d / e ^" }, | |
// Unary Minus | |
.{ .input = "-a", .expected_outcome = "a u-" }, | |
.{ .input = "a + -b", .expected_outcome = "a b u- +" }, | |
.{ .input = "-a + b", .expected_outcome = "a u- b +" }, | |
.{ .input = "-a * b", .expected_outcome = "a u- b *" }, | |
.{ .input = "a * -b", .expected_outcome = "a b u- *" }, | |
// (Assuming ^ has higher precedence: -(a^b)) | |
.{ .input = "-a ^ b", .expected_outcome = "a b ^ u-" }, | |
.{ .input = "a ^ -b", .expected_outcome = "a b u- ^" }, | |
.{ .input = "-(a + b)", .expected_outcome = "a b + u-" }, | |
.{ .input = "- (a * b)", .expected_outcome = "a b * u-" }, | |
.{ .input = "--a", .expected_outcome = "a u- u-" }, | |
.{ .input = "a - -b", .expected_outcome = "a b u- -" }, | |
.{ .input = "-5 + 3", .expected_outcome = "5 u- 3 +" }, | |
.{ .input = "5 + -3", .expected_outcome = "5 3 u- +" }, | |
.{ .input = "-3.14 * -var", .expected_outcome = "3.14 u- var u- *" }, | |
// Function Calls (Zero, One, Multiple Arguments) | |
.{ .input = "func()", .expected_outcome = "func0" }, | |
.{ .input = "sqrt(x)", .expected_outcome = "x sqrt1" }, | |
.{ .input = "pow(a, b)", .expected_outcome = "a b pow2" }, | |
.{ .input = "max(a, b, c)", .expected_outcome = "a b c max3" }, | |
// Function Calls Combined with Operators and Parentheses | |
.{ .input = "func() + 1", .expected_outcome = "func0 1 +" }, | |
.{ .input = "a + sqrt(b)", .expected_outcome = "a b sqrt1 +" }, | |
.{ .input = "sqrt(a + b)", .expected_outcome = "a b + sqrt1" }, | |
.{ .input = "pow(a + b, 2)", .expected_outcome = "a b + 2 pow2" }, | |
// (Assuming pow happens first, then unary minus) | |
.{ .input = "-pow(a, b)", .expected_outcome = "a b pow2 u-" }, | |
.{ .input = "func(a, b * c)", .expected_outcome = "a b c * func2" }, | |
.{ .input = "func(a + 1, b - 2, c)", .expected_outcome = "a 1 + b 2 - c func3" }, | |
// (Assuming standard interpretation (sin(x))^2) | |
// (Note: Requires explicit parentheses (sin(x))^2 if the parser doesn't handle this specific postfix ^ syntax.) | |
.{ .input = "sin(x)^2", .expected_outcome = "x sin1 2 ^" }, | |
.{ .input = "(sqrt(x))^2", .expected_outcome = "x sqrt1 2 ^" }, | |
// Nested Function Calls | |
.{ .input = "func(g(x))", .expected_outcome = "x g1 func1" }, | |
.{ .input = "pow(sqrt(a), b)", .expected_outcome = "a sqrt1 b pow2" }, | |
.{ .input = "func(a, g(b, c + d), e)", .expected_outcome = "a b c d + g2 e func3" }, | |
.{ .input = "outer(inner1(x), inner2(y, z))", .expected_outcome = "x inner11 y z inner22 outer2" }, | |
// Complex Combinations | |
.{ .input = "-a * (b + c ^ d) / func(e, -f)", .expected_outcome = "a u- b c d ^ + * e f u- func2 /" }, | |
.{ .input = "(func(x, y) + z) ^ - (a % b)", .expected_outcome = "x y func2 z + a b % u- ^" }, | |
.{ .input = "3.14 * (radius ^ 2) - max(x, -y, 10)", .expected_outcome = "3.14 radius 2 ^ * x y u- 10 max3 -" }, | |
}; | |
// std.testing.log_level = .debug; | |
var nodes: [32]Node = undefined; | |
var error_state: ErrorState = undefined; | |
var buf = [_]u8 { 0 } ** 256; | |
for (test_cases, 0..) |test_case, i| { | |
// for (test_cases[46..47], 0..) |test_case, i| { | |
const root = parse(test_case.input, &nodes, &error_state) catch |e| { | |
try stderr.print("{: >3} testing {s}\n", .{ i, test_case.input }); | |
try stderr.print("{s}\n", .{ test_case.input }); | |
for (0..error_state.location) |_| { | |
try stderr.print(".", .{}); | |
} | |
try stderr.print("^\n", .{}); | |
try stderr.print("error: {s}\n", .{ error_state.message }); | |
return e; | |
}; | |
try std.testing.expectEqualSlices(u8, test_case.expected_outcome, try print_rpn(&buf, &nodes, root)); | |
} | |
} | |
test "parse error" { | |
// MissingCloseParenthesis, | |
// NotEnoughInput, | |
// Overflow, | |
// InvalidCharacter, | |
// UnknownOperator, | |
// UnexpectedToken, | |
// NodeArrayTooSmall, | |
const test_cases: []const struct { input: []const u8, expected_error_pattern: ParseError } = &.{ | |
// Mismatched Parentheses | |
.{ .input = "(a + b", .expected_error_pattern = ParseError.MissingCloseParenthesis }, | |
// .{ .input = "a + b)", .expected_error_pattern = ParseError.UnexpectedToken }, | |
.{ .input = "func(a, b", .expected_error_pattern = ParseError.MissingCloseParenthesis }, | |
.{ .input = "(a + (b * c)", .expected_error_pattern = ParseError.MissingCloseParenthesis }, | |
// .{ .input = "a + b) * c", .expected_error_pattern = ParseError.UnexpectedToken }, | |
// // Operator Issues | |
.{ .input = "a + * b", .expected_error_pattern = ParseError.UnexpectedToken }, // Or "Missing operand" | |
.{ .input = "* a", .expected_error_pattern = ParseError.UnexpectedToken }, | |
.{ .input = "a +", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
.{ .input = "a /", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
.{ .input = "a ^", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
.{ .input = "a -", .expected_error_pattern = ParseError.UnexpectedEnd }, // Assuming binary | |
.{ .input = "-", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
.{ .input = "a %", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
.{ .input = "a ** b", .expected_error_pattern = ParseError.UnexpectedToken }, // Or "Unexpected operator" | |
// // Operand Issues | |
// .{ .input = "a b + c", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
// .{ .input = "5 10", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
.{ .input = "(a +) b", .expected_error_pattern = ParseError.UnexpectedToken }, // Or error on '+' | |
.{ .input = "a + () * c", .expected_error_pattern = ParseError.UnexpectedToken }, // Or "Missing operand inside ()" | |
// // Function Call Issues | |
.{ .input = "func(a, , b)", .expected_error_pattern = ParseError.UnexpectedToken }, // Or "Empty argument" | |
.{ .input = "func(,a)", .expected_error_pattern = ParseError.UnexpectedToken }, // Or "Empty argument" | |
.{ .input = "func(a,)", .expected_error_pattern = ParseError.UnexpectedToken }, // Or "Trailing comma" | |
// .{ .input = "func a)", .expected_error_pattern = ParseError.UnexpectedToken }, | |
.{ .input = "func(a b)", .expected_error_pattern = ParseError.MissingCloseParenthesis }, // Or "Missing comma or ')'" | |
.{ .input = "func(a,", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
// // Invalid Characters / Tokens | |
.{ .input = "a + $b", .expected_error_pattern = ParseError.UnexpectedToken }, | |
.{ .input = "1_abc", .expected_error_pattern = ParseError.InvalidCharacter }, // If numbers can't start identifiers | |
.{ .input = ".", .expected_error_pattern = ParseError.UnexpectedToken }, // Or "Unexpected token '.'" | |
// .{ .input = "5.", .expected_error_pattern = ParseError.NodeArrayTooSmall }, // If trailing decimal not allowed alone | |
.{ .input = "#", .expected_error_pattern = ParseError.UnexpectedToken }, | |
// // Edge Cases | |
.{ .input = "", .expected_error_pattern = ParseError.UnexpectedEnd }, | |
.{ .input = "()", .expected_error_pattern = ParseError.UnexpectedToken }, | |
.{ .input = ",", .expected_error_pattern = ParseError.UnexpectedToken }, | |
}; | |
// std.testing.log_level = .debug; | |
var nodes: [32]Node = undefined; | |
var error_state: ErrorState = undefined; | |
for (test_cases, 0..) |test_case, i| { | |
// for (test_cases[12..13], 0..) |test_case, i| { | |
// try stderr.print("{: >3} testing {s}\n", .{ i, test_case.input }); | |
_ = i; | |
var error_raised = false; | |
_ = parse(test_case.input, &nodes, &error_state) catch { | |
error_raised = true; | |
try std.testing.expectEqual(test_case.expected_error_pattern, error_state.code); | |
}; | |
try std.testing.expectEqual(error_raised, true); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment