Created
April 6, 2026 01:27
-
-
Save HarryR/27a24494ce4906e923a24ee8feaef2e9 to your computer and use it in GitHub Desktop.
Recursively styled C interpreter for Andrew Davison's minBASIC: https://coe.psu.ac.th/ad/minBASIC/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * mbx.c — Minimal BASIC Interpreter | |
| * | |
| * C port of minBASIC/mbx.py | |
| * Uses the call stack as program storage: each recursive parse() call | |
| * defines a Stmt on its stack frame, linked via prev/next pointers. | |
| * | |
| * program ::= { [label] statement } | |
| * statement ::= 'P' (var | string | '!' | '-') | |
| * | 'I' var | |
| * | var expr | |
| * | 'J' atom label | |
| * expr ::= atom [ op atom ] | |
| * atom ::= var | int | |
| * op ::= '+' | '-' | '*' | '/' | '&' | '|' | '=' | '!' | '<' | '>' | |
| * var ::= a-z | |
| * label ::= A-Z (excluding P, J, I) | |
| * int ::= (0-9)+ | |
| * string ::= '"' [^"]* '"' | |
| */ | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <ctype.h> | |
| /* --- Types --- */ | |
| /* Atom: kind is 'i' (int), 'v' (var), or 0 (none) */ | |
| typedef struct { | |
| char kind; | |
| union { int ival; char var; }; | |
| } Atom; | |
| typedef struct Stmt Stmt; | |
| struct Stmt { | |
| Stmt *link[3]; /* 0=prev, 1=next, 2=jump */ | |
| char tag[2]; /* 0=label ('\0' if none), 1=kind ('P','I','=','J') */ | |
| int line; | |
| union { | |
| struct { char var; char op; Atom a[2]; } assign; /* a[0]=left, a[1]=right */ | |
| struct { char pk; char var; char str[128]; } print; /* pk: 'v' or 's' */ | |
| struct { char var; } input; | |
| struct { char label; Atom cond; } ifgoto; | |
| }; | |
| }; | |
| /* --- Error helpers --- */ | |
| static void lex_error(int line, const char *msg) { | |
| fprintf(stderr, "lex error on line %d: %s\n", line, msg); | |
| exit(1); | |
| } | |
| static void parse_error(int line, const char *msg) { | |
| fprintf(stderr, "parse error on line %d: %s\n", line, msg); | |
| exit(1); | |
| } | |
| static void run_error(int line, const char *msg) { | |
| fprintf(stderr, "run error on line %d: %s\n", line, msg); | |
| exit(1); | |
| } | |
| /* --- Lexing helpers --- */ | |
| static int skip_ws(const char **src) { | |
| int nl = 0; | |
| for (;;) { | |
| char ch = **src; | |
| if (ch == ' ' || ch == '\t' || ch == '\r' || ch == ';') { | |
| (*src)++; | |
| } else if (ch == '\n') { | |
| nl++; | |
| (*src)++; | |
| } else if (ch == '#') { | |
| while (**src && **src != '\n') | |
| (*src)++; | |
| } else { | |
| break; | |
| } | |
| } | |
| return nl; | |
| } | |
| static int lex_int(const char **src) { | |
| int val = 0; | |
| while (isdigit((unsigned char)**src)) { | |
| val = val * 10 + (**src - '0'); | |
| (*src)++; | |
| } | |
| return val; | |
| } | |
| static Atom lex_atom(const char **src, int line) { | |
| Atom a = {0}; | |
| skip_ws(src); | |
| char ch = **src; | |
| if (isdigit((unsigned char)ch)) { | |
| a.kind = 'i'; | |
| a.ival = lex_int(src); | |
| } else if (islower((unsigned char)ch)) { | |
| a.kind = 'v'; | |
| a.var = ch; | |
| (*src)++; | |
| } else { | |
| parse_error(line, "expected variable or integer"); | |
| } | |
| return a; | |
| } | |
| static void lex_string(const char **src, char *buf, int maxlen, int line) { | |
| (*src)++; | |
| int i = 0; | |
| while (**src && **src != '"') { | |
| if (i < maxlen - 1) | |
| buf[i++] = **src; | |
| (*src)++; | |
| } | |
| if (**src != '"') | |
| lex_error(line, "unterminated string literal"); | |
| (*src)++; | |
| buf[i] = '\0'; | |
| } | |
| /* --- Evaluate an atom --- */ | |
| static int eval_atom(Atom *a, int *vars, int *defined, int line) { | |
| if (a->kind == 'i') | |
| return a->ival; | |
| int vi = a->var - 'a'; | |
| if (!defined[vi]) | |
| run_error(line, "undefined variable"); | |
| return vars[vi]; | |
| } | |
| /* --- Parser --- */ | |
| static void resolve_and_run(Stmt *last); | |
| static void parse(const char **src, Stmt *prev, int line) { | |
| Stmt s = {0}; | |
| s.link[0] = prev; | |
| line += skip_ws(src); | |
| s.line = line; | |
| if (**src == '\0') { | |
| resolve_and_run(prev); | |
| return; | |
| } | |
| char ch = **src; | |
| /* Check for label */ | |
| if (isupper((unsigned char)ch) && ch != 'P' && ch != 'J' && ch != 'I') { | |
| s.tag[0] = ch; | |
| (*src)++; | |
| line += skip_ws(src); | |
| s.line = line; | |
| ch = **src; | |
| } | |
| /* Parse statement */ | |
| if (ch == 'P') { | |
| (*src)++; | |
| line += skip_ws(src); | |
| ch = **src; | |
| s.tag[1] = 'P'; | |
| if (islower((unsigned char)ch)) { | |
| s.print.pk = 'v'; | |
| s.print.var = ch; | |
| (*src)++; | |
| } else if (ch == '"') { | |
| s.print.pk = 's'; | |
| lex_string(src, s.print.str, sizeof(s.print.str), line); | |
| } else if (ch == '!' || ch == '-') { | |
| s.print.pk = 's'; | |
| s.print.str[0] = ch; | |
| s.print.str[1] = '\0'; | |
| (*src)++; | |
| } else { | |
| parse_error(line, "expected variable or string after P"); | |
| } | |
| } else if (ch == 'I') { | |
| (*src)++; | |
| skip_ws(src); | |
| s.tag[1] = 'I'; | |
| if (!islower((unsigned char)**src)) | |
| parse_error(line, "expected variable after I"); | |
| s.input.var = **src; | |
| (*src)++; | |
| } else if (islower((unsigned char)ch)) { | |
| s.tag[1] = '='; | |
| s.assign.var = ch; | |
| (*src)++; | |
| s.assign.a[0] = lex_atom(src, line); | |
| skip_ws(src); | |
| char next = **src; | |
| if (next == '+' || next == '-' || next == '*' || next == '/' || | |
| next == '=' || next == '!' || next == '<' || next == '>' || | |
| next == '&' || next == '|') { | |
| s.assign.op = next; | |
| (*src)++; | |
| s.assign.a[1] = lex_atom(src, line); | |
| } | |
| } else if (ch == 'J') { | |
| (*src)++; | |
| s.tag[1] = 'J'; | |
| s.ifgoto.cond = lex_atom(src, line); | |
| skip_ws(src); | |
| if (!isupper((unsigned char)**src)) | |
| parse_error(line, "expected label after J condition"); | |
| s.ifgoto.label = **src; | |
| (*src)++; | |
| } else if (ch == '\0') { | |
| resolve_and_run(prev); | |
| return; | |
| } else { | |
| char msg[64]; | |
| snprintf(msg, sizeof(msg), "unexpected character '%c'", ch); | |
| parse_error(line, msg); | |
| } | |
| parse(src, &s, line); | |
| } | |
| /* --- Resolve: wire next pointers and patch jumps --- */ | |
| static void resolve_and_run(Stmt *last) { | |
| if (!last) | |
| return; | |
| Stmt *labels[26] = {0}; | |
| /* Walk backward via link[0] (prev), set link[1] (next), collect labels */ | |
| Stmt *first = last; | |
| last->link[1] = NULL; | |
| for (Stmt *s = last; s; s = s->link[0]) { | |
| first = s; | |
| if (s->link[0]) | |
| s->link[0]->link[1] = s; | |
| if (s->tag[0]) { | |
| int idx = s->tag[0] - 'A'; | |
| if (labels[idx]) { | |
| char msg[64]; | |
| snprintf(msg, sizeof(msg), "duplicate label '%c'", s->tag[0]); | |
| parse_error(s->line, msg); | |
| } | |
| labels[idx] = s; | |
| } | |
| } | |
| /* Patch link[2] (jump) targets */ | |
| for (Stmt *s = first; s; s = s->link[1]) { | |
| if (s->tag[1] == 'J') { | |
| int idx = s->ifgoto.label - 'A'; | |
| if (!labels[idx]) { | |
| char msg[64]; | |
| snprintf(msg, sizeof(msg), "unknown label '%c'", s->ifgoto.label); | |
| parse_error(s->line, msg); | |
| } | |
| s->link[2] = labels[idx]; | |
| } | |
| } | |
| /* --- Execute --- */ | |
| int vars[26] = {0}; | |
| int defined[26] = {0}; | |
| for (Stmt *pc = first; pc; ) { | |
| switch (pc->tag[1]) { | |
| case '=': { | |
| int lval = eval_atom(&pc->assign.a[0], vars, defined, pc->line); | |
| if (pc->assign.op) { | |
| int rval = eval_atom(&pc->assign.a[1], vars, defined, pc->line); | |
| switch (pc->assign.op) { | |
| case '+': lval = lval + rval; break; | |
| case '-': lval = lval - rval; break; | |
| case '*': lval = lval * rval; break; | |
| case '/': | |
| if (rval == 0) run_error(pc->line, "division by zero"); | |
| { int q = lval / rval; | |
| if ((lval ^ rval) < 0 && lval % rval != 0) q--; | |
| lval = q; } | |
| break; | |
| case '=': lval = (lval == rval) ? 1 : 0; break; | |
| case '!': lval = (lval != rval) ? 1 : 0; break; | |
| case '<': lval = (lval < rval) ? 1 : 0; break; | |
| case '>': lval = (lval > rval) ? 1 : 0; break; | |
| case '&': lval = (lval && rval) ? 1 : 0; break; | |
| case '|': lval = (lval || rval) ? 1 : 0; break; | |
| default: | |
| run_error(pc->line, "unknown operator"); | |
| } | |
| } | |
| int vi = pc->assign.var - 'a'; | |
| vars[vi] = lval; | |
| defined[vi] = 1; | |
| pc = pc->link[1]; | |
| break; | |
| } | |
| case 'P': { | |
| if (pc->print.pk == 'v') { | |
| int vi = pc->print.var - 'a'; | |
| if (!defined[vi]) | |
| run_error(pc->line, "undefined variable"); | |
| printf("%d ", vars[vi]); | |
| } else { | |
| const char *s = pc->print.str; | |
| int len = (int)strlen(s); | |
| if (len > 0 && s[len-1] == '!') { | |
| printf("%.*s\n", len - 1, s); | |
| } else if (len == 0 || (len == 1 && s[0] == '-')) { | |
| /* NOP / empty print */ | |
| } else { | |
| printf("%s ", s); | |
| } | |
| } | |
| pc = pc->link[1]; | |
| break; | |
| } | |
| case 'I': { | |
| printf("?? "); | |
| int val; | |
| if (scanf("%d", &val) != 1) | |
| run_error(pc->line, "invalid integer input"); | |
| int vi = pc->input.var - 'a'; | |
| vars[vi] = val; | |
| defined[vi] = 1; | |
| pc = pc->link[1]; | |
| break; | |
| } | |
| case 'J': { | |
| int cval = eval_atom(&pc->ifgoto.cond, vars, defined, pc->line); | |
| pc = cval ? pc->link[2] : pc->link[1]; | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| /* --- Main --- */ | |
| int main(int argc, char **argv) { | |
| if (argc != 2) { | |
| fprintf(stderr, "usage: mbx <file.bas>|\"<string>\"\n"); | |
| return 1; | |
| } | |
| char *src = NULL; | |
| FILE *f = fopen(argv[1], "r"); | |
| if (f) { | |
| size_t len = strlen(argv[1]); | |
| if (len < 4 || strcmp(argv[1] + len - 4, ".bas") != 0) { | |
| fprintf(stderr, "Error: input file must have a .bas extension\n"); | |
| fclose(f); | |
| return 1; | |
| } | |
| fseek(f, 0, SEEK_END); | |
| long sz = ftell(f); | |
| fseek(f, 0, SEEK_SET); | |
| src = malloc(sz + 1); | |
| if (!src) { perror("malloc"); return 1; } | |
| fread(src, 1, sz, f); | |
| src[sz] = '\0'; | |
| fclose(f); | |
| } else { | |
| src = malloc(strlen(argv[1]) + 1); | |
| if (!src) { perror("malloc"); return 1; } | |
| strcpy(src, argv[1]); | |
| for (char *p = src; *p; p++) | |
| if (*p == ';') *p = ' '; | |
| } | |
| const char *p = src; | |
| parse(&p, NULL, 1); | |
| free(src); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment