Created
April 13, 2011 06:26
-
-
Save joeyadams/917067 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2011 Joseph Adams <[email protected]> | |
* | |
* Permission is hereby granted, free of charge, to any person obtaining a copy | |
* of this software and associated documentation files (the "Software"), to deal | |
* in the Software without restriction, including without limitation the rights | |
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
* copies of the Software, and to permit persons to whom the Software is | |
* furnished to do so, subject to the following conditions: | |
* | |
* The above copyright notice and this permission notice shall be included in | |
* all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
* THE SOFTWARE. | |
*/ | |
#include <assert.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#define fatal(fmt, ...) do { \ | |
fprintf(stderr, fmt "\n", ##__VA_ARGS__); \ | |
exit(EXIT_FAILURE); \ | |
} while (0) | |
#define alloc(type) ((type *) malloc(sizeof(type))) | |
typedef struct Token Token; | |
typedef struct Value Value; | |
typedef struct Stack Stack; | |
typedef struct Frame Frame; | |
typedef enum {V_INT, V_STACK, V_BLOCK} ValueTag; | |
struct Token | |
{ | |
int c; | |
int line, col; | |
union { | |
/* when c == '{' */ | |
Token *block_body; | |
/* when c is 'a', 'A', or '\'' */ | |
int n; | |
/* when c == '?' */ | |
struct { | |
Token *on_true; | |
Token *on_false; | |
}; | |
}; | |
}; | |
struct Value | |
{ | |
ValueTag tag; | |
union { | |
/* V_INT */ | |
int n; | |
/* V_STACK */ | |
Stack *stack; | |
/* V_BLOCK */ | |
struct { | |
Token *code; | |
Stack *stack; | |
Frame *frame; | |
} block; | |
}; | |
}; | |
struct Stack | |
{ | |
Stack *next; | |
Value *value; | |
}; | |
struct Frame | |
{ | |
Frame *next; | |
Stack *stack; | |
}; | |
/* | |
* UTF-8 conversion functions based on code from PostgreSQL | |
* | |
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group | |
* Portions Copyright (c) 1994, Regents of the University of California | |
*/ | |
#define REPLACEMENT_CHARACTER 0xFFFD | |
static int utf8_mblen(unsigned char c) | |
{ | |
if ((c & 0x80) == 0) | |
return 1; | |
else if ((c & 0xe0) == 0xc0) | |
return 2; | |
else if ((c & 0xf0) == 0xe0) | |
return 3; | |
else if ((c & 0xf8) == 0xf0) | |
return 4; | |
else | |
return 1; | |
} | |
/* | |
* Convert a UTF-8 character to a Unicode code point. | |
* No error checks here, c must point to a long-enough string. | |
*/ | |
static unsigned int utf8_to_unicode(const char *sc) | |
{ | |
const unsigned char *c = (const unsigned char *) sc; | |
if ((*c & 0x80) == 0) | |
return c[0]; | |
else if ((*c & 0xe0) == 0xc0) | |
return (((c[0] & 0x1f) << 6) | | |
(c[1] & 0x3f)); | |
else if ((*c & 0xf0) == 0xe0) | |
return (((c[0] & 0x0f) << 12) | | |
((c[1] & 0x3f) << 6) | | |
(c[2] & 0x3f)); | |
else if ((*c & 0xf8) == 0xf0) | |
return (((c[0] & 0x07) << 18) | | |
((c[1] & 0x3f) << 12) | | |
((c[2] & 0x3f) << 6) | | |
(c[3] & 0x3f)); | |
else | |
return REPLACEMENT_CHARACTER; | |
} | |
/* | |
* Map a Unicode code point to UTF-8. utf8string must have 4 bytes of | |
* space allocated. | |
* | |
* Return the length of the string generated. | |
*/ | |
int unicode_to_utf8(unsigned int c, unsigned char *utf8string) | |
{ | |
if (c <= 0x7F) { | |
utf8string[0] = c; | |
return 1; | |
} else if (c <= 0x7FF) { | |
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F); | |
utf8string[1] = 0x80 | (c & 0x3F); | |
return 2; | |
} else if (c <= 0xFFFF) { | |
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F); | |
utf8string[1] = 0x80 | ((c >> 6) & 0x3F); | |
utf8string[2] = 0x80 | (c & 0x3F); | |
return 3; | |
} else { | |
utf8string[0] = 0xF0 | ((c >> 18) & 0x07); | |
utf8string[1] = 0x80 | ((c >> 12) & 0x3F); | |
utf8string[2] = 0x80 | ((c >> 6) & 0x3F); | |
utf8string[3] = 0x80 | (c & 0x3F); | |
return 4; | |
} | |
} | |
static int lineno = 1, colno = 0; | |
static int utf8_fgetc(FILE *f) | |
{ | |
int c; | |
char buffer[4]; | |
int i; | |
int len; | |
c = fgetc(f); | |
if (c == EOF) | |
return EOF; | |
colno++; | |
buffer[0] = c; | |
len = utf8_mblen(c); | |
assert(len <= (int) sizeof(buffer)); | |
for (i = 1; i < len; i++) { | |
c = fgetc(f); | |
if (c == EOF) | |
return REPLACEMENT_CHARACTER; | |
buffer[i] = c; | |
} | |
if (c == '\n') { | |
lineno++; | |
colno = 0; | |
} | |
return utf8_to_unicode(buffer); | |
} | |
static int utf8_fputc(int c, FILE *f) | |
{ | |
unsigned char buffer[4]; | |
int i; | |
int len; | |
if (c < 0) | |
c = REPLACEMENT_CHARACTER; | |
len = unicode_to_utf8(c, buffer); | |
for (i = 0; i < len; i++) | |
if (fputc(buffer[i], f) == EOF) | |
return EOF; | |
return c; | |
} | |
/* | |
* Dynamic array macros yanked from my darray module: | |
* | |
* http://ccan.ozlabs.org/info/darray.html | |
*/ | |
#define darray(type) struct {type *item; size_t size; size_t alloc;} | |
#define darray_new() {0,0,0} | |
#define darray_append(arr, ...) do { \ | |
darray_resize(arr, (arr).size+1); \ | |
(arr).item[(arr).size-1] = (__VA_ARGS__); \ | |
} while(0) | |
#define darray_resize(arr, newSize) darray_growalloc(arr, (arr).size = (newSize)) | |
#define darray_realloc(arr, newAlloc) do { \ | |
(arr).item = realloc((arr).item, ((arr).alloc = (newAlloc)) * sizeof(*(arr).item)); \ | |
} while(0) | |
#define darray_growalloc(arr, need) do { \ | |
size_t __need = (need); \ | |
if (__need > (arr).alloc) \ | |
darray_realloc(arr, darray_next_alloc((arr).alloc, __need)); \ | |
} while(0) | |
static inline size_t darray_next_alloc(size_t alloc, size_t need) | |
{ | |
if (alloc == 0) | |
alloc = 1; | |
while (alloc < need) | |
alloc *= 2; | |
return alloc; | |
} | |
/* | |
* Now back to our regularly scheduled programming. | |
*/ | |
static int div_(int n, int d) | |
{ | |
int div = n / d; | |
int mod = n % d; | |
if ((mod < 0 && d > 0) || (mod > 0 && d < 0)) | |
div--; | |
return div; | |
} | |
static int mod_(int n, int d) | |
{ | |
int mod = n % d; | |
if ((mod < 0 && d > 0) || (mod > 0 && d < 0)) | |
mod += d; | |
return mod; | |
} | |
static const char *valueTypeString(Value *v) | |
{ | |
switch (v->tag) { | |
case V_INT: | |
return "an int"; | |
case V_STACK: | |
return "a stack"; | |
case V_BLOCK: | |
return "a block"; | |
default: | |
return "an <<invalid type>>"; | |
} | |
} | |
static Value *mkInt(int n) | |
{ | |
Value *v = alloc(Value); | |
v->tag = V_INT; | |
v->n = n; | |
return v; | |
} | |
static Value *mkStack(Stack *stack) | |
{ | |
Value *v = alloc(Value); | |
v->tag = V_STACK; | |
v->stack = stack; | |
return v; | |
} | |
static Value *mkBlock(Token *code, Stack *stack, Frame *frame) | |
{ | |
Value *v = alloc(Value); | |
v->tag = V_BLOCK; | |
v->block.code = code; | |
v->block.stack = stack; | |
v->block.frame = frame; | |
return v; | |
} | |
static Stack *push(Stack *s, Value *v) | |
{ | |
Stack *ret = alloc(Stack); | |
assert(v != NULL); | |
ret->next = s; | |
ret->value = v; | |
return ret; | |
} | |
static Frame *pushFrame(Frame *f, Stack *stack) | |
{ | |
Frame *ret = alloc(Frame); | |
ret->next = f; | |
ret->stack = stack; | |
return ret; | |
} | |
static Value *findVar(Stack *s, int n) | |
{ | |
assert(n >= 0); | |
while (n-- > 0) { | |
s = s->next; | |
if (s == NULL) | |
return NULL; | |
} | |
return s->value; | |
} | |
static Token *parse(FILE *in, char endc) | |
{ | |
darray(Token) arr = darray_new(); | |
for (;;) { | |
int c = utf8_fgetc(in); | |
Token tok; | |
use_character: | |
if (c == EOF) { | |
if (ferror(in)) | |
perror("blockscript"); | |
fatal("%d: Unexpected end of file", lineno); | |
} | |
tok.line = lineno; | |
tok.col = colno; | |
switch (c) { | |
case '!': | |
case '&': | |
case '@': | |
case '[': | |
case ']': | |
case ',': | |
case '.': | |
case '+': | |
case '-': | |
case '*': | |
case '/': | |
case '%': | |
case '<': | |
case '>': | |
case '=': | |
case 0x2260: /* not equal */ | |
case 0x2264: /* less than or equal */ | |
case 0x2265: /* greater than or equal */ | |
tok.c = c; | |
break; | |
case '{': | |
tok.c = c; | |
tok.block_body = parse(in, '}'); | |
break; | |
case '?': | |
tok.c = c; | |
tok.on_true = parse(in, ':'); | |
tok.on_false = parse(in, endc); | |
darray_append(arr, tok); | |
return arr.item; | |
case '\'': | |
tok.c = c; | |
c = utf8_fgetc(in); | |
if (c == EOF) { | |
if (ferror(in)) | |
perror("blockscript"); | |
fatal("%d: Unexpected end of file", lineno); | |
} | |
tok.n = c; | |
break; | |
case '#': | |
/* Comment to end of line */ | |
for (;;) { | |
c = utf8_fgetc(in); | |
if (c == EOF) { | |
if (ferror(in)) | |
perror("blockscript"); | |
fatal("%d: Unexpected end of file", lineno); | |
} | |
if (c == '\n') | |
break; | |
} | |
continue; | |
default: | |
if (c == endc) { | |
tok.c = '\0'; | |
darray_append(arr, tok); | |
return arr.item; | |
} | |
if (c == ':') | |
fatal("%d: Syntax error: `:' without opening `?'", lineno); | |
if (c == ';') | |
{ | |
assert(endc == ':' || endc == '}'); | |
fatal("%d: Semicolon cannot appear inside of %c ... %c", | |
lineno, | |
endc == ':' ? '?' : '{', | |
endc == ':' ? ':' : '}'); | |
} | |
if (c == '}') | |
fatal("%d: Syntax error: `}' without opening `{'", lineno); | |
if (c >= 'a' && c <= 'z') { | |
tok.c = 'a'; | |
tok.n = c - 'a'; | |
break; | |
} | |
if (c >= 'A' && c <= 'Z') { | |
tok.c = 'A'; | |
tok.n = c - 'A'; | |
break; | |
} | |
if (c >= '0' && c <= '9') { | |
tok.c = '\''; | |
tok.n = c - '0'; | |
for (;;) { | |
c = utf8_fgetc(in); | |
if (c >= '0' && c <= '9') { | |
tok.n *= 10; | |
tok.n += c - '0'; | |
} else { | |
darray_append(arr, tok); | |
goto use_character; | |
} | |
} | |
} | |
continue; | |
} | |
darray_append(arr, tok); | |
} | |
assert(0); | |
} | |
static Value *call(Token *code, Stack *stack, Frame *frame) | |
{ | |
for (;; code++) { | |
Value *head; | |
Value *result; | |
no_increment: | |
if (code->c == '\0') | |
return stack->value; | |
head = stack->value; | |
result = NULL; | |
switch (code->c) { | |
case '{': | |
result = mkBlock(code->block_body, stack, frame); | |
break; | |
case '!': | |
if (head->tag != V_BLOCK) | |
fatal("%d:%d: Cannot call %s", | |
code->line, code->col, valueTypeString(head)); | |
if ((code+1)->c == '\0') { | |
/* tail call */ | |
code = head->block.code; | |
stack = push(head->block.stack, mkStack(stack)); | |
frame = head->block.frame; | |
goto no_increment; | |
} else { | |
result = call(head->block.code, | |
push(head->block.stack, mkStack(stack)), | |
head->block.frame); | |
break; | |
} | |
case '?': /* This operator doesn't push a value to the stack. */ | |
if (head->tag == V_INT && head->n == 0) | |
code = code->on_false; | |
else | |
code = code->on_true; | |
goto no_increment; | |
case 'a': | |
result = findVar(stack, code->n); | |
if (result == NULL) | |
fatal("%d:%d: Stack index `%c' is too high", | |
code->line, code->col, | |
'a' + code->n); | |
break; | |
case 'A': | |
if (frame == NULL) | |
fatal("%d:%d: Cannot use %c without opening a frame with [", | |
code->line, code->col, | |
'A' + code->n); | |
result = findVar(frame->stack, code->n); | |
if (result == NULL) | |
fatal("%d:%d: Frame index `%c' is too high", | |
code->line, code->col, | |
'A' + code->n); | |
break; | |
case '&': | |
result = mkStack(stack); | |
break; | |
case '@': /* This operator changes the stack completely. */ | |
if (head->tag != V_STACK) | |
fatal("%d:%d: Cannot use @ on %s", | |
code->line, code->col, | |
valueTypeString(head)); | |
stack = head->stack; | |
continue; | |
case '[': /* This operator doesn't push a value to the stack. */ | |
if (head->tag != V_STACK) | |
fatal("%d:%d: Cannot use [ on %s", | |
code->line, code->col, | |
valueTypeString(head)); | |
frame = pushFrame(frame, head->stack); | |
continue; | |
case ']': /* This operator doesn't push a value to the stack. */ | |
if (frame == NULL) | |
fatal("%d:%d: ] does not have a matching [ in scope", | |
code->line, code->col); | |
frame = frame->next; | |
continue; | |
case '\'': | |
result = mkInt(code->n); | |
break; | |
case ',': | |
{ | |
int c = utf8_fgetc(stdin); | |
if (c == EOF && ferror(stdin)) | |
perror("blockscript: "); | |
result = mkInt(c); | |
} | |
break; | |
case '.': /* This operator doesn't push a value to the stack. */ | |
if (head->tag != V_INT) | |
fatal("%d:%d: Cannot print %s", | |
code->line, code->col, | |
valueTypeString(head)); | |
utf8_fputc(head->n, stdout); | |
continue; | |
#define INTOP(expr, name) { \ | |
Value *bv, *av; \ | |
int b, a; \ | |
\ | |
if (stack->next == NULL) \ | |
fatal("%d:%d: Too few arguments on stack for `" name "'", \ | |
code->line, code->col); \ | |
\ | |
bv = stack->next->value; \ | |
av = head; \ | |
\ | |
if (bv->tag != V_INT || av->tag != V_INT) \ | |
fatal("%d:%d: Cannot use `" name "' on %s and %s", \ | |
code->line, code->col, \ | |
valueTypeString(bv), valueTypeString(av)); \ | |
\ | |
b = bv->n; \ | |
a = av->n; \ | |
\ | |
result = mkInt(expr); \ | |
} break; | |
case '+': INTOP(b + a, "+") | |
case '-': INTOP(b - a, "-") | |
case '*': INTOP(b * a, "*") | |
case '/': INTOP(div_(b, a), "/") | |
case '%': INTOP(mod_(b, a), "%%") | |
case '<': INTOP(b < a, "<") | |
case '>': INTOP(b > a, ">") | |
case '=': INTOP(b == a, "=") | |
case 0x2260: INTOP(b != a, "≠") | |
case 0x2264: INTOP(b <= a, "≤") | |
case 0x2265: INTOP(b >= a, "≥") | |
#undef INTOP | |
default: | |
fatal("%d:%d: Internal error: invalid token `%c'", | |
code->line, code->col, | |
code->c); | |
} | |
assert(result != NULL); | |
stack = push(stack, result); | |
} | |
return stack->value; | |
} | |
static void execute(Token *code) | |
{ | |
call(code, push(NULL, mkStack(push(NULL, mkBlock(code, NULL, NULL)))), NULL); | |
} | |
int main(void) | |
{ | |
/* Ensure int is big enough to hold Unicode characters. */ | |
assert(sizeof(int) >= 4); | |
execute(parse(stdin, ';')); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment