Skip to content

Instantly share code, notes, and snippets.

@joeyadams
Created April 13, 2011 06:26
Show Gist options
  • Save joeyadams/917067 to your computer and use it in GitHub Desktop.
Save joeyadams/917067 to your computer and use it in GitHub Desktop.
/*
* Copyright (C) 2011 Joseph Adams <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#define fatal(fmt, ...) do { \
fprintf(stderr, fmt "\n", ##__VA_ARGS__); \
exit(EXIT_FAILURE); \
} while (0)
#define alloc(type) ((type *) malloc(sizeof(type)))
typedef struct Token Token;
typedef struct Value Value;
typedef struct Stack Stack;
typedef struct Frame Frame;
typedef enum {V_INT, V_STACK, V_BLOCK} ValueTag;
struct Token
{
int c;
int line, col;
union {
/* when c == '{' */
Token *block_body;
/* when c is 'a', 'A', or '\'' */
int n;
/* when c == '?' */
struct {
Token *on_true;
Token *on_false;
};
};
};
struct Value
{
ValueTag tag;
union {
/* V_INT */
int n;
/* V_STACK */
Stack *stack;
/* V_BLOCK */
struct {
Token *code;
Stack *stack;
Frame *frame;
} block;
};
};
struct Stack
{
Stack *next;
Value *value;
};
struct Frame
{
Frame *next;
Stack *stack;
};
/*
* UTF-8 conversion functions based on code from PostgreSQL
*
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*/
#define REPLACEMENT_CHARACTER 0xFFFD
static int utf8_mblen(unsigned char c)
{
if ((c & 0x80) == 0)
return 1;
else if ((c & 0xe0) == 0xc0)
return 2;
else if ((c & 0xf0) == 0xe0)
return 3;
else if ((c & 0xf8) == 0xf0)
return 4;
else
return 1;
}
/*
* Convert a UTF-8 character to a Unicode code point.
* No error checks here, c must point to a long-enough string.
*/
static unsigned int utf8_to_unicode(const char *sc)
{
const unsigned char *c = (const unsigned char *) sc;
if ((*c & 0x80) == 0)
return c[0];
else if ((*c & 0xe0) == 0xc0)
return (((c[0] & 0x1f) << 6) |
(c[1] & 0x3f));
else if ((*c & 0xf0) == 0xe0)
return (((c[0] & 0x0f) << 12) |
((c[1] & 0x3f) << 6) |
(c[2] & 0x3f));
else if ((*c & 0xf8) == 0xf0)
return (((c[0] & 0x07) << 18) |
((c[1] & 0x3f) << 12) |
((c[2] & 0x3f) << 6) |
(c[3] & 0x3f));
else
return REPLACEMENT_CHARACTER;
}
/*
* Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
* space allocated.
*
* Return the length of the string generated.
*/
int unicode_to_utf8(unsigned int c, unsigned char *utf8string)
{
if (c <= 0x7F) {
utf8string[0] = c;
return 1;
} else if (c <= 0x7FF) {
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
utf8string[1] = 0x80 | (c & 0x3F);
return 2;
} else if (c <= 0xFFFF) {
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
utf8string[2] = 0x80 | (c & 0x3F);
return 3;
} else {
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
utf8string[3] = 0x80 | (c & 0x3F);
return 4;
}
}
static int lineno = 1, colno = 0;
static int utf8_fgetc(FILE *f)
{
int c;
char buffer[4];
int i;
int len;
c = fgetc(f);
if (c == EOF)
return EOF;
colno++;
buffer[0] = c;
len = utf8_mblen(c);
assert(len <= (int) sizeof(buffer));
for (i = 1; i < len; i++) {
c = fgetc(f);
if (c == EOF)
return REPLACEMENT_CHARACTER;
buffer[i] = c;
}
if (c == '\n') {
lineno++;
colno = 0;
}
return utf8_to_unicode(buffer);
}
static int utf8_fputc(int c, FILE *f)
{
unsigned char buffer[4];
int i;
int len;
if (c < 0)
c = REPLACEMENT_CHARACTER;
len = unicode_to_utf8(c, buffer);
for (i = 0; i < len; i++)
if (fputc(buffer[i], f) == EOF)
return EOF;
return c;
}
/*
* Dynamic array macros yanked from my darray module:
*
* http://ccan.ozlabs.org/info/darray.html
*/
#define darray(type) struct {type *item; size_t size; size_t alloc;}
#define darray_new() {0,0,0}
#define darray_append(arr, ...) do { \
darray_resize(arr, (arr).size+1); \
(arr).item[(arr).size-1] = (__VA_ARGS__); \
} while(0)
#define darray_resize(arr, newSize) darray_growalloc(arr, (arr).size = (newSize))
#define darray_realloc(arr, newAlloc) do { \
(arr).item = realloc((arr).item, ((arr).alloc = (newAlloc)) * sizeof(*(arr).item)); \
} while(0)
#define darray_growalloc(arr, need) do { \
size_t __need = (need); \
if (__need > (arr).alloc) \
darray_realloc(arr, darray_next_alloc((arr).alloc, __need)); \
} while(0)
static inline size_t darray_next_alloc(size_t alloc, size_t need)
{
if (alloc == 0)
alloc = 1;
while (alloc < need)
alloc *= 2;
return alloc;
}
/*
* Now back to our regularly scheduled programming.
*/
static int div_(int n, int d)
{
int div = n / d;
int mod = n % d;
if ((mod < 0 && d > 0) || (mod > 0 && d < 0))
div--;
return div;
}
static int mod_(int n, int d)
{
int mod = n % d;
if ((mod < 0 && d > 0) || (mod > 0 && d < 0))
mod += d;
return mod;
}
static const char *valueTypeString(Value *v)
{
switch (v->tag) {
case V_INT:
return "an int";
case V_STACK:
return "a stack";
case V_BLOCK:
return "a block";
default:
return "an <<invalid type>>";
}
}
static Value *mkInt(int n)
{
Value *v = alloc(Value);
v->tag = V_INT;
v->n = n;
return v;
}
static Value *mkStack(Stack *stack)
{
Value *v = alloc(Value);
v->tag = V_STACK;
v->stack = stack;
return v;
}
static Value *mkBlock(Token *code, Stack *stack, Frame *frame)
{
Value *v = alloc(Value);
v->tag = V_BLOCK;
v->block.code = code;
v->block.stack = stack;
v->block.frame = frame;
return v;
}
static Stack *push(Stack *s, Value *v)
{
Stack *ret = alloc(Stack);
assert(v != NULL);
ret->next = s;
ret->value = v;
return ret;
}
static Frame *pushFrame(Frame *f, Stack *stack)
{
Frame *ret = alloc(Frame);
ret->next = f;
ret->stack = stack;
return ret;
}
static Value *findVar(Stack *s, int n)
{
assert(n >= 0);
while (n-- > 0) {
s = s->next;
if (s == NULL)
return NULL;
}
return s->value;
}
static Token *parse(FILE *in, char endc)
{
darray(Token) arr = darray_new();
for (;;) {
int c = utf8_fgetc(in);
Token tok;
use_character:
if (c == EOF) {
if (ferror(in))
perror("blockscript");
fatal("%d: Unexpected end of file", lineno);
}
tok.line = lineno;
tok.col = colno;
switch (c) {
case '!':
case '&':
case '@':
case '[':
case ']':
case ',':
case '.':
case '+':
case '-':
case '*':
case '/':
case '%':
case '<':
case '>':
case '=':
case 0x2260: /* not equal */
case 0x2264: /* less than or equal */
case 0x2265: /* greater than or equal */
tok.c = c;
break;
case '{':
tok.c = c;
tok.block_body = parse(in, '}');
break;
case '?':
tok.c = c;
tok.on_true = parse(in, ':');
tok.on_false = parse(in, endc);
darray_append(arr, tok);
return arr.item;
case '\'':
tok.c = c;
c = utf8_fgetc(in);
if (c == EOF) {
if (ferror(in))
perror("blockscript");
fatal("%d: Unexpected end of file", lineno);
}
tok.n = c;
break;
case '#':
/* Comment to end of line */
for (;;) {
c = utf8_fgetc(in);
if (c == EOF) {
if (ferror(in))
perror("blockscript");
fatal("%d: Unexpected end of file", lineno);
}
if (c == '\n')
break;
}
continue;
default:
if (c == endc) {
tok.c = '\0';
darray_append(arr, tok);
return arr.item;
}
if (c == ':')
fatal("%d: Syntax error: `:' without opening `?'", lineno);
if (c == ';')
{
assert(endc == ':' || endc == '}');
fatal("%d: Semicolon cannot appear inside of %c ... %c",
lineno,
endc == ':' ? '?' : '{',
endc == ':' ? ':' : '}');
}
if (c == '}')
fatal("%d: Syntax error: `}' without opening `{'", lineno);
if (c >= 'a' && c <= 'z') {
tok.c = 'a';
tok.n = c - 'a';
break;
}
if (c >= 'A' && c <= 'Z') {
tok.c = 'A';
tok.n = c - 'A';
break;
}
if (c >= '0' && c <= '9') {
tok.c = '\'';
tok.n = c - '0';
for (;;) {
c = utf8_fgetc(in);
if (c >= '0' && c <= '9') {
tok.n *= 10;
tok.n += c - '0';
} else {
darray_append(arr, tok);
goto use_character;
}
}
}
continue;
}
darray_append(arr, tok);
}
assert(0);
}
static Value *call(Token *code, Stack *stack, Frame *frame)
{
for (;; code++) {
Value *head;
Value *result;
no_increment:
if (code->c == '\0')
return stack->value;
head = stack->value;
result = NULL;
switch (code->c) {
case '{':
result = mkBlock(code->block_body, stack, frame);
break;
case '!':
if (head->tag != V_BLOCK)
fatal("%d:%d: Cannot call %s",
code->line, code->col, valueTypeString(head));
if ((code+1)->c == '\0') {
/* tail call */
code = head->block.code;
stack = push(head->block.stack, mkStack(stack));
frame = head->block.frame;
goto no_increment;
} else {
result = call(head->block.code,
push(head->block.stack, mkStack(stack)),
head->block.frame);
break;
}
case '?': /* This operator doesn't push a value to the stack. */
if (head->tag == V_INT && head->n == 0)
code = code->on_false;
else
code = code->on_true;
goto no_increment;
case 'a':
result = findVar(stack, code->n);
if (result == NULL)
fatal("%d:%d: Stack index `%c' is too high",
code->line, code->col,
'a' + code->n);
break;
case 'A':
if (frame == NULL)
fatal("%d:%d: Cannot use %c without opening a frame with [",
code->line, code->col,
'A' + code->n);
result = findVar(frame->stack, code->n);
if (result == NULL)
fatal("%d:%d: Frame index `%c' is too high",
code->line, code->col,
'A' + code->n);
break;
case '&':
result = mkStack(stack);
break;
case '@': /* This operator changes the stack completely. */
if (head->tag != V_STACK)
fatal("%d:%d: Cannot use @ on %s",
code->line, code->col,
valueTypeString(head));
stack = head->stack;
continue;
case '[': /* This operator doesn't push a value to the stack. */
if (head->tag != V_STACK)
fatal("%d:%d: Cannot use [ on %s",
code->line, code->col,
valueTypeString(head));
frame = pushFrame(frame, head->stack);
continue;
case ']': /* This operator doesn't push a value to the stack. */
if (frame == NULL)
fatal("%d:%d: ] does not have a matching [ in scope",
code->line, code->col);
frame = frame->next;
continue;
case '\'':
result = mkInt(code->n);
break;
case ',':
{
int c = utf8_fgetc(stdin);
if (c == EOF && ferror(stdin))
perror("blockscript: ");
result = mkInt(c);
}
break;
case '.': /* This operator doesn't push a value to the stack. */
if (head->tag != V_INT)
fatal("%d:%d: Cannot print %s",
code->line, code->col,
valueTypeString(head));
utf8_fputc(head->n, stdout);
continue;
#define INTOP(expr, name) { \
Value *bv, *av; \
int b, a; \
\
if (stack->next == NULL) \
fatal("%d:%d: Too few arguments on stack for `" name "'", \
code->line, code->col); \
\
bv = stack->next->value; \
av = head; \
\
if (bv->tag != V_INT || av->tag != V_INT) \
fatal("%d:%d: Cannot use `" name "' on %s and %s", \
code->line, code->col, \
valueTypeString(bv), valueTypeString(av)); \
\
b = bv->n; \
a = av->n; \
\
result = mkInt(expr); \
} break;
case '+': INTOP(b + a, "+")
case '-': INTOP(b - a, "-")
case '*': INTOP(b * a, "*")
case '/': INTOP(div_(b, a), "/")
case '%': INTOP(mod_(b, a), "%%")
case '<': INTOP(b < a, "<")
case '>': INTOP(b > a, ">")
case '=': INTOP(b == a, "=")
case 0x2260: INTOP(b != a, "≠")
case 0x2264: INTOP(b <= a, "≤")
case 0x2265: INTOP(b >= a, "≥")
#undef INTOP
default:
fatal("%d:%d: Internal error: invalid token `%c'",
code->line, code->col,
code->c);
}
assert(result != NULL);
stack = push(stack, result);
}
return stack->value;
}
static void execute(Token *code)
{
call(code, push(NULL, mkStack(push(NULL, mkBlock(code, NULL, NULL)))), NULL);
}
int main(void)
{
/* Ensure int is big enough to hold Unicode characters. */
assert(sizeof(int) >= 4);
execute(parse(stdin, ';'));
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment