Skip to content

Instantly share code, notes, and snippets.

@vshabanov
Last active November 15, 2025 17:05
Show Gist options
  • Select an option

  • Save vshabanov/5b25605a7a4698f6e05391b4b4a07c7e to your computer and use it in GitHub Desktop.

Select an option

Save vshabanov/5b25605a7a4698f6e05391b4b4a07c7e to your computer and use it in GitHub Desktop.
RankN types support in C interpreter
/* Experiment with a uniform representation of values that can hold
references to C++ objects and be used polymorphically. Examples of
the Rank2 IO monad dictionary and relation extend are provided at
the bottom.
The approach is the same as in dynamically typed languages.
Primitive values are coupled with types at runtime. Functions only
keep their arity to know when to apply. Empty values ([] or
Nothing) are untyped.
No type inference is performed. We don't know the return type of a
function until it's called and the return value is available. We
don't know the array element type unless array has at least one
element.
This limits the reporting of value types to the top-level type only
and might delay some type checking failures. However, it is much
simpler (as there is no runtime type inference), more efficient
(the type is just a word) and converts RankN types from an
impossible (undecidable runtime type inference) to a trivial issue.
Arguably, one should use a proper compiler if the full function
type is required instead of inferring types at runtime.
The basic implementation idea is to have a uniform Value type
consisting of two words:
1. A tagged interface ID (an unboxed type tag or a pointer to the
interface ID), which allows us to perform runtime checks to
ensure that objects have compatible types.
2. The raw value itself (pointer, or an unboxed type).
The approach is less efficient than the standard FP language
implementation with a runtime type erasure (e.g., OCaml uses a
one-word uniform value format -- an integer when the lower bit is
set and a pointer otherwise), but allows to perform runtime checks
on manually written code and do not have segfaults.
The code is C89 to make things as transparent as possible. No
actual C++ objects are used here, and no memory management is
performed. The purpose is to demonstrate how to support RankN
types.
gcc -g -Wall -Wextra -Wpedantic -std=c89 -O0 rankn.c && ./a.out
*/
#include <assert.h>
#include <execinfo.h>
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* ---------- Utilities ---------- */
#define CAT(a, b) a##b
#define CAT2(a, b) CAT(a, b)
#define STATIC_ASSERT(cond) \
typedef char CAT2(static_assert_, __LINE__)[(cond) ? 1 : -1]
void printStacktrace(void) {
void *callstack[128];
int i, frames = backtrace(callstack, 128);
char **strs = backtrace_symbols(callstack, frames);
if (strs == NULL)
perror("can't get stack trace");
for (i = frames - 1; i > 0; --i) { /* do not print strs[0] == printStacktrace */
fprintf(stderr, "%s\n", strs[i]);
}
free(strs);
}
void fail(const char *msg, ...) __attribute__((noreturn));
void fail(const char *msg, ...) {
va_list args;
printStacktrace();
va_start(args, msg);
vfprintf(stderr, msg, args);
va_end(args);
exit(1);
}
typedef double myMalloc_align_t;
STATIC_ASSERT(sizeof(myMalloc_align_t) == 8);
/* bump allocator just for fun */
void *myMalloc(size_t size, const char *file, int line) {
static union {
char bytes[0x1000];
myMalloc_align_t _align; /* align the arena to 8 bytes */
} arena;
static char *ptr = arena.bytes;
char *r = ptr;
ptr += (size + 7) & ~7; /* align the next pointer to 8 bytes */
if (ptr - arena.bytes > (long)sizeof arena.bytes) {
size_t used = r - arena.bytes, total = sizeof arena.bytes;
fail("%s:%d: myMalloc arena exhausted: %lu/%lu bytes used (%lu free), %lu requested (arena at %p)\n",
file, line, used, total, total - used, size, arena.bytes);
}
/* printf("alloc: %3lu, used: %3ld, total: %5ld\n", size, ptr - r, ptr - arena.bytes); */
return r;
}
#define malloc(s) myMalloc(s, __FILE__, __LINE__)
/* ---------- Type tag ---------- */
typedef enum {
/* unboxed types */
TT_UNIT = 0,
TT_INT = 1,
TT_DOUBLE = 2,
/* (const char *) */
TT_STRING = 3,
/* boxed, refcounted object, includes functions, records, maybe, whatever.
'Type' has InterfaceId to check compatibility.
We can add an extra bit to implement built-in Maybe type.
It won't be possible to implement built-in Either as two types
are necessary.
*/
TT_OBJECT = 4,
TT_LAST = TT_OBJECT
} TypeTag;
#define TYPE_TAG_MASK ((uintptr_t)7)
/* we can increase the InterfaceId alignment with an attribute or pragma
to have more lower bits available */
STATIC_ASSERT(TYPE_TAG_MASK >= TT_LAST);
const char *typeTagStr(TypeTag t) {
switch (t) {
case TT_UNIT: return "TT_UNIT";
case TT_INT: return "TT_INT";
case TT_DOUBLE: return "TT_DOUBLE";
case TT_STRING: return "TT_STRING";
case TT_OBJECT: return "TT_OBJECT";
default: return "UNKNOWN";
}
}
/* ---------- Interface ID ---------- */
typedef struct InterfaceId_ {
int id;
const char *name; /* for debugging */
} InterfaceId;
const char *interfaceIdStr(const InterfaceId *i) { return i->name; }
int sameInterfaceId(const InterfaceId *a, const InterfaceId *b) {
return a == b || a->id == b->id;
}
/* ---------- Type ---------- */
typedef struct Type_ {
uintptr_t typeOrInterface;
} Type;
TypeTag typeTag(Type t) {
return (TypeTag)(t.typeOrInterface & TYPE_TAG_MASK);
}
InterfaceId *interfaceId(Type t) {
return (InterfaceId *)(t.typeOrInterface & ~TYPE_TAG_MASK);
}
/* constructor */
Type type(TypeTag tag, const InterfaceId *interface) {
Type t;
assert(tag <= TT_LAST);
assert(((uintptr_t)interface & TYPE_TAG_MASK) == 0);
t.typeOrInterface = tag + (uintptr_t)interface;
return t;
}
int sameType(Type a, Type b) {
return typeTag(a) == typeTag(b)
&& sameInterfaceId(interfaceId(a), interfaceId(b));
}
/* ---------- Value ---------- */
typedef union RawValue_ {
int asInt;
double asDouble;
const char *asString; /* for simplicity */
void *asPtr;
} RawValue;
typedef struct Value_ {
Type type;
RawValue value;
} Value;
STATIC_ASSERT(sizeof(Value) == 16);
TypeTag valueTypeTag(Value v) { return typeTag(v.type); }
InterfaceId *valueInterfaceId(Value v) { return interfaceId(v.type); }
void assertTypeTag(Value v, TypeTag t) {
if (valueTypeTag(v) != t)
fail("tag mismatch %s != %s", typeTagStr(valueTypeTag(v)), typeTagStr(t));
}
void assertInterfaceId(Value v, const InterfaceId *i) {
assertTypeTag(v, TT_OBJECT);
if (!sameInterfaceId(valueInterfaceId(v), i))
fail("interface mismatch %s != %s", interfaceIdStr(valueInterfaceId(v)), interfaceIdStr(i));
}
void assertType(Value v, Type t) {
assertTypeTag(v, typeTag(t));
if (typeTag(t) == TT_OBJECT) {
assertInterfaceId(v, interfaceId(t));
}
}
int getInt(Value v) {
assertTypeTag(v, TT_INT);
return v.value.asInt;
}
double getDouble(Value v) {
assertTypeTag(v, TT_DOUBLE);
return v.value.asDouble;
}
const char *getString(Value v) {
assertTypeTag(v, TT_STRING);
return v.value.asString;
}
void *getPtr(Value v, const InterfaceId *i) {
assertInterfaceId(v, i);
return v.value.asPtr;
}
Value unitValue(void) {
Value v;
v.type = type(TT_UNIT, NULL);
v.value.asPtr = NULL;
return v;
}
Value intValue(int x) {
Value v;
v.type = type(TT_INT, NULL);
v.value.asInt = x;
return v;
}
Value doubleValue(double x) {
Value v;
v.type = type(TT_DOUBLE, NULL);
v.value.asDouble = x;
return v;
}
Value stringValue(const char *x) {
Value v;
v.type = type(TT_STRING, NULL);
v.value.asString = x;
return v;
}
Value objectValue(const InterfaceId *iid, void *ptr) {
Value v;
v.type = type(TT_OBJECT, iid);
v.value.asPtr = ptr;
return v;
}
/* ---------- Record ---------- */
static const InterfaceId II_RECORD = {0, "record"};
typedef struct RecordField_ {
const char *name;
Value value;
} RecordField;
typedef struct Record_ {
int size;
RecordField fields[1 /* size */];
} Record;
/* we could create a separate data structure with field names and
types and only keep RawValue[] in Record. This may reduce memory
usage if we have many records with the same structure.
*/
size_t emptyRecordSize(void) { return offsetof(Record, fields); }
Record *recordPtr(Value v) { return (Record *)getPtr(v, &II_RECORD); }
int recordSize(Value v) { return recordPtr(v)->size; }
RecordField *recordFields(Value v) { return recordPtr(v)->fields; }
Value recordField(Value v, const char *name) {
int i, size = recordSize(v);
RecordField *f = recordFields(v);
for (i = 0; i < size; i++) {
if (strcmp(f[i].name, name) == 0)
return f[i].value;
}
fail("field '%s' is not found", name);
}
Value recordN(int size, RecordField **fields) {
Record *r;
assert(size >= 0);
r = (Record *)malloc(emptyRecordSize() + sizeof(r->fields[0]) * (unsigned)size);
r->size = size;
*fields = r->fields;
return objectValue(&II_RECORD, r);
}
Value record1(const char *name0, Value value0) {
RecordField *f; Value r = recordN(1, &f);
f[0].name = name0; f[0].value = value0;
return r;
}
Value record2(const char *name0, Value value0,
const char *name1, Value value1) {
RecordField *f; Value r = recordN(2, &f);
f[0].name = name0; f[0].value = value0;
f[1].name = name1; f[1].value = value1;
return r;
}
Value recordExtend(Value v, const char *name, Value x) {
int i, size = recordSize(v), found = 0;
RecordField *in = recordFields(v);
RecordField *out;
Value r = recordN(size+1, &out);
for (i = 0; i < size; i++) {
out[i].name = in[i].name;
if (strcmp(in[i].name, name) == 0) {
out[i].value = x;
found = 1;
} else {
out[i].value = in[i].value;
}
}
if (!found) {
out[size].name = name;
out[size].value = x;
} else {
recordPtr(r)->size = size; /* decrease the size */
}
return r;
}
/* ---------- Array ---------- */
static const InterfaceId II_ARRAY = {1, "array"};
typedef union ArrayElems_ {
RawValue asRaw[1 /* size */];
int asInt[1 /* size */];
} ArrayElems;
/* arrays are packed: one type and RawValue[] or int[]
But it must be isomorphic to Value[] -- an empty array has no type.
*/
typedef struct Array_ {
int size;
Type type;
ArrayElems elems;
} Array;
size_t emptyArraySize(void) { return offsetof(Array, elems); }
Array *arrayPtr(Value v) { return (Array *)getPtr(v, &II_ARRAY); }
int arraySize(Value v) { return arrayPtr(v)->size; }
Value arrayElem(Value v, int i) {
int size = arraySize(v);
Array *a = arrayPtr(v);
Value r;
assert(i >= 0);
assert(i < size);
r.type = a->type;
if (typeTag(a->type) == TT_INT) {
r.value.asInt = a->elems.asInt[i];
} else {
r.value = a->elems.asRaw[i];
}
return r;
}
Value arrayN(int size, Value *elems) {
Array *r; int i;
Type t = size >= 1 ? elems[0].type : type(TT_UNIT, NULL);
size_t elemSize = typeTag(t) == TT_INT ? sizeof r->elems.asInt[0] : sizeof r->elems.asRaw[0];
assert(size >= 0);
r = (Array *)malloc(emptyArraySize() + elemSize * (unsigned)size);
r->type = t;
r->size = size;
for (i = 0; i < size; ++i) {
assertType(elems[i], r->type);
if (typeTag(t) == TT_INT) {
r->elems.asInt[i] = elems[i].value.asInt;
} else {
r->elems.asRaw[i] = elems[i].value;
}
}
return objectValue(&II_ARRAY, r);
}
Value emptyArray(void) { return arrayN(0, NULL); }
Value array1(Value value0) { return arrayN(1, &value0); }
Value array2(Value value0, Value value1) {
Value vs[2];
vs[0] = value0;
vs[1] = value1;
return arrayN(2, vs);
}
/* ---------- Function ---------- */
static const InterfaceId II_FUNCTION = {2, "function"};
/* closure: native function + environment */
typedef struct Function_ {
int arity; /* does not include envSize */
void *function; /* Value *( *)(Value v0, ... Value v_arity+envSize-1) */
const char *name; /* debugging */
int envSize; /* captured environment */
Value env[1 /* envSize */];
/* We can keep 'Type's of arguments for autogenerated functions
and check them while creating a closure to show type errors before
applying the function. But it may slowdown closure creation.
*/
} Function;
size_t emptyFunctionSize(void) { return offsetof(Function, env); }
Function *functionPtr(Value v) { return (Function *)getPtr(v, &II_FUNCTION); }
int functionArity(Value v) { return functionPtr(v)->arity; }
const char *functionName(Value v) { return functionPtr(v)->name; }
Value *functionEnv(Value v) { return functionPtr(v)->env; }
typedef Value (*ValueFunction1)(Value);
typedef Value (*ValueFunction2)(Value, Value);
typedef Value (*ValueFunction3)(Value, Value, Value);
Value functionN(const char *name, void *function, int arity, int envSize, Value *env, int extraEnv) {
Function *f;
int i;
assert(arity >= 1);
assert(envSize >= 0);
f = (Function *)malloc(emptyFunctionSize() + sizeof(f->env[0]) * (unsigned)(envSize + extraEnv));
/* extraEnv initialization is on the caller */
f->arity = arity;
f->function = function;
f->name = name;
f->envSize = envSize + extraEnv;
for (i = 0; i < envSize; i++)
f->env[i] = env[i];
return objectValue(&II_FUNCTION, f);
}
Value function1(const char *name, ValueFunction1 f) {
return functionN(name, (void *)f, 1, 0, NULL, 0);
}
Value function2(const char *name, ValueFunction2 f) {
return functionN(name, (void *)f, 2, 0, NULL, 0);
}
Value function3(const char *name, ValueFunction3 f) {
return functionN(name, (void *)f, 3, 0, NULL, 0);
}
Value apply1(Value fv, Value a) {
Function *f = functionPtr(fv);
if (f->arity == 1) {
switch (f->arity + f->envSize) {
case 1: return ((ValueFunction1)f->function)(a);
case 2: return ((ValueFunction2)f->function)(f->env[0], a);
case 3: return ((ValueFunction3)f->function)(f->env[0], f->env[1], a);
default:
fail("arity > 3 is not yet implemented (arity = %d, envSize = %d", f->arity, f->envSize);
}
} else {
Value r = functionN(f->name, f->function, f->arity - 1, f->envSize, f->env, 1);
functionEnv(r)[f->envSize] = a; /* initialize extraEnv element */
return r;
}
}
Value apply2(Value fv, Value a, Value b) {
return apply1(apply1(fv, a), b);
}
/* ---------- Relation operations ---------- */
static const InterfaceId II_RELATION = {3, "relation"};
typedef struct Relation_ {
/* relation here is just a record of arrays */
Value record;
} Relation;
Value relationRecord(Value v) { return ((Relation *)getPtr(v, &II_RELATION))->record; }
Value recordToRelation(Value v) {
Relation *r = (Relation *)malloc(sizeof *r);
r->record = v;
return objectValue(&II_RELATION, r);
}
int relationSize(Value v) { return recordSize(relationRecord(v)); }
Value relation1(const char *col0, Value values0) {
assertInterfaceId(values0, &II_ARRAY);
return recordToRelation(record1(col0, values0));
}
Value relation2(const char *col0, Value values0,
const char *col1, Value values1) {
assertInterfaceId(values0, &II_ARRAY);
assertInterfaceId(values1, &II_ARRAY);
assert(arraySize(values0) == arraySize(values1));
return recordToRelation(record2(col0, values0, col1, values1));
}
Value relationExtend(Value f, Value inCols, const char *outCol, Value rel) {
const int nArgs = arraySize(inCols);
Value *inArrays = (Value *)malloc((size_t)nArgs * sizeof(Value));
Value *outArray;
Value rec = relationRecord(rel);
int r, c, nRows;
for (c = 0; c < nArgs; c++) {
inArrays[c] = recordField(rec, getString(arrayElem(inCols, c)));
}
if (recordSize(rec) == 0) {
return relation1(outCol, emptyArray());
} else {
/* the record is non-empty here */
nRows = arraySize(nArgs > 0 ? inArrays[0] : recordFields(rec)[0].value);
outArray = (Value *)malloc((size_t)nRows * sizeof(Value));
for (r = 0; r < nRows; r++) {
Value o = f;
for (c = 0; c < nArgs; c++)
o = apply1(o, arrayElem(inArrays[c], r));
outArray[r] = o;
}
return recordToRelation(recordExtend(rec, outCol, arrayN(nRows, outArray)));
}
}
/* ---------- Debug printing ---------- */
int printValue(char *buf, int size, Value v) {
assert(size >= 0);
switch (valueTypeTag(v)) {
case TT_UNIT:
return snprintf(buf, size, "()");
case TT_INT:
return snprintf(buf, size, "%d", getInt(v));
case TT_DOUBLE:
return snprintf(buf, size, "%f", getDouble(v));
case TT_STRING:
return snprintf(buf, size, "\"%s\"", getString(v));
case TT_OBJECT: {
InterfaceId *ii = valueInterfaceId(v);
if (sameInterfaceId(ii, &II_RECORD)) {
int i, rSize = recordSize(v), nPrinted = 0;
RecordField *f = recordFields(v);
char pre = '{';
for (i = 0; i < rSize; i++) {
int n = snprintf(buf, size, "%c .%s = ", pre, f[i].name);
nPrinted += n; size -= n; buf += n;
if (nPrinted >= size) return nPrinted;
pre = ',';
n = printValue(buf, size, f[i].value);
nPrinted += n; size -= n; buf += n;
if (nPrinted >= size) return nPrinted;
}
return nPrinted + snprintf(buf, size, " }");
} else if (sameInterfaceId(ii, &II_FUNCTION)) {
/* how to print argument types?
could use value stubs auto-generated from IDL.
what to do with Maybe (Maybe (Either ...)) ?
too many nested stubs
what to do with polymorphic functions?
can't make any stubs, need unification (nested!) at runtime
It must be done by the compiler, not by some C code.
*/
return snprintf(buf, size, "%s/%d", functionName(v), functionArity(v));
} else if (sameInterfaceId(ii, &II_ARRAY)) {
int i, aSize = arraySize(v), nPrinted = 0;
const char *pre = "[";
for (i = 0; i < aSize; i++) {
int n = snprintf(buf, size, "%s", pre);
nPrinted += n; size -= n; buf += n;
if (nPrinted >= size) return nPrinted;
pre = ", ";
n = printValue(buf, size, arrayElem(v, i));
nPrinted += n; size -= n; buf += n;
if (nPrinted >= size) return nPrinted;
}
return nPrinted + snprintf(buf, size, "]");
} else {
return snprintf(buf, size, "object<%s>", interfaceIdStr(ii));
}
}
default:
return snprintf(buf, size, "unknown type tag %d?", (int)(valueTypeTag(v)));
}
}
/* truncates or pads the value to always use the specified width */
void printValueFixedWidth(int n, Value v) {
int i;
char *buf;
assert(n >= 0);
buf = (char *)malloc((size_t)n+1);
i = printValue(buf, n+1, v);
if (i < n) { for (; i < n; i++) buf[i] = ' '; buf[n] = '\0'; }
fputs(buf, stdout);
}
void printValueNewline(Value v) {
char buf[1000];
printValue(buf, sizeof buf, v);
puts(buf);
}
void printRelation(Value v) {
const int colWidth = 10;
Record *rec = recordPtr(relationRecord(v));
int c, r, nRows;
if (rec->size == 0) {
printf("<empty relation>");
} else {
printf("|");
for (c = 0; c < rec->size; c++)
printf(" %-*.*s |", colWidth, colWidth, rec->fields[c].name);
puts("");
nRows = arraySize(rec->fields[0].value);
for (r = 0; r < nRows; r++) {
printf("|");
for (c = 0; c < rec->size; c++) {
printf(" ");
printValueFixedWidth(colWidth, arrayElem(rec->fields[c].value, r));
printf(" |");
}
puts("");
}
}
}
/* ---------- LET'S PLAY! ---------- */
Value f_sin(Value x) { return doubleValue(sin(getDouble(x))); }
Value f_plusDouble(Value a, Value b) { return doubleValue(getDouble(a) + getDouble(b)); }
Value f_plusInt(Value a, Value b) { return intValue(getInt(a) + getInt(b)); }
Value f_id(Value x) { return x; }
Value f_compose(Value f, Value g, Value x) { return apply1(f, apply1(g,x)); }
Value f_map(Value f, Value xs) {
int i, size = arraySize(xs);
Value *rs = (Value *)malloc(sizeof *rs * size);
for (i = 0; i < size; i++) {
rs[i] = apply1(f, arrayElem(xs, i));
}
return arrayN(size, rs);
}
/* IO a === () -> a */
Value f_IO_pure(Value x, Value _) { return x; }
Value f_IO_print(Value x, Value _) { printValueNewline(x); return unitValue(); }
Value f_IO_bind(Value a, Value b, Value _) {
Value x = apply1(a, unitValue());
Value m = apply1(b, x);
return apply1(m, unitValue());
}
Value bindTest(Value dict, Value x, Value m) {
/* pure x >>= m */
return apply2(recordField(dict, "bind"),
apply1(recordField(dict, "pure"), x),
m);
}
int main(void) {
{
/* IO monad and basic types */
Value IO_pure = function2("IO_pure", f_IO_pure);
Value IO_bind = function3("IO_bind", f_IO_bind);
Value IO_dict = record2("pure", IO_pure, "bind", IO_bind);
Value IO_print = function2("IO_print", f_IO_print);
Value sin = function1("sin", f_sin);
Value id = function1("id", f_id);
Value compose = function3("compose", f_compose);
Value rec1 =
record2(
"foo", record2("a", f_map(apply1(function2("plus", f_plusInt),
intValue(10)),
array2(intValue(1), intValue(2))),
"f", IO_bind),
"bar", apply1(apply2(compose, id, sin), doubleValue(M_PI)));
Value rec2 = recordExtend(rec1, "dori", stringValue("nori"));
apply1(bindTest(IO_dict, rec2, IO_print), unitValue());
}
{
/* relations */
Value baseRel = relation2(
"a", array2(doubleValue(1), doubleValue(2)),
"b", array2(doubleValue(3), doubleValue(4)));
Value relC = relationExtend(stringValue("foo"), emptyArray(), "c", baseRel);
Value relD =
relationExtend(
function2("plus", f_plusDouble),
array2(stringValue("a"), stringValue("b")), "d", relC);
printRelation(relD);
}
return 0;
}
@vshabanov
Copy link
Author

outputs

{ .foo = { .a = [11, 12], .f = IO_bind/3 }, .bar = 0.000000, .dori = "nori" }
| a          | b          | c          | d          |
| 1.000000   | 3.000000   | "foo"      | 4.000000   |
| 2.000000   | 4.000000   | "foo"      | 6.000000   |

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment