vshabanov · November 15, 2025 17:05 · vshabanov · Sep 21, 2025
diff --git a/rankn.c b/rankn.c
 /* Experiment with a uniform representation of values that can hold
   references to C++ objects and be used polymorphically. Examples of
   the Rank2 IO monad dictionary and relation extend are provided at
   the bottom.

   The approach is the same as in dynamically typed languages.
   Primitive values are coupled with types at runtime. Functions only
   keep their arity to know when to apply. Empty values ([] or
   Nothing) are untyped.

   No type inference is performed. We don't know the return type of a
   function until it's called and the return value is available. We
   don't know the array element type unless array has at least one
   element.

   This limits the reporting of value types to the top-level type only
   and might delay some type checking failures. However, it is much
   simpler (as there is no runtime type inference), more efficient
   (the type is just a word) and converts RankN types from an
   impossible (undecidable runtime type inference) to a trivial issue.

   Arguably, one should use a proper compiler if the full function
   type is required instead of inferring types at runtime.

   The basic implementation idea is to have a uniform Value type
   consisting of two words:
   1. A tagged interface ID (an unboxed type tag or a pointer to the
      interface ID), which allows us to perform runtime checks to
      ensure that objects have compatible types.
   2. The raw value itself (pointer, or an unboxed type).

   The approach is less efficient than the standard FP language
   implementation with a runtime type erasure (e.g., OCaml uses a
   one-word uniform value format -- an integer when the lower bit is
   set and a pointer otherwise), but allows to perform runtime checks
   on manually written code and do not have segfaults.

   The code is C89 to make things as transparent as possible. No
   actual C++ objects are used here, and no memory management is
   performed. The purpose is to demonstrate how to support RankN
   types.

   gcc -g -Wall -Wextra -Wpedantic -std=c89 -O0 rankn.c && ./a.out
 */

 #include <assert.h>
 #include <execinfo.h>
 #include <math.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 /* ---------- Utilities ---------- */

 #define CAT(a, b) a##b
 #define CAT2(a, b) CAT(a, b)
 #define STATIC_ASSERT(cond) \
    typedef char CAT2(static_assert_, __LINE__)[(cond) ? 1 : -1]

 void printStacktrace(void) {
    void *callstack[128];
    int i, frames = backtrace(callstack, 128);
    char **strs = backtrace_symbols(callstack, frames);
    if (strs == NULL)
        perror("can't get stack trace");
    for (i = frames - 1; i > 0; --i) { /* do not print strs[0] == printStacktrace */
        fprintf(stderr, "%s\n", strs[i]);
    }
    free(strs);
 }

 void fail(const char *msg, ...) __attribute__((noreturn));
 void fail(const char *msg, ...) {
    va_list args;
    printStacktrace();
    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);
    exit(1);
 }

 typedef double myMalloc_align_t;
 STATIC_ASSERT(sizeof(myMalloc_align_t) == 8);

 /* bump allocator just for fun */
 void *myMalloc(size_t size, const char *file, int line) {
    static union {
        char bytes[0x1000];
        myMalloc_align_t _align; /* align the arena to 8 bytes */
    } arena;
    static char *ptr = arena.bytes;

    char *r = ptr;
    ptr += (size + 7) & ~7;    /* align the next pointer to 8 bytes */

    if (ptr - arena.bytes > (long)sizeof arena.bytes) {
        size_t used = r - arena.bytes, total = sizeof arena.bytes;
        fail("%s:%d: myMalloc arena exhausted: %lu/%lu bytes used (%lu free), %lu requested (arena at %p)\n",
             file, line, used, total, total - used, size, arena.bytes);
    }
 /*     printf("alloc: %3lu, used: %3ld, total: %5ld\n", size, ptr - r, ptr - arena.bytes); */
    return r;
 }
 #define malloc(s) myMalloc(s, __FILE__, __LINE__)

 /* ---------- Type tag ---------- */

 typedef enum {
    /* unboxed types */
    TT_UNIT = 0,
    TT_INT = 1,
    TT_DOUBLE = 2,

    /* (const char *) */
    TT_STRING = 3,

    /* boxed, refcounted object, includes functions, records, maybe, whatever.
       'Type' has InterfaceId to check compatibility.
       We can add an extra bit to implement built-in Maybe type.
       It won't be possible to implement built-in Either as two types
       are necessary.
     */
    TT_OBJECT = 4,
    TT_LAST = TT_OBJECT
 } TypeTag;

 #define TYPE_TAG_MASK ((uintptr_t)7)
 /* we can increase the InterfaceId alignment with an attribute or pragma
   to have more lower bits available */

 STATIC_ASSERT(TYPE_TAG_MASK >= TT_LAST);

 const char *typeTagStr(TypeTag t) {
    switch (t) {
        case TT_UNIT:   return "TT_UNIT";
        case TT_INT:    return "TT_INT";
        case TT_DOUBLE: return "TT_DOUBLE";
        case TT_STRING: return "TT_STRING";
        case TT_OBJECT: return "TT_OBJECT";
        default:        return "UNKNOWN";
    }
 }

 /* ---------- Interface ID ---------- */

 typedef struct InterfaceId_ {
    int id;
    const char *name;           /* for debugging */
 } InterfaceId;

 const char *interfaceIdStr(const InterfaceId *i) { return i->name; }

 int sameInterfaceId(const InterfaceId *a, const InterfaceId *b) {
    return a == b || a->id == b->id;
 }

 /* ---------- Type ---------- */

 typedef struct Type_ {
    uintptr_t typeOrInterface;
 } Type;

 TypeTag typeTag(Type t) {
    return (TypeTag)(t.typeOrInterface & TYPE_TAG_MASK);
 }
 InterfaceId *interfaceId(Type t) {
    return (InterfaceId *)(t.typeOrInterface & ~TYPE_TAG_MASK);
 }
 /* constructor */
 Type type(TypeTag tag, const InterfaceId *interface) {
    Type t;
    assert(tag <= TT_LAST);
    assert(((uintptr_t)interface & TYPE_TAG_MASK) == 0);
    t.typeOrInterface = tag + (uintptr_t)interface;
    return t;
 }

 int sameType(Type a, Type b) {
    return typeTag(a) == typeTag(b)
        && sameInterfaceId(interfaceId(a), interfaceId(b));
 }

 /* ---------- Value ---------- */

 typedef union RawValue_ {
    int asInt;
    double asDouble;
    const char *asString;       /* for simplicity */
    void *asPtr;
 } RawValue;

 typedef struct Value_ {
    Type type;
    RawValue value;
 } Value;

 STATIC_ASSERT(sizeof(Value) == 16);

 TypeTag valueTypeTag(Value v) { return typeTag(v.type); }
 InterfaceId *valueInterfaceId(Value v) { return interfaceId(v.type); }

 void assertTypeTag(Value v, TypeTag t) {
    if (valueTypeTag(v) != t)
        fail("tag mismatch %s != %s", typeTagStr(valueTypeTag(v)), typeTagStr(t));
 }
 void assertInterfaceId(Value v, const InterfaceId *i) {
    assertTypeTag(v, TT_OBJECT);
    if (!sameInterfaceId(valueInterfaceId(v), i))
        fail("interface mismatch %s != %s", interfaceIdStr(valueInterfaceId(v)), interfaceIdStr(i));
 }
 void assertType(Value v, Type t) {
    assertTypeTag(v, typeTag(t));
    if (typeTag(t) == TT_OBJECT) {
        assertInterfaceId(v, interfaceId(t));
    }
 }

 int getInt(Value v) {
    assertTypeTag(v, TT_INT);
    return v.value.asInt;
 }
 double getDouble(Value v) {
    assertTypeTag(v, TT_DOUBLE);
    return v.value.asDouble;
 }
 const char *getString(Value v) {
    assertTypeTag(v, TT_STRING);
    return v.value.asString;
 }
 void *getPtr(Value v, const InterfaceId *i) {
    assertInterfaceId(v, i);
    return v.value.asPtr;
 }
 Value unitValue(void) {
    Value v;
    v.type = type(TT_UNIT, NULL);
    v.value.asPtr = NULL;
    return v;
 }
 Value intValue(int x) {
    Value v;
    v.type = type(TT_INT, NULL);
    v.value.asInt = x;
    return v;
 }
 Value doubleValue(double x) {
    Value v;
    v.type = type(TT_DOUBLE, NULL);
    v.value.asDouble = x;
    return v;
 }
 Value stringValue(const char *x) {
    Value v;
    v.type = type(TT_STRING, NULL);
    v.value.asString = x;
    return v;
 }
 Value objectValue(const InterfaceId *iid, void *ptr) {
    Value v;
    v.type = type(TT_OBJECT, iid);
    v.value.asPtr = ptr;
    return v;
 }

 /* ---------- Record ---------- */

 static const InterfaceId II_RECORD = {0, "record"};

 typedef struct RecordField_ {
    const char *name;
    Value value;
 } RecordField;

 typedef struct Record_ {
    int size;
    RecordField fields[1 /* size */];
 } Record;
 /* we could create a separate data structure with field names and
   types and only keep RawValue[] in Record. This may reduce memory
   usage if we have many records with the same structure.
 */
 size_t emptyRecordSize(void) { return offsetof(Record, fields); }

 Record *recordPtr(Value v) { return (Record *)getPtr(v, &II_RECORD); }
 int recordSize(Value v) { return recordPtr(v)->size; }
 RecordField *recordFields(Value v) { return recordPtr(v)->fields; }
 Value recordField(Value v, const char *name) {
    int i, size = recordSize(v);
    RecordField *f = recordFields(v);
    for (i = 0; i < size; i++) {
        if (strcmp(f[i].name, name) == 0)
            return f[i].value;
    }
    fail("field '%s' is not found", name);
 }
 Value recordN(int size, RecordField **fields) {
    Record *r;
    assert(size >= 0);
    r = (Record *)malloc(emptyRecordSize() + sizeof(r->fields[0]) * (unsigned)size);
    r->size = size;
    *fields = r->fields;
    return objectValue(&II_RECORD, r);
 }
 Value record1(const char *name0, Value value0) {
    RecordField *f; Value r = recordN(1, &f);
    f[0].name = name0; f[0].value = value0;
    return r;
 }
 Value record2(const char *name0, Value value0,
              const char *name1, Value value1) {
    RecordField *f; Value r = recordN(2, &f);
    f[0].name = name0; f[0].value = value0;
    f[1].name = name1; f[1].value = value1;
    return r;
 }
 Value recordExtend(Value v, const char *name, Value x) {
    int i, size = recordSize(v), found = 0;
    RecordField *in = recordFields(v);
    RecordField *out;
    Value r = recordN(size+1, &out);
    for (i = 0; i < size; i++) {
        out[i].name = in[i].name;
        if (strcmp(in[i].name, name) == 0) {
            out[i].value = x;
            found = 1;
        } else {
            out[i].value = in[i].value;
        }
    }
    if (!found) {
        out[size].name = name;
        out[size].value = x;
    } else {
        recordPtr(r)->size = size; /* decrease the size */
    }
    return r;
 }

 /* ---------- Array ---------- */

 static const InterfaceId II_ARRAY = {1, "array"};

 typedef union ArrayElems_ {
    RawValue asRaw[1 /* size */];
    int asInt[1 /* size */];
 } ArrayElems;
 /* arrays are packed: one type and RawValue[] or int[]
   But it must be isomorphic to Value[] -- an empty array has no type.
 */
 typedef struct Array_ {
    int size;
    Type type;
    ArrayElems elems;
 } Array;

 size_t emptyArraySize(void) { return offsetof(Array, elems); }

 Array *arrayPtr(Value v) { return (Array *)getPtr(v, &II_ARRAY); }
 int arraySize(Value v) { return arrayPtr(v)->size; }
 Value arrayElem(Value v, int i) {
    int size = arraySize(v);
    Array *a = arrayPtr(v);
    Value r;
    assert(i >= 0);
    assert(i < size);
    r.type = a->type;
    if (typeTag(a->type) == TT_INT) {
        r.value.asInt = a->elems.asInt[i];
    } else {
        r.value = a->elems.asRaw[i];
    }
    return r;
 }
 Value arrayN(int size, Value *elems) {
    Array *r; int i;
    Type t = size >= 1 ? elems[0].type : type(TT_UNIT, NULL);
    size_t elemSize = typeTag(t) == TT_INT ? sizeof r->elems.asInt[0] : sizeof r->elems.asRaw[0];
    assert(size >= 0);
    r = (Array *)malloc(emptyArraySize() + elemSize * (unsigned)size);
    r->type = t;
    r->size = size;
    for (i = 0; i < size; ++i) {
        assertType(elems[i], r->type);
        if (typeTag(t) == TT_INT) {
            r->elems.asInt[i] = elems[i].value.asInt;
        } else {
            r->elems.asRaw[i] = elems[i].value;
        }
    }
    return objectValue(&II_ARRAY, r);
 }
 Value emptyArray(void) { return arrayN(0, NULL); }
 Value array1(Value value0) { return arrayN(1, &value0); }
 Value array2(Value value0, Value value1) {
    Value vs[2];
    vs[0] = value0;
    vs[1] = value1;
    return arrayN(2, vs);
 }
 /* ---------- Function ---------- */

 static const InterfaceId II_FUNCTION = {2, "function"};

 /* closure: native function + environment */
 typedef struct Function_ {
    int arity;         /* does not include envSize */
    void *function;    /* Value *( *)(Value v0, ... Value v_arity+envSize-1) */
    const char *name;  /* debugging */
    int envSize;       /* captured environment */
    Value env[1 /* envSize */];
    /* We can keep 'Type's of arguments for autogenerated functions
       and check them while creating a closure to show type errors before
       applying the function. But it may slowdown closure creation.
     */
 } Function;

 size_t emptyFunctionSize(void) { return offsetof(Function, env); }

 Function *functionPtr(Value v) { return (Function *)getPtr(v, &II_FUNCTION); }
 int functionArity(Value v) { return functionPtr(v)->arity; }
 const char *functionName(Value v) { return functionPtr(v)->name; }
 Value *functionEnv(Value v) { return functionPtr(v)->env; }

 typedef Value (*ValueFunction1)(Value);
 typedef Value (*ValueFunction2)(Value, Value);
 typedef Value (*ValueFunction3)(Value, Value, Value);
 Value functionN(const char *name, void *function, int arity, int envSize, Value *env, int extraEnv) {
    Function *f;
    int i;
    assert(arity >= 1);
    assert(envSize >= 0);
    f = (Function *)malloc(emptyFunctionSize() + sizeof(f->env[0]) * (unsigned)(envSize + extraEnv));
    /* extraEnv initialization is on the caller */
    f->arity = arity;
    f->function = function;
    f->name = name;
    f->envSize = envSize + extraEnv;
    for (i = 0; i < envSize; i++)
        f->env[i] = env[i];
    return objectValue(&II_FUNCTION, f);
 }
 Value function1(const char *name, ValueFunction1 f) {
    return functionN(name, (void *)f, 1, 0, NULL, 0);
 }
 Value function2(const char *name, ValueFunction2 f) {
    return functionN(name, (void *)f, 2, 0, NULL, 0);
 }
 Value function3(const char *name, ValueFunction3 f) {
    return functionN(name, (void *)f, 3, 0, NULL, 0);
 }
 Value apply1(Value fv, Value a) {
    Function *f = functionPtr(fv);
    if (f->arity == 1) {
        switch (f->arity + f->envSize) {
            case 1: return ((ValueFunction1)f->function)(a);
            case 2: return ((ValueFunction2)f->function)(f->env[0], a);
            case 3: return ((ValueFunction3)f->function)(f->env[0], f->env[1], a);
            default:
                fail("arity > 3 is not yet implemented (arity = %d, envSize = %d", f->arity, f->envSize);
        }
    } else {
        Value r = functionN(f->name, f->function, f->arity - 1, f->envSize, f->env, 1);
        functionEnv(r)[f->envSize] = a; /* initialize extraEnv element */
        return r;
    }
 }
 Value apply2(Value fv, Value a, Value b) {
    return apply1(apply1(fv, a), b);
 }

 /* ---------- Relation operations ---------- */

 static const InterfaceId II_RELATION = {3, "relation"};

 typedef struct Relation_ {
    /* relation here is just a record of arrays */
    Value record;
 } Relation;

 Value relationRecord(Value v) { return ((Relation *)getPtr(v, &II_RELATION))->record; }
 Value recordToRelation(Value v) {
    Relation *r = (Relation *)malloc(sizeof *r);
    r->record = v;
    return objectValue(&II_RELATION, r);
 }
 int relationSize(Value v) { return recordSize(relationRecord(v)); }
 Value relation1(const char *col0, Value values0) {
    assertInterfaceId(values0, &II_ARRAY);
    return recordToRelation(record1(col0, values0));
 }
 Value relation2(const char *col0, Value values0,
                const char *col1, Value values1) {
    assertInterfaceId(values0, &II_ARRAY);
    assertInterfaceId(values1, &II_ARRAY);
    assert(arraySize(values0) == arraySize(values1));
    return recordToRelation(record2(col0, values0, col1, values1));
 }
 Value relationExtend(Value f, Value inCols, const char *outCol, Value rel) {
    const int nArgs = arraySize(inCols);
    Value *inArrays = (Value *)malloc((size_t)nArgs * sizeof(Value));
    Value *outArray;
    Value rec = relationRecord(rel);
    int r, c, nRows;
    for (c = 0; c < nArgs; c++) {
        inArrays[c] = recordField(rec, getString(arrayElem(inCols, c)));
    }
    if (recordSize(rec) == 0) {
        return relation1(outCol, emptyArray());
    } else {
        /* the record is non-empty here */
        nRows = arraySize(nArgs > 0 ? inArrays[0] : recordFields(rec)[0].value);
        outArray = (Value *)malloc((size_t)nRows * sizeof(Value));
        for (r = 0; r < nRows; r++) {
            Value o = f;
            for (c = 0; c < nArgs; c++)
                o = apply1(o, arrayElem(inArrays[c], r));
            outArray[r] = o;
        }
        return recordToRelation(recordExtend(rec, outCol, arrayN(nRows, outArray)));
    }
 }

 /* ---------- Debug printing ---------- */

 int printValue(char *buf, int size, Value v) {
    assert(size >= 0);
    switch (valueTypeTag(v)) {
        case TT_UNIT:
            return snprintf(buf, size, "()");
        case TT_INT:
            return snprintf(buf, size, "%d", getInt(v));
        case TT_DOUBLE:
            return snprintf(buf, size, "%f", getDouble(v));
        case TT_STRING:
            return snprintf(buf, size, "\"%s\"", getString(v));
        case TT_OBJECT: {
            InterfaceId *ii = valueInterfaceId(v);
            if (sameInterfaceId(ii, &II_RECORD)) {
                int i, rSize = recordSize(v), nPrinted = 0;
                RecordField *f = recordFields(v);
                char pre = '{';
                for (i = 0; i < rSize; i++) {
                    int n = snprintf(buf, size, "%c .%s = ", pre, f[i].name);
                    nPrinted += n; size -= n; buf += n;
                    if (nPrinted >= size) return nPrinted;
                    pre = ',';
                    n = printValue(buf, size, f[i].value);
                    nPrinted += n; size -= n; buf += n;
                    if (nPrinted >= size) return nPrinted;
                }
                return nPrinted + snprintf(buf, size, " }");
            } else if (sameInterfaceId(ii, &II_FUNCTION)) {
                /* how to print argument types?
                   could use value stubs auto-generated from IDL.
                   what to do with Maybe (Maybe (Either ...)) ?
                   too many nested stubs
                   what to do with polymorphic functions?
                   can't make any stubs, need unification (nested!) at runtime
                   It must be done by the compiler, not by some C code.
                */
                return snprintf(buf, size, "%s/%d", functionName(v), functionArity(v));
            } else if (sameInterfaceId(ii, &II_ARRAY)) {
                int i, aSize = arraySize(v), nPrinted = 0;
                const char *pre = "[";
                for (i = 0; i < aSize; i++) {
                    int n = snprintf(buf, size, "%s", pre);
                    nPrinted += n; size -= n; buf += n;
                    if (nPrinted >= size) return nPrinted;
                    pre = ", ";
                    n = printValue(buf, size, arrayElem(v, i));
                    nPrinted += n; size -= n; buf += n;
                    if (nPrinted >= size) return nPrinted;
                }
                return nPrinted + snprintf(buf, size, "]");
            } else {
                return snprintf(buf, size, "object<%s>", interfaceIdStr(ii));
            }
        }
        default:
            return snprintf(buf, size, "unknown type tag %d?", (int)(valueTypeTag(v)));
    }
 }

 /* truncates or pads the value to always use the specified width */
 void printValueFixedWidth(int n, Value v) {
    int i;
    char *buf;
    assert(n >= 0);
    buf = (char *)malloc((size_t)n+1);
    i = printValue(buf, n+1, v);
    if (i < n) { for (; i < n; i++) buf[i] = ' '; buf[n] = '\0'; }
    fputs(buf, stdout);
 }
 void printValueNewline(Value v) {
    char buf[1000];
    printValue(buf, sizeof buf, v);
    puts(buf);
 }
 void printRelation(Value v) {
    const int colWidth = 10;
    Record *rec = recordPtr(relationRecord(v));
    int c, r, nRows;
    if (rec->size == 0) {
        printf("<empty relation>");
    } else {
        printf("|");
        for (c = 0; c < rec->size; c++)
            printf(" %-*.*s |", colWidth, colWidth, rec->fields[c].name);
        puts("");
        nRows = arraySize(rec->fields[0].value);
        for (r = 0; r < nRows; r++) {
            printf("|");
            for (c = 0; c < rec->size; c++) {
                printf(" ");
                printValueFixedWidth(colWidth, arrayElem(rec->fields[c].value, r));
                printf(" |");
            }
            puts("");
        }
    }
 }

 /* ---------- LET'S PLAY! ---------- */

 Value f_sin(Value x) { return doubleValue(sin(getDouble(x))); }
 Value f_plusDouble(Value a, Value b) { return doubleValue(getDouble(a) + getDouble(b)); }
 Value f_plusInt(Value a, Value b) { return intValue(getInt(a) + getInt(b)); }
 Value f_id(Value x) { return x; }
 Value f_compose(Value f, Value g, Value x) { return apply1(f, apply1(g,x)); }
 Value f_map(Value f, Value xs) {
    int i, size = arraySize(xs);
    Value *rs = (Value *)malloc(sizeof *rs * size);
    for (i = 0; i < size; i++) {
        rs[i] = apply1(f, arrayElem(xs, i));
    }
    return arrayN(size, rs);
 }

 /* IO a === () -> a */
 Value f_IO_pure(Value x, Value _) { return x; }
 Value f_IO_print(Value x, Value _) { printValueNewline(x); return unitValue(); }
 Value f_IO_bind(Value a, Value b, Value _) {
    Value x = apply1(a, unitValue());
    Value m = apply1(b, x);
    return apply1(m, unitValue());
 }

 Value bindTest(Value dict, Value x, Value m) {
    /* pure x >>= m */
    return apply2(recordField(dict, "bind"),
                  apply1(recordField(dict, "pure"), x),
                  m);
 }

 int main(void) {
    {
        /* IO monad and basic types */
        Value IO_pure = function2("IO_pure", f_IO_pure);
        Value IO_bind = function3("IO_bind", f_IO_bind);
        Value IO_dict = record2("pure", IO_pure, "bind", IO_bind);
        Value IO_print = function2("IO_print", f_IO_print);
        Value sin = function1("sin", f_sin);
        Value id = function1("id", f_id);
        Value compose = function3("compose", f_compose);
        Value rec1 =
            record2(
                "foo", record2("a", f_map(apply1(function2("plus", f_plusInt),
                                                 intValue(10)),
                                          array2(intValue(1), intValue(2))),
                               "f", IO_bind),
                "bar", apply1(apply2(compose, id, sin), doubleValue(M_PI)));
        Value rec2 = recordExtend(rec1, "dori", stringValue("nori"));

        apply1(bindTest(IO_dict, rec2, IO_print), unitValue());
    }
    {
        /* relations */
        Value baseRel = relation2(
            "a", array2(doubleValue(1), doubleValue(2)),
            "b", array2(doubleValue(3), doubleValue(4)));
        Value relC = relationExtend(stringValue("foo"), emptyArray(), "c", baseRel);
        Value relD =
            relationExtend(
                function2("plus", f_plusDouble),
                array2(stringValue("a"), stringValue("b")), "d", relC);
        printRelation(relD);
    }
    return 0;
 }
	/* Experiment with a uniform representation of values that can hold
	references to C++ objects and be used polymorphically. Examples of
	the Rank2 IO monad dictionary and relation extend are provided at
	the bottom.

	The approach is the same as in dynamically typed languages.
	Primitive values are coupled with types at runtime. Functions only
	keep their arity to know when to apply. Empty values ([] or
	Nothing) are untyped.

	No type inference is performed. We don't know the return type of a
	function until it's called and the return value is available. We
	don't know the array element type unless array has at least one
	element.

	This limits the reporting of value types to the top-level type only
	and might delay some type checking failures. However, it is much
	simpler (as there is no runtime type inference), more efficient
	(the type is just a word) and converts RankN types from an
	impossible (undecidable runtime type inference) to a trivial issue.

	Arguably, one should use a proper compiler if the full function
	type is required instead of inferring types at runtime.

	The basic implementation idea is to have a uniform Value type
	consisting of two words:
	1. A tagged interface ID (an unboxed type tag or a pointer to the
	interface ID), which allows us to perform runtime checks to
	ensure that objects have compatible types.
	2. The raw value itself (pointer, or an unboxed type).

	The approach is less efficient than the standard FP language
	implementation with a runtime type erasure (e.g., OCaml uses a
	one-word uniform value format -- an integer when the lower bit is
	set and a pointer otherwise), but allows to perform runtime checks
	on manually written code and do not have segfaults.

	The code is C89 to make things as transparent as possible. No
	actual C++ objects are used here, and no memory management is
	performed. The purpose is to demonstrate how to support RankN
	types.

	gcc -g -Wall -Wextra -Wpedantic -std=c89 -O0 rankn.c && ./a.out
	*/

	#include <assert.h>
	#include <execinfo.h>
	#include <math.h>
	#include <stdarg.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* ---------- Utilities ---------- */

	#define CAT(a, b) a##b
	#define CAT2(a, b) CAT(a, b)
	#define STATIC_ASSERT(cond) \
	typedef char CAT2(static_assert_, __LINE__)[(cond) ? 1 : -1]

	void printStacktrace(void) {
	void *callstack[128];
	int i, frames = backtrace(callstack, 128);
	char **strs = backtrace_symbols(callstack, frames);
	if (strs == NULL)
	perror("can't get stack trace");
	for (i = frames - 1; i > 0; --i) { /* do not print strs[0] == printStacktrace */
	fprintf(stderr, "%s\n", strs[i]);
	}
	free(strs);
	}

	void fail(const char *msg, ...) __attribute__((noreturn));
	void fail(const char *msg, ...) {
	va_list args;
	printStacktrace();
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	exit(1);
	}

	typedef double myMalloc_align_t;
	STATIC_ASSERT(sizeof(myMalloc_align_t) == 8);

	/* bump allocator just for fun */
	void myMalloc(size_t size, const char file, int line) {
	static union {
	char bytes[0x1000];
	myMalloc_align_t _align; /* align the arena to 8 bytes */
	} arena;
	static char *ptr = arena.bytes;

	char *r = ptr;
	ptr += (size + 7) & ~7; /* align the next pointer to 8 bytes */

	if (ptr - arena.bytes > (long)sizeof arena.bytes) {
	size_t used = r - arena.bytes, total = sizeof arena.bytes;
	fail("%s:%d: myMalloc arena exhausted: %lu/%lu bytes used (%lu free), %lu requested (arena at %p)\n",
	file, line, used, total, total - used, size, arena.bytes);
	}
	/* printf("alloc: %3lu, used: %3ld, total: %5ld\n", size, ptr - r, ptr - arena.bytes); */
	return r;
	}
	#define malloc(s) myMalloc(s, __FILE__, __LINE__)

	/* ---------- Type tag ---------- */

	typedef enum {
	/* unboxed types */
	TT_UNIT = 0,
	TT_INT = 1,
	TT_DOUBLE = 2,

	/* (const char ) /
	TT_STRING = 3,

	/* boxed, refcounted object, includes functions, records, maybe, whatever.
	'Type' has InterfaceId to check compatibility.
	We can add an extra bit to implement built-in Maybe type.
	It won't be possible to implement built-in Either as two types
	are necessary.
	*/
	TT_OBJECT = 4,
	TT_LAST = TT_OBJECT
	} TypeTag;

	#define TYPE_TAG_MASK ((uintptr_t)7)
	/* we can increase the InterfaceId alignment with an attribute or pragma
	to have more lower bits available */

	STATIC_ASSERT(TYPE_TAG_MASK >= TT_LAST);

	const char *typeTagStr(TypeTag t) {
	switch (t) {
	case TT_UNIT: return "TT_UNIT";
	case TT_INT: return "TT_INT";
	case TT_DOUBLE: return "TT_DOUBLE";
	case TT_STRING: return "TT_STRING";
	case TT_OBJECT: return "TT_OBJECT";
	default: return "UNKNOWN";
	}
	}

	/* ---------- Interface ID ---------- */

	typedef struct InterfaceId_ {
	int id;
	const char name; / for debugging */
	} InterfaceId;

	const char interfaceIdStr(const InterfaceId i) { return i->name; }

	int sameInterfaceId(const InterfaceId a, const InterfaceId b) {
	return a == b \|\| a->id == b->id;
	}

	/* ---------- Type ---------- */

	typedef struct Type_ {
	uintptr_t typeOrInterface;
	} Type;

	TypeTag typeTag(Type t) {
	return (TypeTag)(t.typeOrInterface & TYPE_TAG_MASK);
	}
	InterfaceId *interfaceId(Type t) {
	return (InterfaceId *)(t.typeOrInterface & ~TYPE_TAG_MASK);
	}
	/* constructor */
	Type type(TypeTag tag, const InterfaceId *interface) {
	Type t;
	assert(tag <= TT_LAST);
	assert(((uintptr_t)interface & TYPE_TAG_MASK) == 0);
	t.typeOrInterface = tag + (uintptr_t)interface;
	return t;
	}

	int sameType(Type a, Type b) {
	return typeTag(a) == typeTag(b)
	&& sameInterfaceId(interfaceId(a), interfaceId(b));
	}

	/* ---------- Value ---------- */

	typedef union RawValue_ {
	int asInt;
	double asDouble;
	const char asString; / for simplicity */
	void *asPtr;
	} RawValue;

	typedef struct Value_ {
	Type type;
	RawValue value;
	} Value;

	STATIC_ASSERT(sizeof(Value) == 16);

	TypeTag valueTypeTag(Value v) { return typeTag(v.type); }
	InterfaceId *valueInterfaceId(Value v) { return interfaceId(v.type); }

	void assertTypeTag(Value v, TypeTag t) {
	if (valueTypeTag(v) != t)
	fail("tag mismatch %s != %s", typeTagStr(valueTypeTag(v)), typeTagStr(t));
	}
	void assertInterfaceId(Value v, const InterfaceId *i) {
	assertTypeTag(v, TT_OBJECT);
	if (!sameInterfaceId(valueInterfaceId(v), i))
	fail("interface mismatch %s != %s", interfaceIdStr(valueInterfaceId(v)), interfaceIdStr(i));
	}
	void assertType(Value v, Type t) {
	assertTypeTag(v, typeTag(t));
	if (typeTag(t) == TT_OBJECT) {
	assertInterfaceId(v, interfaceId(t));
	}
	}

	int getInt(Value v) {
	assertTypeTag(v, TT_INT);
	return v.value.asInt;
	}
	double getDouble(Value v) {
	assertTypeTag(v, TT_DOUBLE);
	return v.value.asDouble;
	}
	const char *getString(Value v) {
	assertTypeTag(v, TT_STRING);
	return v.value.asString;
	}
	void getPtr(Value v, const InterfaceId i) {
	assertInterfaceId(v, i);
	return v.value.asPtr;
	}
	Value unitValue(void) {
	Value v;
	v.type = type(TT_UNIT, NULL);
	v.value.asPtr = NULL;
	return v;
	}
	Value intValue(int x) {
	Value v;
	v.type = type(TT_INT, NULL);
	v.value.asInt = x;
	return v;
	}
	Value doubleValue(double x) {
	Value v;
	v.type = type(TT_DOUBLE, NULL);
	v.value.asDouble = x;
	return v;
	}
	Value stringValue(const char *x) {
	Value v;
	v.type = type(TT_STRING, NULL);
	v.value.asString = x;
	return v;
	}
	Value objectValue(const InterfaceId iid, void ptr) {
	Value v;
	v.type = type(TT_OBJECT, iid);
	v.value.asPtr = ptr;
	return v;
	}

	/* ---------- Record ---------- */

	static const InterfaceId II_RECORD = {0, "record"};

	typedef struct RecordField_ {
	const char *name;
	Value value;
	} RecordField;

	typedef struct Record_ {
	int size;
	RecordField fields[1 /* size */];
	} Record;
	/* we could create a separate data structure with field names and
	types and only keep RawValue[] in Record. This may reduce memory
	usage if we have many records with the same structure.
	*/
	size_t emptyRecordSize(void) { return offsetof(Record, fields); }

	Record recordPtr(Value v) { return (Record )getPtr(v, &II_RECORD); }
	int recordSize(Value v) { return recordPtr(v)->size; }
	RecordField *recordFields(Value v) { return recordPtr(v)->fields; }
	Value recordField(Value v, const char *name) {
	int i, size = recordSize(v);
	RecordField *f = recordFields(v);
	for (i = 0; i < size; i++) {
	if (strcmp(f[i].name, name) == 0)
	return f[i].value;
	}
	fail("field '%s' is not found", name);
	}
	Value recordN(int size, RecordField **fields) {
	Record *r;
	assert(size >= 0);
	r = (Record )malloc(emptyRecordSize() + sizeof(r->fields[0]) (unsigned)size);
	r->size = size;
	*fields = r->fields;
	return objectValue(&II_RECORD, r);
	}
	Value record1(const char *name0, Value value0) {
	RecordField *f; Value r = recordN(1, &f);
	f[0].name = name0; f[0].value = value0;
	return r;
	}
	Value record2(const char *name0, Value value0,
	const char *name1, Value value1) {
	RecordField *f; Value r = recordN(2, &f);
	f[0].name = name0; f[0].value = value0;
	f[1].name = name1; f[1].value = value1;
	return r;
	}
	Value recordExtend(Value v, const char *name, Value x) {
	int i, size = recordSize(v), found = 0;
	RecordField *in = recordFields(v);
	RecordField *out;
	Value r = recordN(size+1, &out);
	for (i = 0; i < size; i++) {
	out[i].name = in[i].name;
	if (strcmp(in[i].name, name) == 0) {
	out[i].value = x;
	found = 1;
	} else {
	out[i].value = in[i].value;
	}
	}
	if (!found) {
	out[size].name = name;
	out[size].value = x;
	} else {
	recordPtr(r)->size = size; /* decrease the size */
	}
	return r;
	}

	/* ---------- Array ---------- */

	static const InterfaceId II_ARRAY = {1, "array"};

	typedef union ArrayElems_ {
	RawValue asRaw[1 /* size */];
	int asInt[1 /* size */];
	} ArrayElems;
	/* arrays are packed: one type and RawValue[] or int[]
	But it must be isomorphic to Value[] -- an empty array has no type.
	*/
	typedef struct Array_ {
	int size;
	Type type;
	ArrayElems elems;
	} Array;

	size_t emptyArraySize(void) { return offsetof(Array, elems); }

	Array arrayPtr(Value v) { return (Array )getPtr(v, &II_ARRAY); }
	int arraySize(Value v) { return arrayPtr(v)->size; }
	Value arrayElem(Value v, int i) {
	int size = arraySize(v);
	Array *a = arrayPtr(v);
	Value r;
	assert(i >= 0);
	assert(i < size);
	r.type = a->type;
	if (typeTag(a->type) == TT_INT) {
	r.value.asInt = a->elems.asInt[i];
	} else {
	r.value = a->elems.asRaw[i];
	}
	return r;
	}
	Value arrayN(int size, Value *elems) {
	Array *r; int i;
	Type t = size >= 1 ? elems[0].type : type(TT_UNIT, NULL);
	size_t elemSize = typeTag(t) == TT_INT ? sizeof r->elems.asInt[0] : sizeof r->elems.asRaw[0];
	assert(size >= 0);
	r = (Array )malloc(emptyArraySize() + elemSize (unsigned)size);
	r->type = t;
	r->size = size;
	for (i = 0; i < size; ++i) {
	assertType(elems[i], r->type);
	if (typeTag(t) == TT_INT) {
	r->elems.asInt[i] = elems[i].value.asInt;
	} else {
	r->elems.asRaw[i] = elems[i].value;
	}
	}
	return objectValue(&II_ARRAY, r);
	}
	Value emptyArray(void) { return arrayN(0, NULL); }
	Value array1(Value value0) { return arrayN(1, &value0); }
	Value array2(Value value0, Value value1) {
	Value vs[2];
	vs[0] = value0;
	vs[1] = value1;
	return arrayN(2, vs);
	}
	/* ---------- Function ---------- */

	static const InterfaceId II_FUNCTION = {2, "function"};

	/* closure: native function + environment */
	typedef struct Function_ {
	int arity; /* does not include envSize */
	void function; / Value ( )(Value v0, ... Value v_arity+envSize-1) */
	const char name; / debugging */
	int envSize; /* captured environment */
	Value env[1 /* envSize */];
	/* We can keep 'Type's of arguments for autogenerated functions
	and check them while creating a closure to show type errors before
	applying the function. But it may slowdown closure creation.
	*/
	} Function;

	size_t emptyFunctionSize(void) { return offsetof(Function, env); }

	Function functionPtr(Value v) { return (Function )getPtr(v, &II_FUNCTION); }
	int functionArity(Value v) { return functionPtr(v)->arity; }
	const char *functionName(Value v) { return functionPtr(v)->name; }
	Value *functionEnv(Value v) { return functionPtr(v)->env; }

	typedef Value (*ValueFunction1)(Value);
	typedef Value (*ValueFunction2)(Value, Value);
	typedef Value (*ValueFunction3)(Value, Value, Value);
	Value functionN(const char name, void function, int arity, int envSize, Value *env, int extraEnv) {
	Function *f;
	int i;
	assert(arity >= 1);
	assert(envSize >= 0);
	f = (Function )malloc(emptyFunctionSize() + sizeof(f->env[0]) (unsigned)(envSize + extraEnv));
	/* extraEnv initialization is on the caller */
	f->arity = arity;
	f->function = function;
	f->name = name;
	f->envSize = envSize + extraEnv;
	for (i = 0; i < envSize; i++)
	f->env[i] = env[i];
	return objectValue(&II_FUNCTION, f);
	}
	Value function1(const char *name, ValueFunction1 f) {
	return functionN(name, (void *)f, 1, 0, NULL, 0);
	}
	Value function2(const char *name, ValueFunction2 f) {
	return functionN(name, (void *)f, 2, 0, NULL, 0);
	}
	Value function3(const char *name, ValueFunction3 f) {
	return functionN(name, (void *)f, 3, 0, NULL, 0);
	}
	Value apply1(Value fv, Value a) {
	Function *f = functionPtr(fv);
	if (f->arity == 1) {
	switch (f->arity + f->envSize) {
	case 1: return ((ValueFunction1)f->function)(a);
	case 2: return ((ValueFunction2)f->function)(f->env[0], a);
	case 3: return ((ValueFunction3)f->function)(f->env[0], f->env[1], a);
	default:
	fail("arity > 3 is not yet implemented (arity = %d, envSize = %d", f->arity, f->envSize);
	}
	} else {
	Value r = functionN(f->name, f->function, f->arity - 1, f->envSize, f->env, 1);
	functionEnv(r)[f->envSize] = a; /* initialize extraEnv element */
	return r;
	}
	}
	Value apply2(Value fv, Value a, Value b) {
	return apply1(apply1(fv, a), b);
	}

	/* ---------- Relation operations ---------- */

	static const InterfaceId II_RELATION = {3, "relation"};

	typedef struct Relation_ {
	/* relation here is just a record of arrays */
	Value record;
	} Relation;

	Value relationRecord(Value v) { return ((Relation *)getPtr(v, &II_RELATION))->record; }
	Value recordToRelation(Value v) {
	Relation r = (Relation )malloc(sizeof *r);
	r->record = v;
	return objectValue(&II_RELATION, r);
	}
	int relationSize(Value v) { return recordSize(relationRecord(v)); }
	Value relation1(const char *col0, Value values0) {
	assertInterfaceId(values0, &II_ARRAY);
	return recordToRelation(record1(col0, values0));
	}
	Value relation2(const char *col0, Value values0,
	const char *col1, Value values1) {
	assertInterfaceId(values0, &II_ARRAY);
	assertInterfaceId(values1, &II_ARRAY);
	assert(arraySize(values0) == arraySize(values1));
	return recordToRelation(record2(col0, values0, col1, values1));
	}
	Value relationExtend(Value f, Value inCols, const char *outCol, Value rel) {
	const int nArgs = arraySize(inCols);
	Value inArrays = (Value )malloc((size_t)nArgs * sizeof(Value));
	Value *outArray;
	Value rec = relationRecord(rel);
	int r, c, nRows;
	for (c = 0; c < nArgs; c++) {
	inArrays[c] = recordField(rec, getString(arrayElem(inCols, c)));
	}
	if (recordSize(rec) == 0) {
	return relation1(outCol, emptyArray());
	} else {
	/* the record is non-empty here */
	nRows = arraySize(nArgs > 0 ? inArrays[0] : recordFields(rec)[0].value);
	outArray = (Value )malloc((size_t)nRows sizeof(Value));
	for (r = 0; r < nRows; r++) {
	Value o = f;
	for (c = 0; c < nArgs; c++)
	o = apply1(o, arrayElem(inArrays[c], r));
	outArray[r] = o;
	}
	return recordToRelation(recordExtend(rec, outCol, arrayN(nRows, outArray)));
	}
	}

	/* ---------- Debug printing ---------- */

	int printValue(char *buf, int size, Value v) {
	assert(size >= 0);
	switch (valueTypeTag(v)) {
	case TT_UNIT:
	return snprintf(buf, size, "()");
	case TT_INT:
	return snprintf(buf, size, "%d", getInt(v));
	case TT_DOUBLE:
	return snprintf(buf, size, "%f", getDouble(v));
	case TT_STRING:
	return snprintf(buf, size, "\"%s\"", getString(v));
	case TT_OBJECT: {
	InterfaceId *ii = valueInterfaceId(v);
	if (sameInterfaceId(ii, &II_RECORD)) {
	int i, rSize = recordSize(v), nPrinted = 0;
	RecordField *f = recordFields(v);
	char pre = '{';
	for (i = 0; i < rSize; i++) {
	int n = snprintf(buf, size, "%c .%s = ", pre, f[i].name);
	nPrinted += n; size -= n; buf += n;
	if (nPrinted >= size) return nPrinted;
	pre = ',';
	n = printValue(buf, size, f[i].value);
	nPrinted += n; size -= n; buf += n;
	if (nPrinted >= size) return nPrinted;
	}
	return nPrinted + snprintf(buf, size, " }");
	} else if (sameInterfaceId(ii, &II_FUNCTION)) {
	/* how to print argument types?
	could use value stubs auto-generated from IDL.
	what to do with Maybe (Maybe (Either ...)) ?
	too many nested stubs
	what to do with polymorphic functions?
	can't make any stubs, need unification (nested!) at runtime
	It must be done by the compiler, not by some C code.
	*/
	return snprintf(buf, size, "%s/%d", functionName(v), functionArity(v));
	} else if (sameInterfaceId(ii, &II_ARRAY)) {
	int i, aSize = arraySize(v), nPrinted = 0;
	const char *pre = "[";
	for (i = 0; i < aSize; i++) {
	int n = snprintf(buf, size, "%s", pre);
	nPrinted += n; size -= n; buf += n;
	if (nPrinted >= size) return nPrinted;
	pre = ", ";
	n = printValue(buf, size, arrayElem(v, i));
	nPrinted += n; size -= n; buf += n;
	if (nPrinted >= size) return nPrinted;
	}
	return nPrinted + snprintf(buf, size, "]");
	} else {
	return snprintf(buf, size, "object<%s>", interfaceIdStr(ii));
	}
	}
	default:
	return snprintf(buf, size, "unknown type tag %d?", (int)(valueTypeTag(v)));
	}
	}

	/* truncates or pads the value to always use the specified width */
	void printValueFixedWidth(int n, Value v) {
	int i;
	char *buf;
	assert(n >= 0);
	buf = (char *)malloc((size_t)n+1);
	i = printValue(buf, n+1, v);
	if (i < n) { for (; i < n; i++) buf[i] = ' '; buf[n] = '\0'; }
	fputs(buf, stdout);
	}
	void printValueNewline(Value v) {
	char buf[1000];
	printValue(buf, sizeof buf, v);
	puts(buf);
	}
	void printRelation(Value v) {
	const int colWidth = 10;
	Record *rec = recordPtr(relationRecord(v));
	int c, r, nRows;
	if (rec->size == 0) {
	printf("<empty relation>");
	} else {
	printf("\|");
	for (c = 0; c < rec->size; c++)
	printf(" %-.s \|", colWidth, colWidth, rec->fields[c].name);
	puts("");
	nRows = arraySize(rec->fields[0].value);
	for (r = 0; r < nRows; r++) {
	printf("\|");
	for (c = 0; c < rec->size; c++) {
	printf(" ");
	printValueFixedWidth(colWidth, arrayElem(rec->fields[c].value, r));
	printf(" \|");
	}
	puts("");
	}
	}
	}

	/* ---------- LET'S PLAY! ---------- */

	Value f_sin(Value x) { return doubleValue(sin(getDouble(x))); }
	Value f_plusDouble(Value a, Value b) { return doubleValue(getDouble(a) + getDouble(b)); }
	Value f_plusInt(Value a, Value b) { return intValue(getInt(a) + getInt(b)); }
	Value f_id(Value x) { return x; }
	Value f_compose(Value f, Value g, Value x) { return apply1(f, apply1(g,x)); }
	Value f_map(Value f, Value xs) {
	int i, size = arraySize(xs);
	Value rs = (Value )malloc(sizeof rs size);
	for (i = 0; i < size; i++) {
	rs[i] = apply1(f, arrayElem(xs, i));
	}
	return arrayN(size, rs);
	}

	/* IO a === () -> a */
	Value f_IO_pure(Value x, Value _) { return x; }
	Value f_IO_print(Value x, Value _) { printValueNewline(x); return unitValue(); }
	Value f_IO_bind(Value a, Value b, Value _) {
	Value x = apply1(a, unitValue());
	Value m = apply1(b, x);
	return apply1(m, unitValue());
	}

	Value bindTest(Value dict, Value x, Value m) {
	/* pure x >>= m */
	return apply2(recordField(dict, "bind"),
	apply1(recordField(dict, "pure"), x),
	m);
	}

	int main(void) {
	{
	/* IO monad and basic types */
	Value IO_pure = function2("IO_pure", f_IO_pure);
	Value IO_bind = function3("IO_bind", f_IO_bind);
	Value IO_dict = record2("pure", IO_pure, "bind", IO_bind);
	Value IO_print = function2("IO_print", f_IO_print);
	Value sin = function1("sin", f_sin);
	Value id = function1("id", f_id);
	Value compose = function3("compose", f_compose);
	Value rec1 =
	record2(
	"foo", record2("a", f_map(apply1(function2("plus", f_plusInt),
	intValue(10)),
	array2(intValue(1), intValue(2))),
	"f", IO_bind),
	"bar", apply1(apply2(compose, id, sin), doubleValue(M_PI)));
	Value rec2 = recordExtend(rec1, "dori", stringValue("nori"));

	apply1(bindTest(IO_dict, rec2, IO_print), unitValue());
	}
	{
	/* relations */
	Value baseRel = relation2(
	"a", array2(doubleValue(1), doubleValue(2)),
	"b", array2(doubleValue(3), doubleValue(4)));
	Value relC = relationExtend(stringValue("foo"), emptyArray(), "c", baseRel);
	Value relD =
	relationExtend(
	function2("plus", f_plusDouble),
	array2(stringValue("a"), stringValue("b")), "d", relC);
	printRelation(relD);
	}
	return 0;
	}
No results found