Skip to content

Instantly share code, notes, and snippets.

@RealNeGate
Last active December 19, 2023 05:27
Show Gist options
  • Save RealNeGate/8077c99a2e7ad4ba3cd6d02e131de218 to your computer and use it in GitHub Desktop.
Save RealNeGate/8077c99a2e7ad4ba3cd6d02e131de218 to your computer and use it in GitHub Desktop.
// This is a PoC for incremental compilation for Zig, the idea is that we
// can break down all of type checking into a simple term rewriting system
// which has one step Church-Rosser.
//
// Church-Rosser (i'll shorten to CR):
// CR states that given our reductions terminate, the order of reduction is irrelevant, more
// importantly we can say that if we reduce to our normal form in one step per term then our
// total reductions are linear with the size of the IR.
//
// Term?
// Each term will map to some chunk of ZIR (decl or certain expressions) that has an associated
// hash (meaning it can be dirtied) and the "reduction" is the type checking step, for the purposes
// of this IR the terms are pure and their results only dependent on it's direct inputs. This is
// necessary for our CR.
//
// Time complexity:
// Once a node is evaluated it'll be done "forever" so the maximum number of
// evaluation steps scales linearly with the number of nodes, really with the
// number of rewritten nodes (which is whatever is affected by the changes directly
// or indirectly)
//
const std = @import("std");
const print = std.debug.print;
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const Worklist = struct {
visited: std.DynamicBitSet,
items: std.ArrayList(*Term),
pub fn alloc(allocator: Allocator) !Worklist {
return .{
.visited = try std.DynamicBitSet.initEmpty(allocator, 1024),
.items = try std.ArrayList(*Term).initCapacity(allocator, 1024)
};
}
pub fn push(self: *Worklist, t: *Term) !void {
if (self.visited.isSet(t.id)) {
return;
}
self.visited.set(t.id);
try self.items.append(t);
}
pub fn pop(self: *Worklist) ?*Term {
return self.popWithBase(0);
}
// if it reaches base, it'll stop
pub fn popWithBase(self: *Worklist, base: usize) ?*Term {
if (self.items.items.len == base) {
return null;
} else {
var t = self.items.items[self.items.items.len - 1];
self.items.items.len -= 1;
self.visited.unset(t.id);
return t;
}
}
pub fn count(self: *Worklist) usize {
return self.items.items.len;
}
};
// 0 is used for the magic Term in the sky first_compile
var id_counter: u32 = 1;
// these are currently useless but they'll gain purpose soon :p
const TermTag = enum {
// this term merely extracts a value from a parent term
proj,
decl,
call,
literal,
};
const User = struct {
t: *Term,
index: u32,
};
const Term = struct {
tag: TermTag,
id: u32,
needs_retype: bool = false,
// indirectly dirty inputs, once this reaches
// 0 the term can be processed.
dirty_ins: u16 = 0,
// direct changes hold a reference to their
// previous form, if there is none we can assume
// the node is either new or hasn't changed.
old: ?*Term = null,
ins: []*Term,
users: std.ArrayList(User),
// insert term-specific data here i guess
// ...
pub fn alloc(allocator: Allocator, tag: TermTag) !*Term {
return try allocWithInputs(allocator, tag, &[_]*Term{});
}
pub fn allocWithInputs(allocator: Allocator, tag: TermTag, ins: []*Term) !*Term {
var t = try allocator.create(Term);
t.* = Term{
// give each node a unique ID
.tag = tag,
.id = id_counter,
.ins = try allocator.alloc(*Term, ins.len),
.users = try std.ArrayList(User).initCapacity(allocator, 4)
};
id_counter += 1;
// add our lovely inputs
std.mem.copy(*Term, t.ins, ins);
// fill user lists
for (ins, 0..) |in, i| {
print(" set node {}'s slot {} with {}\n", .{ t.id, i, in.id });
try in.users.append(User{ .t = t, .index = @truncate(u32, i) });
}
return t;
}
pub fn set_in(t: *Term, in: *Term, i: usize) !void {
// remove old user
var old = t.ins[i];
for (old.users.items, 0..) |u, user_i| {
if (u.index == i and u.t == t) {
_ = old.users.swapRemove(user_i);
break;
}
}
// add new user
print(" set node {}'s slot {} with {}\n", .{ t.id, i, in.id });
try in.users.append(User{ .t = t, .index = @truncate(u32, i) });
t.ins[i] = in;
}
pub fn markUsers(t: *Term, ws: *Worklist, fwd: bool) !void {
for (t.users.items) |u| {
print(" * mark users {}\n", .{u.t.id});
assert(u.t.dirty_ins > 0);
u.t.dirty_ins -= 1;
// make sure the children are marked to retype if there's any changes above,
// if possible to propagate only to clean nodes which don't actually change
// in which case we still walk the graph but don't spend time retyping (the
// actually expensive piece)
if (fwd) {
u.t.needs_retype = true;
}
if (u.t.dirty_ins == 0) {
// push any users which are ready to process now
try ws.push(u.t);
}
}
}
// returns true if it makes changes which affect other terms.
pub fn progress(t: *Term) bool {
// this isn't necessary, just making an example where
// changes propagate only if they're from leaf nodes.
//
// an example of a realistic return is that changing a
// non-comptime function body will not affect the deps.
// only changing it's prototype.
return t.ins.len == 0;
}
};
// we start with only the changed nodes on the worklist
pub fn type_check(ws: *Worklist) !void {
////////////////////////////////
// Pass 1: mark indirect dirty
////////////////////////////////
var initial = ws.count();
// i don't wanna allocate two worklists so i'll just do the work on the
// same worklist :p
var n: usize = 0;
for (0..initial) |i| {
var root_change = ws.items.items[i];
root_change.needs_retype = true;
try ws.items.append(root_change);
while (ws.popWithBase(initial)) |t| {
n += 1;
for (t.users.items) |u| {
// user is now a lil dirty
if (u.t.old == null) {
u.t.dirty_ins += 1;
}
try ws.push(u.t);
}
}
// we're back to normal, set the visited flag on this node
assert(ws.count() == initial);
ws.visited.set(root_change.id);
}
////////////////////////////////
// Pass 2: actually type check
////////////////////////////////
var delays: usize = 0;
var progress: usize = 0;
while (ws.pop()) |t| {
print(" walk node {}?\n", .{t.id});
if (t.dirty_ins != 0) {
print(" * delay node {}?\n", .{t.id});
delays += 1;
continue;
}
if (t.needs_retype) {
print(" * progress on node {}\n", .{t.id});
// dirty -> clean
var p = t.progress();
try t.markUsers(ws, p);
// reset for the next time we might try to run type_check
t.needs_retype = false;
progress += 1;
} else {
print(" * cleaned but not retyped\n", .{});
}
}
print("\n SUMMARY:\n", .{});
print(" {:3} delays + {:3} typed => O({:3})\n", .{ delays, progress, delays + progress });
print(" {:3} max + {:3} max => O({:3})\n\n\n", .{ initial - 1, n, n + initial - 1 });
}
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
var ws = try Worklist.alloc(allocator);
// each time we add a node, we need to push it to
// the worklist, once we've got something in the cache
// we can avoid this process for some nodes.
print("=== parse ===\n", .{});
// abcd
// ||||
// plus e
// | /
// jean
var a = try Term.alloc(allocator, .literal);
var b = try Term.alloc(allocator, .literal);
var c = try Term.alloc(allocator, .literal);
var d = try Term.alloc(allocator, .literal);
var plus = try Term.allocWithInputs(allocator, .decl, &[_]*Term{ a, b, c, d });
var e = try Term.alloc(allocator, .literal);
var jean = try Term.allocWithInputs(allocator, .decl, &[_]*Term{ plus, e });
try ws.push(a);
try ws.push(b);
try ws.push(c);
try ws.push(d);
try ws.push(plus);
try ws.push(e);
try ws.push(jean);
print("=== type check ===\n", .{});
try type_check(&ws);
print("=== parse 2 ===\n", .{});
var f = try Term.alloc(allocator, .literal);
try ws.push(f);
try plus.set_in(f, 1);
var g = try Term.alloc(allocator, .literal);
try ws.push(g);
try plus.set_in(g, 3);
try ws.push(plus);
print("=== type check 2 ===\n", .{});
try type_check(&ws);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment