Last active
December 19, 2023 05:27
-
-
Save RealNeGate/8077c99a2e7ad4ba3cd6d02e131de218 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is a PoC for incremental compilation for Zig, the idea is that we | |
// can break down all of type checking into a simple term rewriting system | |
// which has one step Church-Rosser. | |
// | |
// Church-Rosser (i'll shorten to CR): | |
// CR states that given our reductions terminate, the order of reduction is irrelevant, more | |
// importantly we can say that if we reduce to our normal form in one step per term then our | |
// total reductions are linear with the size of the IR. | |
// | |
// Term? | |
// Each term will map to some chunk of ZIR (decl or certain expressions) that has an associated | |
// hash (meaning it can be dirtied) and the "reduction" is the type checking step, for the purposes | |
// of this IR the terms are pure and their results only dependent on it's direct inputs. This is | |
// necessary for our CR. | |
// | |
// Time complexity: | |
// Once a node is evaluated it'll be done "forever" so the maximum number of | |
// evaluation steps scales linearly with the number of nodes, really with the | |
// number of rewritten nodes (which is whatever is affected by the changes directly | |
// or indirectly) | |
// | |
const std = @import("std"); | |
const print = std.debug.print; | |
const assert = std.debug.assert; | |
const Allocator = std.mem.Allocator; | |
const Worklist = struct { | |
visited: std.DynamicBitSet, | |
items: std.ArrayList(*Term), | |
pub fn alloc(allocator: Allocator) !Worklist { | |
return .{ | |
.visited = try std.DynamicBitSet.initEmpty(allocator, 1024), | |
.items = try std.ArrayList(*Term).initCapacity(allocator, 1024) | |
}; | |
} | |
pub fn push(self: *Worklist, t: *Term) !void { | |
if (self.visited.isSet(t.id)) { | |
return; | |
} | |
self.visited.set(t.id); | |
try self.items.append(t); | |
} | |
pub fn pop(self: *Worklist) ?*Term { | |
return self.popWithBase(0); | |
} | |
// if it reaches base, it'll stop | |
pub fn popWithBase(self: *Worklist, base: usize) ?*Term { | |
if (self.items.items.len == base) { | |
return null; | |
} else { | |
var t = self.items.items[self.items.items.len - 1]; | |
self.items.items.len -= 1; | |
self.visited.unset(t.id); | |
return t; | |
} | |
} | |
pub fn count(self: *Worklist) usize { | |
return self.items.items.len; | |
} | |
}; | |
// 0 is used for the magic Term in the sky first_compile | |
var id_counter: u32 = 1; | |
// these are currently useless but they'll gain purpose soon :p | |
const TermTag = enum { | |
// this term merely extracts a value from a parent term | |
proj, | |
decl, | |
call, | |
literal, | |
}; | |
const User = struct { | |
t: *Term, | |
index: u32, | |
}; | |
const Term = struct { | |
tag: TermTag, | |
id: u32, | |
needs_retype: bool = false, | |
// indirectly dirty inputs, once this reaches | |
// 0 the term can be processed. | |
dirty_ins: u16 = 0, | |
// direct changes hold a reference to their | |
// previous form, if there is none we can assume | |
// the node is either new or hasn't changed. | |
old: ?*Term = null, | |
ins: []*Term, | |
users: std.ArrayList(User), | |
// insert term-specific data here i guess | |
// ... | |
pub fn alloc(allocator: Allocator, tag: TermTag) !*Term { | |
return try allocWithInputs(allocator, tag, &[_]*Term{}); | |
} | |
pub fn allocWithInputs(allocator: Allocator, tag: TermTag, ins: []*Term) !*Term { | |
var t = try allocator.create(Term); | |
t.* = Term{ | |
// give each node a unique ID | |
.tag = tag, | |
.id = id_counter, | |
.ins = try allocator.alloc(*Term, ins.len), | |
.users = try std.ArrayList(User).initCapacity(allocator, 4) | |
}; | |
id_counter += 1; | |
// add our lovely inputs | |
std.mem.copy(*Term, t.ins, ins); | |
// fill user lists | |
for (ins, 0..) |in, i| { | |
print(" set node {}'s slot {} with {}\n", .{ t.id, i, in.id }); | |
try in.users.append(User{ .t = t, .index = @truncate(u32, i) }); | |
} | |
return t; | |
} | |
pub fn set_in(t: *Term, in: *Term, i: usize) !void { | |
// remove old user | |
var old = t.ins[i]; | |
for (old.users.items, 0..) |u, user_i| { | |
if (u.index == i and u.t == t) { | |
_ = old.users.swapRemove(user_i); | |
break; | |
} | |
} | |
// add new user | |
print(" set node {}'s slot {} with {}\n", .{ t.id, i, in.id }); | |
try in.users.append(User{ .t = t, .index = @truncate(u32, i) }); | |
t.ins[i] = in; | |
} | |
pub fn markUsers(t: *Term, ws: *Worklist, fwd: bool) !void { | |
for (t.users.items) |u| { | |
print(" * mark users {}\n", .{u.t.id}); | |
assert(u.t.dirty_ins > 0); | |
u.t.dirty_ins -= 1; | |
// make sure the children are marked to retype if there's any changes above, | |
// if possible to propagate only to clean nodes which don't actually change | |
// in which case we still walk the graph but don't spend time retyping (the | |
// actually expensive piece) | |
if (fwd) { | |
u.t.needs_retype = true; | |
} | |
if (u.t.dirty_ins == 0) { | |
// push any users which are ready to process now | |
try ws.push(u.t); | |
} | |
} | |
} | |
// returns true if it makes changes which affect other terms. | |
pub fn progress(t: *Term) bool { | |
// this isn't necessary, just making an example where | |
// changes propagate only if they're from leaf nodes. | |
// | |
// an example of a realistic return is that changing a | |
// non-comptime function body will not affect the deps. | |
// only changing it's prototype. | |
return t.ins.len == 0; | |
} | |
}; | |
// we start with only the changed nodes on the worklist | |
pub fn type_check(ws: *Worklist) !void { | |
//////////////////////////////// | |
// Pass 1: mark indirect dirty | |
//////////////////////////////// | |
var initial = ws.count(); | |
// i don't wanna allocate two worklists so i'll just do the work on the | |
// same worklist :p | |
var n: usize = 0; | |
for (0..initial) |i| { | |
var root_change = ws.items.items[i]; | |
root_change.needs_retype = true; | |
try ws.items.append(root_change); | |
while (ws.popWithBase(initial)) |t| { | |
n += 1; | |
for (t.users.items) |u| { | |
// user is now a lil dirty | |
if (u.t.old == null) { | |
u.t.dirty_ins += 1; | |
} | |
try ws.push(u.t); | |
} | |
} | |
// we're back to normal, set the visited flag on this node | |
assert(ws.count() == initial); | |
ws.visited.set(root_change.id); | |
} | |
//////////////////////////////// | |
// Pass 2: actually type check | |
//////////////////////////////// | |
var delays: usize = 0; | |
var progress: usize = 0; | |
while (ws.pop()) |t| { | |
print(" walk node {}?\n", .{t.id}); | |
if (t.dirty_ins != 0) { | |
print(" * delay node {}?\n", .{t.id}); | |
delays += 1; | |
continue; | |
} | |
if (t.needs_retype) { | |
print(" * progress on node {}\n", .{t.id}); | |
// dirty -> clean | |
var p = t.progress(); | |
try t.markUsers(ws, p); | |
// reset for the next time we might try to run type_check | |
t.needs_retype = false; | |
progress += 1; | |
} else { | |
print(" * cleaned but not retyped\n", .{}); | |
} | |
} | |
print("\n SUMMARY:\n", .{}); | |
print(" {:3} delays + {:3} typed => O({:3})\n", .{ delays, progress, delays + progress }); | |
print(" {:3} max + {:3} max => O({:3})\n\n\n", .{ initial - 1, n, n + initial - 1 }); | |
} | |
pub fn main() !void { | |
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; | |
const allocator = gpa.allocator(); | |
var ws = try Worklist.alloc(allocator); | |
// each time we add a node, we need to push it to | |
// the worklist, once we've got something in the cache | |
// we can avoid this process for some nodes. | |
print("=== parse ===\n", .{}); | |
// abcd | |
// |||| | |
// plus e | |
// | / | |
// jean | |
var a = try Term.alloc(allocator, .literal); | |
var b = try Term.alloc(allocator, .literal); | |
var c = try Term.alloc(allocator, .literal); | |
var d = try Term.alloc(allocator, .literal); | |
var plus = try Term.allocWithInputs(allocator, .decl, &[_]*Term{ a, b, c, d }); | |
var e = try Term.alloc(allocator, .literal); | |
var jean = try Term.allocWithInputs(allocator, .decl, &[_]*Term{ plus, e }); | |
try ws.push(a); | |
try ws.push(b); | |
try ws.push(c); | |
try ws.push(d); | |
try ws.push(plus); | |
try ws.push(e); | |
try ws.push(jean); | |
print("=== type check ===\n", .{}); | |
try type_check(&ws); | |
print("=== parse 2 ===\n", .{}); | |
var f = try Term.alloc(allocator, .literal); | |
try ws.push(f); | |
try plus.set_in(f, 1); | |
var g = try Term.alloc(allocator, .literal); | |
try ws.push(g); | |
try plus.set_in(g, 3); | |
try ws.push(plus); | |
print("=== type check 2 ===\n", .{}); | |
try type_check(&ws); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment