Skip to content

Instantly share code, notes, and snippets.

@pepijndevos
Created November 11, 2024 17:31
Show Gist options
  • Save pepijndevos/024d2b67e090ab24307108e20967be70 to your computer and use it in GitHub Desktop.
HVM in Python
"""
HVML (Higher-order Virtual Machine Language) Implementation
The runtime memory consists of a vector of ports (represented as Terms), where each port has:
- A 3-bit tag indicating the type of node it connects to
- A location value pointing to where that node's ports begin in memory
Memory Model
-----------
A port represents a wire connected to a node's main port. The tag indicates the type
of node being connected to, while the location points to where that node's ports begin.
Node Layout
----------
Nodes are implicitly formed by groups of ports in memory. Binary nodes (APP, LAM, SUP, DUP)
use 2 consecutive memory slots for their ports:
Duplication Node:
- [0] => either ERA or ARG pointing to 1st variable location
- [1] => either ERA or ARG pointing to 2nd variable location
- [2] => pointer to duplicated expression
Lambda Node:
- [0] => either ERA or ARG pointing to variable location
- [1] => pointer to lambda's body
Application Node:
- [0] => pointer to function
- [1] => pointer to argument
Superposition Node:
- [0] => pointer to first term
- [1] => pointer to second term
Variables point to their binding site (location in a LAM/DUP node).
The main port of each node is implicit in the tree structure.
"""
import time

from dataclasses import dataclass, field
from enum import Enum
from typing import List
class Tag(Enum):
    """Port tags: each tag names the kind of node a port's wire leads to."""

    # Duplication-node variable ports.
    DP0 = 'DP0'  # first variable of a DUP node
    DP1 = 'DP1'  # second variable of a DUP node
    # Plain variable occurrence.
    VAR = 'VAR'
    # Computation nodes.
    APP = 'APP'  # application node
    ERA = 'ERA'  # eraser node
    LAM = 'LAM'  # lambda node
    SUP = 'SUP'  # superposition node
    # Placeholder marking a slot whose substitution is still pending.
    SUB = 'SUB'
@dataclass
class Term:
    """A single port (one wire end) in the interaction net.

    ``tag`` says what kind of node the wire leads to; ``loc`` is the index in
    ``Heap.mem`` where that node's port group begins (for variable-like tags
    it addresses the binding site instead).
    """

    tag: Tag  # kind of node this port connects to
    loc: int  # index into Heap.mem where that node's ports start
@dataclass
class Heap:
    """Runtime memory: a flat array of ports plus an evaluation stack.

    Fix: the original paired ``@dataclass`` with a hand-written ``__init__``
    that merely re-created empty fields, shadowing the generated constructor.
    Using ``field(default_factory=...)`` keeps ``Heap()`` working exactly as
    before while also (backward-compatibly) allowing pre-populated
    construction, e.g. ``Heap(mem=[...])``.
    """

    mem: List[Term] = field(default_factory=list)  # global memory array of ports
    stk: List[Term] = field(default_factory=list)  # walk stack for active pairs
    itr: int = 0                                   # interaction count

    def alloc(self, *terms: Term) -> int:
        """Append consecutive ports to memory.

        Args:
            terms: Ports to allocate.

        Returns:
            Index of the first allocated port.
        """
        start = len(self.mem)
        self.mem.extend(terms)
        return start
def get_var_key(term: Term) -> int:
    """Return the memory index of a variable-like port's binding slot.

    VAR and DP0 bind directly at ``term.loc``; DP1 binds at the second slot
    of its DUP node (``loc + 1``). Any other tag has no binding site and
    yields 0.
    """
    tag = term.tag
    if tag == Tag.DP1:
        return term.loc + 1
    if tag == Tag.VAR or tag == Tag.DP0:
        return term.loc
    return 0
def reduce_app_era(heap: Heap, app: Term, era: Term) -> Term:
    """APP_ERA interaction: applying an erased function erases the redex.

        (* a)
        ----- APP_ERA
        *

    Erasure propagates through the graph, acting as incremental garbage
    collection. Counts one interaction and yields the eraser itself.
    """
    heap.itr += 1
    return era
def reduce_app_lam(heap: Heap, app: Term, lam: Term) -> Term:
    """APP_LAM interaction (beta reduction).

        (λx(body) a)
        ------------ APP_LAM
        x <- a
        body

    The argument is written into the lambda's variable slot and the body
    becomes the next term to evaluate — the core computational step of the
    lambda calculus.
    """
    heap.itr += 1
    argument = heap.mem[app.loc + 1]
    body = heap.mem[lam.loc + 1]
    heap.mem[lam.loc] = argument  # substitute: x <- a
    return body
def reduce_app_sup(heap: Heap, app: Term, sup: Term) -> Term:
    """APP_SUP interaction: distribute an application over a superposition.

        ({a b} c)
        --------------- APP_SUP
        & {x0 x1} = c
        {(a x0) (b x1)}

    The argument is duplicated and each branch of the superposition is
    applied to one copy, sharing the computation across both branches.
    """
    heap.itr += 1
    argument = heap.mem[app.loc + 1]
    branch0 = heap.mem[sup.loc]
    branch1 = heap.mem[sup.loc + 1]
    # Fresh DUP node that will split the argument into two copies.
    dup_at = heap.alloc(Term(Tag.SUB, 0), Term(Tag.SUB, 0), argument)
    # Apply each branch to its own copy of the argument.
    app0_at = heap.alloc(branch0, Term(Tag.DP0, dup_at))
    app1_at = heap.alloc(branch1, Term(Tag.DP1, dup_at))
    # Superpose the two applications as the result.
    sup_at = heap.alloc(Term(Tag.APP, app0_at), Term(Tag.APP, app1_at))
    return Term(Tag.SUP, sup_at)
def reduce_dup_era(heap: Heap, dup: Term, era: Term) -> Term:
    """DUP_ERA interaction: duplicating an erased term erases both copies.

        & {x y} = *
        ----------- DUP_ERA
        x <- *
        y <- *

    Both variable slots of the DUP node receive the eraser; evaluation
    resumes from whichever side (DP0/DP1) we arrived through.
    """
    heap.itr += 1
    at = dup.loc
    side = 0 if dup.tag == Tag.DP0 else 1
    heap.mem[at] = era
    heap.mem[at + 1] = era
    return heap.mem[at + side]
def reduce_dup_lam(heap: Heap, dup: Term, lam: Term) -> Term:
    """DUP_LAM interaction: duplicate a lambda.

        & {r s} = λx(f)
        --------------- DUP_LAM
        & {f0 f1} = f
        r <- λx0(f0)
        s <- λx1(f1)
        x <- {x0 x1}

    Two fresh lambdas each receive one half of the (duplicated) body, and
    the original variable becomes a superposition of the two fresh
    variables — this is how computation is shared inside lambda terms.
    """
    heap.itr += 1
    side = 0 if dup.tag == Tag.DP0 else 1
    body = heap.mem[lam.loc + 1]
    # Fresh DUP node splitting the body into f0 / f1.
    dup_at = heap.alloc(Term(Tag.SUB, 0), Term(Tag.SUB, 0), body)
    # Two fresh lambdas, each closing over one half of the body.
    lam0_at = heap.alloc(Term(Tag.SUB, 0), Term(Tag.DP0, dup_at))
    lam1_at = heap.alloc(Term(Tag.SUB, 0), Term(Tag.DP1, dup_at))
    # The original variable now stands for {x0 x1}.
    sup_at = heap.alloc(Term(Tag.VAR, lam0_at), Term(Tag.VAR, lam1_at))
    heap.mem[dup.loc] = Term(Tag.LAM, lam0_at)
    heap.mem[dup.loc + 1] = Term(Tag.LAM, lam1_at)
    heap.mem[lam.loc] = Term(Tag.SUP, sup_at)
    return heap.mem[dup.loc + side]
def reduce_dup_sup(heap: Heap, dup: Term, sup: Term) -> Term:
    """DUP_SUP interaction: a duplication meets a superposition.

        & {x y} = {a b}
        --------------- DUP_SUP
        x <- a
        y <- b

    Each duplication port is wired straight to the matching superposed term.
    This annihilation rule is key to sharing computation optimally.
    """
    heap.itr += 1
    side = 0 if dup.tag == Tag.DP0 else 1
    first = heap.mem[sup.loc]
    second = heap.mem[sup.loc + 1]
    heap.mem[dup.loc] = first
    heap.mem[dup.loc + 1] = second
    return heap.mem[dup.loc + side]
def reduce(heap: Heap, term: Term) -> Term:
    """Reduce a term toward weak normal form.

    Walks down the graph following main ports, pushing parent APP/DP nodes
    onto ``heap.stk``. When the walk hits a node that cannot be followed
    further, the parent is popped and the matching interaction rule is
    applied. When no rule applies, the walk stops and the parent pointer is
    patched so the graph stays consistent.

    NOTE(review): the stack is not drained or reset on exit — entries left
    below the patched host survive into later ``reduce`` calls; presumably
    intentional (ported behavior), but verify against the reference runtime.
    """
    next_term = term
    while True:
        # === Walking Down Phase ===
        # Follow main ports until we find a redex or hit a base case.
        if next_term.tag == Tag.APP:
            # Application: descend into the function position (slot 0),
            # remembering the APP node so it can be reduced/patched later.
            heap.stk.append(next_term)
            next_term = heap.mem[next_term.loc]  # Follow function port
            continue
        elif next_term.tag in (Tag.DP0, Tag.DP1):
            # Duplicator port: either the duplication is still pending
            # (binding slot holds SUB) or it has completed (slot holds the
            # substituted term).
            key = get_var_key(next_term)  # Location of this port's binding slot
            sub = heap.mem[key]           # What currently occupies it
            if sub.tag == Tag.SUB:
                # Pending: remember the DP port, descend into the
                # duplicated expression (slot 2 of the DUP node).
                heap.stk.append(next_term)
                next_term = heap.mem[next_term.loc + 2]  # Follow expression port
                continue
            else:
                # Completed: jump straight to the substituted term.
                next_term = sub
                continue
        elif next_term.tag == Tag.VAR:
            # Variable: follow its binding unless it is still an unfilled
            # SUB marker, in which case the term is stuck here.
            key = get_var_key(next_term)
            sub = heap.mem[key]
            if sub.tag == Tag.SUB:
                # Unbound variable — cannot reduce further.
                break
            else:
                # Bound: continue from whatever the variable points at.
                next_term = sub
                continue
        # === Reduction Phase ===
        # next_term is a constructor (ERA/LAM/SUP/...) that cannot be
        # walked through; try to interact it with the parent on the stack.
        else:
            if not heap.stk:
                # No parent — next_term is already in weak normal form.
                break
            else:
                prev = heap.stk.pop()  # Parent node forming the redex
                if prev.tag == Tag.APP:
                    # Application interactions, dispatched on the argument.
                    if next_term.tag == Tag.ERA:
                        next_term = reduce_app_era(heap, prev, next_term)
                        continue
                    elif next_term.tag == Tag.LAM:
                        next_term = reduce_app_lam(heap, prev, next_term)
                        continue
                    elif next_term.tag == Tag.SUP:
                        next_term = reduce_app_sup(heap, prev, next_term)
                        continue
                    # No rule applies for this pair — stop.
                    # NOTE(review): prev was already popped here, so the
                    # return phase patches prev's parent, not prev itself;
                    # confirm this matches the reference semantics.
                    break
                elif prev.tag in (Tag.DP0, Tag.DP1):
                    # Duplication interactions, dispatched on the expression.
                    if next_term.tag == Tag.ERA:
                        next_term = reduce_dup_era(heap, prev, next_term)
                        continue
                    elif next_term.tag == Tag.LAM:
                        next_term = reduce_dup_lam(heap, prev, next_term)
                        continue
                    elif next_term.tag == Tag.SUP:
                        next_term = reduce_dup_sup(heap, prev, next_term)
                        continue
                    # No rule applies for this pair — stop.
                    break
                # Parent is neither APP nor DP — nothing to do.
                break
    # === Return Phase ===
    if not heap.stk:
        # Nothing above us: next_term is the result.
        return next_term
    else:
        # Patch the nearest remaining parent's walked port so the graph
        # reflects the reduction, then hand back the root of the walk.
        host = heap.stk[-1]
        if host.tag == Tag.APP:
            # Function position of the application.
            heap.mem[host.loc] = next_term
        elif host.tag in (Tag.DP0, Tag.DP1):
            # Expression slot of the duplication.
            heap.mem[host.loc + 2] = next_term
        # Root of the walked expression (bottom of the stack).
        return heap.stk[0]
def normal(heap: Heap, term: Term) -> Term:
    """Fully normalize a term.

    First reduces to weak head normal form, then recurses into the child
    ports of the resulting node:
    - APP / SUP: both of the node's two slots
    - LAM: the body slot
    - DP0 / DP1: the duplicated-expression slot
    Anything else is already normal.
    """
    whnf = reduce(heap, term)
    at = whnf.loc
    tag = whnf.tag
    if tag in (Tag.APP, Tag.SUP):
        # Both node kinds store two child ports at [at] and [at + 1].
        first = normal(heap, heap.mem[at])
        second = normal(heap, heap.mem[at + 1])
        heap.mem[at] = first
        heap.mem[at + 1] = second
    elif tag == Tag.LAM:
        body = normal(heap, heap.mem[at + 1])
        heap.mem[at + 1] = body
    elif tag in (Tag.DP0, Tag.DP1):
        expr = normal(heap, heap.mem[at + 2])
        heap.mem[at + 2] = expr
    return whnf
def inject_P24(heap: Heap) -> None:
    """Inject the P24 test program into the heap.

    Overwrites ``heap.mem`` with a literal memory image (a direct port of
    the C initializer) whose root is at index 0. The program exercises all
    aspects of the implementation:
    - Lambda abstraction and application
    - Variable handling
    - Duplication and sharing
    - Superposition
    """
    # Literal port array; each Term(tag, loc) follows the memory model
    # described in the module docstring. Locations are hex indices into
    # this same array.
    heap.mem = [
        Term(Tag.APP,0x000000001),
        Term(Tag.APP,0x000000003),
        Term(Tag.LAM,0x0000000ed),
        Term(Tag.LAM,0x000000005),
        Term(Tag.LAM,0x0000000df),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000d9),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.VAR,0x000000005),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x00000000d),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x00000000f),
        Term(Tag.DP0,0x000000007),
        Term(Tag.APP,0x000000011),
        Term(Tag.DP1,0x000000007),
        Term(Tag.VAR,0x00000000d),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000016),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000018),
        Term(Tag.DP0,0x00000000a),
        Term(Tag.APP,0x00000001a),
        Term(Tag.DP1,0x00000000a),
        Term(Tag.VAR,0x000000016),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x00000001f),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000021),
        Term(Tag.DP0,0x000000013),
        Term(Tag.APP,0x000000023),
        Term(Tag.DP1,0x000000013),
        Term(Tag.VAR,0x00000001f),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000028),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x00000002a),
        Term(Tag.DP0,0x00000001c),
        Term(Tag.APP,0x00000002c),
        Term(Tag.DP1,0x00000001c),
        Term(Tag.VAR,0x000000028),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000031),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000033),
        Term(Tag.DP0,0x000000025),
        Term(Tag.APP,0x000000035),
        Term(Tag.DP1,0x000000025),
        Term(Tag.VAR,0x000000031),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x00000003a),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x00000003c),
        Term(Tag.DP0,0x00000002e),
        Term(Tag.APP,0x00000003e),
        Term(Tag.DP1,0x00000002e),
        Term(Tag.VAR,0x00000003a),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000043),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000045),
        Term(Tag.DP0,0x000000037),
        Term(Tag.APP,0x000000047),
        Term(Tag.DP1,0x000000037),
        Term(Tag.VAR,0x000000043),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x00000004c),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x00000004e),
        Term(Tag.DP0,0x000000040),
        Term(Tag.APP,0x000000050),
        Term(Tag.DP1,0x000000040),
        Term(Tag.VAR,0x00000004c),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000055),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000057),
        Term(Tag.DP0,0x000000049),
        Term(Tag.APP,0x000000059),
        Term(Tag.DP1,0x000000049),
        Term(Tag.VAR,0x000000055),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x00000005e),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000060),
        Term(Tag.DP0,0x000000052),
        Term(Tag.APP,0x000000062),
        Term(Tag.DP1,0x000000052),
        Term(Tag.VAR,0x00000005e),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000067),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000069),
        Term(Tag.DP0,0x00000005b),
        Term(Tag.APP,0x00000006b),
        Term(Tag.DP1,0x00000005b),
        Term(Tag.VAR,0x000000067),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000070),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000072),
        Term(Tag.DP0,0x000000064),
        Term(Tag.APP,0x000000074),
        Term(Tag.DP1,0x000000064),
        Term(Tag.VAR,0x000000070),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000079),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x00000007b),
        Term(Tag.DP0,0x00000006d),
        Term(Tag.APP,0x00000007d),
        Term(Tag.DP1,0x00000006d),
        Term(Tag.VAR,0x000000079),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000082),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000084),
        Term(Tag.DP0,0x000000076),
        Term(Tag.APP,0x000000086),
        Term(Tag.DP1,0x000000076),
        Term(Tag.VAR,0x000000082),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x00000008b),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x00000008d),
        Term(Tag.DP0,0x00000007f),
        Term(Tag.APP,0x00000008f),
        Term(Tag.DP1,0x00000007f),
        Term(Tag.VAR,0x00000008b),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x000000094),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x000000096),
        Term(Tag.DP0,0x000000088),
        Term(Tag.APP,0x000000098),
        Term(Tag.DP1,0x000000088),
        Term(Tag.VAR,0x000000094),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x00000009d),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x00000009f),
        Term(Tag.DP0,0x000000091),
        Term(Tag.APP,0x0000000a1),
        Term(Tag.DP1,0x000000091),
        Term(Tag.VAR,0x00000009d),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000a6),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x0000000a8),
        Term(Tag.DP0,0x00000009a),
        Term(Tag.APP,0x0000000aa),
        Term(Tag.DP1,0x00000009a),
        Term(Tag.VAR,0x0000000a6),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000af),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x0000000b1),
        Term(Tag.DP0,0x0000000a3),
        Term(Tag.APP,0x0000000b3),
        Term(Tag.DP1,0x0000000a3),
        Term(Tag.VAR,0x0000000af),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000b8),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x0000000ba),
        Term(Tag.DP0,0x0000000ac),
        Term(Tag.APP,0x0000000bc),
        Term(Tag.DP1,0x0000000ac),
        Term(Tag.VAR,0x0000000b8),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000c1),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x0000000c3),
        Term(Tag.DP0,0x0000000b5),
        Term(Tag.APP,0x0000000c5),
        Term(Tag.DP1,0x0000000b5),
        Term(Tag.VAR,0x0000000c1),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000ca),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x0000000cc),
        Term(Tag.DP0,0x0000000be),
        Term(Tag.APP,0x0000000ce),
        Term(Tag.DP1,0x0000000be),
        Term(Tag.VAR,0x0000000ca),
        Term(Tag.SUB,0x000000000),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000d3),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x0000000d5),
        Term(Tag.DP0,0x0000000c7),
        Term(Tag.APP,0x0000000d7),
        Term(Tag.DP1,0x0000000c7),
        Term(Tag.VAR,0x0000000d3),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x0000000db),
        Term(Tag.DP0,0x0000000d0),
        Term(Tag.APP,0x0000000dd),
        Term(Tag.DP1,0x0000000d0),
        Term(Tag.VAR,0x0000000d9),
        Term(Tag.SUB,0x000000000),
        Term(Tag.APP,0x0000000e1),
        Term(Tag.APP,0x0000000e3),
        Term(Tag.LAM,0x0000000e9),
        Term(Tag.VAR,0x0000000df),
        Term(Tag.LAM,0x0000000e5),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000e7),
        Term(Tag.SUB,0x000000000),
        Term(Tag.VAR,0x0000000e7),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000eb),
        Term(Tag.SUB,0x000000000),
        Term(Tag.VAR,0x0000000e9),
        Term(Tag.SUB,0x000000000),
        Term(Tag.LAM,0x0000000ef),
        Term(Tag.SUB,0x000000000),
        Term(Tag.VAR,0x0000000ed),
    ]
def main() -> None:
    """Build the P24 test program, normalize it, and print run statistics.

    Fixes:
    - ``time.perf_counter()`` replaces ``time.time()``: it is monotonic and
      high-resolution, the correct clock for measuring intervals.
    - The MIPS report is guarded against a zero-length interval, which
      would otherwise raise ZeroDivisionError.
    """
    heap = Heap()
    inject_P24(heap)

    start_time = time.perf_counter()
    # Normalize the root term; interactions are counted on the heap.
    normal(heap, heap.mem[0])
    duration = time.perf_counter() - start_time

    print(f"Itrs: {heap.itr}")
    print(f"Size: {len(heap.mem)} nodes")
    print(f"Time: {duration:.2f} seconds")
    if duration > 0:
        print(f"MIPS: {(heap.itr / 1_000_000.0) / duration:.2f}")
    else:
        print("MIPS: n/a")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment