-
-
Save sgoguen/46200419194eb29b82079b0804e58c34 to your computer and use it in GitHub Desktop.
module NatPatterns = | |
type nat = int64 | |
let nat(x) = int64(x) | |
// Rosenberg-Strong unpairing function (balances better than Cantor's) | |
// A true total function when you replace int64 with bigint | |
let unpair(z: nat): (nat * nat) = | |
let m = nat(floor (sqrt (double z))) | |
let ml = z - (m * m) | |
if ml < m then (ml, m) | |
else (m, (m * m) + m + m - z) | |
// Our unpairing function makes for a nice total active pattern | |
let (|Pair|) z = unpair z | |
// An AP for alternating between partitions | |
let inline (|Part1of2|Part2of2|) z = | |
if z % 2 = 0 then Part1of2(z / 2) else Part2of2(z / 2) | |
// An AP for alternating between 3 paritions | |
let inline (|Part1of3|Part2of3|Part3of3|) z = | |
match z % 3L with | |
| 0L -> Part1of3(z / 3L) | |
| 1L -> Part2of3(z / 3L) | |
| _ -> Part3of3(z / 3L) | |
// Here we're going to partition based on some type of max value | |
type NHead = Head of nat | Tail of nat | |
let takeMax m = function | |
| n when n <= m -> Head(n) | |
| n -> Tail(n - (m + 1L)) | |
module NatPatternExamples = | |
open NatPatterns | |
type BTree<'a> = | |
| Empty | |
| Node of Value:'a * Left:BTree<'a> * Right:BTree<'a> | |
// Can we construct an AP that converts an integer into a binary-tree? | |
let rec (|BTree|) = takeMax 0 >> function | |
| Head (n) -> Empty | |
| Tail (Pair(value, Pair(BTree (left), BTree (right)))) -> Node(value, left, right) | |
// Let's convert an int64 into a BTree | |
let (BTree(t)) = 10 | |
printfn "Tree: %A" t | |
type Expr<'a> = | |
| Var of 'a | |
| Lam of 'a * Expr<'a> | |
| App of Expr<'a> * Expr<'a> | |
// Let's convert an int64 into a Lambda expression | |
let rec (|Expr|) = function | |
| Part1of3(n) -> Var(n) | |
| Part2of3(Pair(v, Expr(e))) -> Lam(v, e) | |
| Part3of3(Pair(Expr(e1), Expr(e2))) -> App(e1, e2) | |
let (Expr(e)) = 10 | |
printfn "Expression: %A" e | |
Your bijection works but has exponential overhead for common terms like Church numerals,
whose nested applications have one side of always constant size. Allow me to suggest a more balanced approach.
The number of closed terms of size n bits in a simple encoding is given by https://oeis.org/A114852 using the formula
a(n) = N(0,n) with
N(k,0) = N(k,1) = 0
N(k,n+2) = (if k>n then 1 else 0) +
N(k+1,n) +
Sum_{i=0..n} N(k,i) * N(k,n-i)
which suggests the following total ordering of closed terms, equivalent to a bijection:
data T = V Int | L T | A T T deriving (Eq,Show)
closed :: Int -> Int -> [T]
closed k n = if n < 2 then [] else
[V (n-2) | n>=2]
++ [L t | t <- closed (k+1) (n-2)]
++ [A t1 t2 | i <- [0..n-2], t1 <- closed k i, t2 <- closed k (n-2-i)]
allclosed = [0..] >>= closed 0
main = mapM_ print . take 42 $ allclosed
with output
V 0
V 1
V 2
L (V 0)
V 3
L (V 1)
V 4
L (V 2)
L (L (V 0))
A (V 0) (V 0)
V 5
L (V 3)
L (L (V 1))
A (V 0) (V 1)
A (V 1) (V 0)
V 6
L (V 4)
L (L (V 2))
L (L (L (V 0)))
L (A (V 0) (V 0))
A (V 0) (V 2)
A (V 0) (L (V 0))
A (V 1) (V 1)
A (V 2) (V 0)
A (L (V 0)) (V 0)
V 7
L (V 5)
L (L (V 3))
L (L (L (V 1)))
L (A (V 0) (V 1))
L (A (V 1) (V 0))
A (V 0) (V 3)
A (V 0) (L (V 1))
A (V 1) (V 2)
A (V 1) (L (V 0))
A (V 2) (V 1)
A (L (V 0)) (V 1)
A (V 3) (V 0)
A (L (V 1)) (V 0)
V 8
L (V 6)
L (L (V 4))
The index of any term in this sequence, written in binary, is always less than its simple binary encoding. For example, Church numeral 5 = L (L (A (V 1) (A (V 1) (A (V 1) (A (V 1) (A (V 1) (V 0))))))))
appears at index 8194786 in this list, while its simple encoding is
00 00 01 110 01 110 01 110 01 110 01 110 10 corresponding to 2208098105.
This is because the simple encoding also leaves room for encoding open terms.
@tromp - Thank you for your suggestions and encouragement.
While I've been meaning to explore efficient binary encoding systems, my interest in viewing recursive data structures through the lens of natural numbers has been mostly motivated by nagging questions around defining bijections with recursive data types and how to define these bijections so they are complete for the datatype or for invariant constraints. To demonstrate this, I've included an example (unlike the above snippet) where I create a bijection from Nat <==> closed lambda terms and I show how it corresponds to BLC.
As I'm sure you already appreciate, from a runtime efficiency standpoint, using pairing functions (I prefer balanced over Cantor's) isn't great because constructing larger terms from very big integers incurs a lot of square root calls for large sets, but I do like it from a Kolmogorov complexity point of view because I can encode λ0 as 0 and everything other closed term is accounted for with its own unique natural number. To your point, always being able to represent 2^N closed terms with N bits is nice.
As much as I'd love to explore more optimal ways of encoding it with Algorithmic Information Theory, I'm a self-taught programmer who hasn't added that to my curriculum yet, but I look forward to reading up on it!
Anyway, I've attached my bijective code for counting closed terms. (The above code was only supposed to demonstrate F#'s Active Pattern feature) I haven't proven this code is bijective yet as I'm still learning the Lean theorem prover, but I've run it for very large integers (I have a bigint implementation somewhere)
Again, thank you for the link and suggestions!