Skip to content

Instantly share code, notes, and snippets.

@connorjclark
Last active August 7, 2024 13:57
Show Gist options
  • Save connorjclark/874f1034809ce475a8e3ea7e09a8cc40 to your computer and use it in GitHub Desktop.
Save connorjclark/874f1034809ce475a8e3ea7e09a8cc40 to your computer and use it in GitHub Desktop.
prime.wat
(module $ffc-2-Prime
(type $t0 (func (param i32)))
(type $t1 (func (param i32 i32 i32) (result i32)))
(type $t2 (func (param i32) (result i32)))
(type $t3 (func (param i32 i32)))
(type $t4 (func (param i32 i32 i32 i32 i32)))
(type $t5 (func))
(import "env" "memory" (memory $env.memory 1 32768 shared))
(import "env" "set_return_value" (func $env.set_return_value (type $t0)))
(import "env" "do_commands" (func $env.do_commands (type $t1)))
(import "env" "get_register" (func $env.get_register (type $t2)))
(import "env" "set_register" (func $env.set_register (type $t3)))
(import "env" "runtime_debug" (func $env.runtime_debug (type $t3)))
(func $run (type $t4) (param $p0 i32) (param $p1 i32) (param $p2 i32) (param $p3 i32) (param $p4 i32)
(global.set $ri
(local.get $p0))
(global.set $global_d
(local.get $p1))
(global.set $stack
(local.get $p2))
(global.set $ret_stack
(local.get $p3))
(global.set $ret_stack
(local.get $p3))
(global.set $sp
(i32.load offset=44
(local.get $p0)))
(global.set $target_block_id
(i32.load offset=48
(local.get $p0)))
(call $yielder)
(i32.store offset=44
(local.get $p0)
(global.get $sp)))
(func $yielder (type $t5)
(local $l0 i32)
(loop $L0
(block $B1
(block $B2
(block $B3
(block $B4
(block $B5
(block $B6
(block $B7
(block $B8
(block $B9
(block $B10
(block $B11
(block $B12
(br_table $B12 $B11 $B10 $B9 $B8 $B7 $B6 $B5 $B4 $B3 $B2 $B1 $L0 $B12
(global.get $target_block_id))))
(drop
(i32.const 0))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=4
(global.get $ri)))
(drop
(i32.const 1))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(call $env.get_register
(i32.const 2192)))
(drop
(i32.const 2))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 3))
(i32.store offset=20
(global.get $ri)
(i32.mul
(global.get $sp)
(i32.const 10000))))
(drop
(i32.const 4))
(i32.store offset=12
(global.get $ri)
(i32.const 20000))
(drop
(i32.const 5))
(i32.store
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri))))
(drop
(i32.const 6))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 7))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 8))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 2))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 9))
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack)))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 1))
(i32.const 1023)))
(i32.store offset=16)
(drop
(i32.const 10))
(i32.store offset=12
(global.get $ri)
(i32.mul
(i32.le_s
(i32.load offset=16
(global.get $ri))
(i32.load offset=12
(global.get $ri)))
(i32.const 10000)))
(drop
(i32.const 12))
(i32.eq
(i32.load offset=12
(global.get $ri))
(i32.const 0))
(global.set $target_block_id
(i32.const 9))
(br_if $L0))
(drop
(i32.const 14))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 2))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 15))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 16))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 17))
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack)))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 1))
(i32.const 1023)))
(i32.store offset=16)
(drop
(i32.const 18))
(i32.store offset=16
(global.get $ri)
(if $I13 (result i32)
(local.tee $l0
(i32.load offset=12
(global.get $ri)))
(then
(i32.rem_s
(i32.load offset=16
(global.get $ri))
(local.get $l0)))
(else
(i32.const 0))))
(drop
(i32.const 19))
(i32.store offset=12
(global.get $ri)
(i32.load offset=16
(global.get $ri)))
(drop
(i32.const 20))
(i32.store offset=12
(global.get $ri)
(i32.mul
(i32.ne
(i32.load offset=12
(global.get $ri))
(i32.const 0))
(i32.const 10000)))
(drop
(i32.const 22))
(i32.eq
(i32.load offset=12
(global.get $ri))
(i32.const 0))
(global.set $target_block_id
(i32.const 6))
(br_if $L0))
(drop
(i32.const 24))
(global.set $target_block_id
(i32.const 8))
(br $L0))
(drop
(i32.const 25))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=20
(global.get $ri)))
(drop
(i32.const 26))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.const 31))
(drop
(i32.const 27))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 28))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 29))
(global.set $target_block_id
(i32.const 0))
(call $fn_1)
(drop
(i32.const 30))
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack)))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 1))
(i32.const 1023)))
(i32.store offset=20)
(drop
(i32.const 31))
(i32.eq
(i32.load offset=12
(global.get $ri))
(i32.const 0))
(global.set $target_block_id
(i32.const 8))
(br_if $L0))
(drop
(i32.const 33))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 34))
(i32.store offset=16
(global.get $ri)
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 35))
(drop
(call $env.do_commands
(i32.const 35)
(i32.const 1)
(global.get $sp))))
(drop
(i32.const 36))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 37))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 38))
(i32.store offset=12
(global.get $ri)
(i32.add
(i32.load offset=12
(global.get $ri))
(i32.const 10000)))
(drop
(i32.const 39))
(i32.store
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 40))
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack)))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 1))
(i32.const 1023)))
(i32.store offset=12)
(drop
(i32.const 41))
(global.set $target_block_id
(i32.const 3))
(br $L0))
(drop
(i32.const 42))
(i32.store offset=48
(global.get $ri)
(i32.const 10))
(drop
(call $env.do_commands
(i32.const 42)
(i32.const 1)
(global.get $sp)))
(return))
(drop
(i32.const 43))
(global.set $target_block_id
(i32.const 2))
(br $L0))
(drop
(i32.const 44))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 3))
(i32.const 1023)))
(i32.store offset=24
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.and
(i32.sub
(global.get $sp)
(i32.const 1))
(i32.const 1023))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 45))
(drop
(call $env.do_commands
(i32.const 45)
(i32.const 1)
(global.get $sp)))
(call $env.set_return_value
(i32.const 0))
(unreachable)
(br $L0)))
(func $fn_1 (type $t5)
(local $l0 i32)
(loop $L0
(block $B1
(block $B2
(block $B3
(block $B4
(block $B5
(block $B6
(block $B7
(block $B8
(br_table $B8 $B7 $B6 $B5 $B4 $B3 $B2 $B1 $L0 $B8
(global.get $target_block_id))))
(drop
(i32.const 46))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 47))
(i32.store offset=20
(global.get $ri)
(i32.mul
(global.get $sp)
(i32.const 10000)))
(drop
(i32.const 48))
(i32.store offset=12
(global.get $ri)
(i32.const 20000))
(drop
(i32.const 49))
(i32.store
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri))))
(drop
(i32.const 50))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 51))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 52))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 1))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 53))
(i32.store offset=12
(global.get $ri)
(i32.wrap_i64
(i64.div_s
(i64.mul
(i64.extend_i32_s
(i32.load offset=12
(global.get $ri)))
(i64.const 10000))
(i64.const 20000))))
(drop
(i32.const 54))
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack)))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 1))
(i32.const 1023)))
(i32.store offset=16)
(drop
(i32.const 55))
(i32.store offset=12
(global.get $ri)
(i32.mul
(i32.le_s
(i32.load offset=16
(global.get $ri))
(i32.load offset=12
(global.get $ri)))
(i32.const 10000)))
(drop
(i32.const 57))
(i32.eq
(i32.load offset=12
(global.get $ri))
(i32.const 0))
(global.set $target_block_id
(i32.const 6))
(br_if $L0))
(drop
(i32.const 59))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 1))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 60))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 61))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 62))
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack)))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 1))
(i32.const 1023)))
(i32.store offset=16)
(drop
(i32.const 63))
(i32.store offset=16
(global.get $ri)
(if $I9 (result i32)
(local.tee $l0
(i32.load offset=12
(global.get $ri)))
(then
(i32.rem_s
(i32.load offset=16
(global.get $ri))
(local.get $l0)))
(else
(i32.const 0))))
(drop
(i32.const 64))
(i32.store offset=12
(global.get $ri)
(i32.load offset=16
(global.get $ri)))
(drop
(i32.const 65))
(i32.store offset=12
(global.get $ri)
(i32.mul
(i32.eq
(i32.load offset=12
(global.get $ri))
(i32.const 0))
(i32.const 10000)))
(drop
(i32.const 67))
(i32.eq
(i32.load offset=12
(global.get $ri))
(i32.const 0))
(global.set $target_block_id
(i32.const 5))
(br_if $L0))
(drop
(i32.const 69))
(i32.store offset=12
(global.get $ri)
(i32.const 0))
(drop
(i32.const 70))
(global.set $target_block_id
(i32.const 7))
(br $L0))
(drop
(i32.const 71))
(i32.store offset=12
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 72))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const -1))
(i32.const 1023)))
(i32.store
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 73))
(i32.store offset=12
(global.get $ri)
(i32.add
(i32.load offset=12
(global.get $ri))
(i32.const 10000)))
(drop
(i32.const 74))
(i32.store
(i32.add
(i32.mul
(i32.add
(i32.div_u
(i32.load offset=20
(global.get $ri))
(i32.const 10000))
(i32.const 0))
(i32.const 4))
(global.get $stack))
(i32.load offset=12
(global.get $ri)))
(drop
(i32.const 75))
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(global.get $sp)
(i32.const 4))
(global.get $stack)))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 1))
(i32.const 1023)))
(i32.store offset=12)
(drop
(i32.const 76))
(global.set $target_block_id
(i32.const 2))
(br $L0))
(drop
(i32.const 77))
(i32.store offset=12
(global.get $ri)
(i32.const 10000))
(drop
(i32.const 78))
(global.set $target_block_id
(i32.const 7))
(br $L0))
(drop
(i32.const 79))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 2))
(i32.const 1023)))
(i32.store offset=24
(global.get $ri)
(i32.load
(i32.add
(i32.mul
(i32.and
(i32.sub
(global.get $sp)
(i32.const 1))
(i32.const 1023))
(i32.const 4))
(global.get $stack))))
(drop
(i32.const 80))
(global.set $sp
(i32.and
(i32.add
(global.get $sp)
(i32.const 1))
(i32.const 1023)))
(return)
(drop
(i32.const 81))
(if $I10
(local.tee $l0
(call $env.do_commands
(i32.const 81)
(i32.const 1)
(global.get $sp)))
(then
(call $env.set_return_value
(local.get $l0))
(unreachable)))
(br $L0)))
(global $ri (mut i32) (i32.const 0))
(global $global_d (mut i32) (i32.const 0))
(global $stack (mut i32) (i32.const 0))
(global $ret_stack (mut i32) (i32.const 0))
(global $ret_stack_index (mut i32) (i32.const 0))
(global $sp (mut i32) (i32.const 0))
(global $target_block_id (mut i32) (i32.const 0))
(export "run" (func $run)))
yielder
0: PUSHR D0 [Block 0 -> 1]
1: PUSHR REFFFC
2: PUSHR D2
3: SETR D4 SP
4: SETV D2 20000 [Block 1 -> 2]
5: STORED D2 0
6: LOADD D2 0 [Block 2 -> 3, 8]
7: PUSHR D2
8: LOADD D2 20000
9: POP D3
10: COMPARER D3 D2
11: SETLESSI D2
12: COMPAREV D2 0
13: GOTOTRUE 42
14: LOADD D2 20000 [Block 3 -> 4, 5]
15: PUSHR D2
16: LOADD D2 0
17: POP D3
18: MODR D3 D2
19: SETR D2 D3
20: COMPAREV D2 0
21: SETFALSEI D2
22: COMPAREV D2 0
23: GOTOTRUE 25
24: GOTO 36 [Block 4 -> 7]
25: PUSHR D4 [Block 5 -> 6, 7]
26: PUSHV 31
27: LOADD D2 0
28: PUSHR D2
29: GOTO 46 [Call Function #1]
30: POP D4
31: COMPAREV D2 0
32: GOTOTRUE 36
33: LOADD D2 0 [Block 6 -> 7]
34: SETR D3 D2
35: TRACER D3
36: LOADD D2 0 [Block 7 -> 2]
37: PUSHR D2
38: ADDV D2 10000
39: STORED D2 0
40: POP D2
41: GOTO 6
42: WAITFRAME [Block 8 -> 9]
43: GOTO 4 [Block 9 -> 1]
44: POPARGS D5 3 [Block 10 -> ]
45: QUIT
Function #1
46: PUSHR D2 [Block 0 -> 1]
47: SETR D4 SP
48: SETV D2 20000
49: STORED D2 0
50: LOADD D2 0 [Block 1 -> 2, 5]
51: PUSHR D2
52: LOADD D2 10000
53: DIVV D2 20000
54: POP D3
55: COMPARER D3 D2
56: SETLESSI D2
57: COMPAREV D2 0
58: GOTOTRUE 77
59: LOADD D2 10000 [Block 2 -> 3, 4]
60: PUSHR D2
61: LOADD D2 0
62: POP D3
63: MODR D3 D2
64: SETR D2 D3
65: COMPAREV D2 0
66: SETTRUEI D2
67: COMPAREV D2 0
68: GOTOTRUE 71
69: SETV D2 0 [Block 3 -> 6]
70: GOTO 79
71: LOADD D2 0 [Block 4 -> 1]
72: PUSHR D2
73: ADDV D2 10000
74: STORED D2 0
75: POP D2
76: GOTO 50
77: SETV D2 10000 [Block 5 -> 6]
78: GOTO 79
79: POPARGS D5 2 [Block 6 -> ]
80: RETURN
81: 
// Calculates and prints to the screen the factors of the given number.
// Calculated every frame, as the point of this is to be a performance test.
ffc script Prime
{
void run(int num)
{
while (true)
{
for (int i = 2; i <= num; i++)
{
if (num % i != 0) continue;
if (is_prime(i)) Trace(i);
}
Waitframe();
}
}
bool is_prime(int x)
{
for (int i = 2; i <= x/2; i++)
{
if (x % i == 0)
{
return false;
}
}
return true;
}
}
@connorjclark
Copy link
Author

Nearly done with an early first pass on a JIT for the web build. It's actually not any faster yet, but that's expected. Prepare for a brain dump on what I've been working on the past week.

In this MVP I've generated a StructuredZasm of a script's ZASM instructions, which is annotated with all the function start/ends, what functions call each other, and what PCs transfer to other PCs. With that, I partition the functions into two groups: those that use WaitX and thus may need to yield (or may call a function that does), and all the rest. The first group is inlined into a single logical function called the "yielder"; all the functions in the second group are treated as a separate logical functions.

Why this "yielder" thing? It supports exiting at any WaitX point, and using a stored variable to "jump back" to that point the next time the function is called.

Why split everything into separate WASM functions, instead of one super large function like the x64 JIT does? Because WASM engines (like V8) have a tiered compiler, and will only use the slower optimizing compiler on a hot function - and the larger it is, the longer it takes to compile. Splitting in up to match the ZScript functions (as much as possible) should be much better for performance.

For each logical function, a control flow graph (ZasmCfg) is generated marking all the branches internal to this function. So it captures all
ifs and loops, and for the yielder function it includes WaitX and calls to other ZASM functions that may yield. Excluded are calls to other logical functions (so from the yielder function -> any non-yielding ZASM function, or a non-yielding ZASM function -> another non-yielding ZASM function) - simply because that information is not needed in the graph. Why are those calls not needed in the graph? I'll explain why after explaining how the CFG is used.

Quickly: a CFG is simply a graph of basic blocks. A basic block is a list of contiguous instructions, which may only transfer to another block as part of its last instruction (never in the middle). Further, the only way a basic block may be entered is via its first instruction, either by some block using a GOTO to jump to it, or via a previous block "falling through" to it (simply the pc advancing one). You can think of each block as a node in the graph, and the blocks it may go to as edges.

In WebAssembly, there is no GOTO equivalent. There are only structured controls - blocks, loops, ifs. Therefore, a simple translation for ZASM GOTOs is not available. Instead, the control flow of the script must be determined and used to generate the proper blocks, loops, and ifs in WebAssembly. In general, doing this for an arbitrary CFG is quite difficult, but its simpler in this case because ZScript has no "goto" construct, which means the ZASM we generate from it should not be "irreducible" - which mean a fairly complex step of "irreducible CFG" -> "reducible CFG" is not needed (Relooper is the popular method).

So I've got a CFG, which embeds the logical ifs and loops of a ZASM script; and the goal is to encode the basic blocks into the relevant WebAssembly control flow operations. This is actually a pretty difficult problem still, and I'm still chewing through an academic paper on the subject. But this is where I've taken a shortcut.

You can represent any CFG very simply, with just a single extra variable, a loop, and a switch statement. The drawback is the code performs very poorly, due to a combinations of excessive branching and an inability for the compiler to meaningfully optimize across branch boundraries. This explains what I mean clearly: https://medium.com/leaningtech/solving-the-structured-control-flow-problem-once-and-for-all-5123117b1ee2#:~:text=A%20universal%20but%20inefficient%20solution

Back to "Why are those calls (to other logical functions) not needed in the graph?". It's simply because those function calls can be compiled to WASM call instructions, which handles control flow entirely. So there's just no need to work out how to return to the point after that GOTO - it's just a function call.

So that's what I've done first, and the result is that scripts run slightly slower. Hopefully once I've worked through this paper, the end result will be even Yuurand being playable in the browser.

All this Structured ZASM/CFG stuff isn't useful just for the WASM JIT. It should be beneficial in other ways:

  1. For the x64 JIT to compile only part of the ZASM script, as needed, and potentially only if "hot". Don't waste time on unused functions
  2. A basic block is a natural unit of work to operate on for applying optimizations on already-generated ZASM. Since the blocks they transfer to are known, the change in the instruction count caused by optimizations should be simple to fixup in GOTOs
  3. Derive the number of parameters / if there is a function return, and attempt to move such parameter/return value passing away from the ZASM stack and to a native function call

@connorjclark
Copy link
Author

Note: the .wat is littered with pointless things like:

(drop
  (i32.const 47))

simply to "comment" that what follows is from ZASM instruction #47, for example.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment