Skip to content

Instantly share code, notes, and snippets.

@patrickdet
Created September 2, 2025 09:53
Show Gist options
  • Save patrickdet/45ea8051163d48f755cc1a6d8cb10240 to your computer and use it in GitHub Desktop.
#!/usr/bin/env elixir
# Clean, focused benchmark for comparing main vs tokio branch
#
# Dependencies are installed ad hoc so the script is self-contained:
#   - wasmex:  the library under test, taken from the current checkout
#   - benchee: micro-benchmarking harness used for Tests 1 and 2
Mix.install([
  # local checkout — run this script from the wasmex repository root
  {:wasmex, path: "."},
  {:benchee, "~> 1.3"}
])
# WebAssembly text-format (WAT) module for the CPU-bound tests (Tests 1-3):
# exports a no-op function and a trivial i32 add (~3 WASM instructions),
# so measurements reflect call overhead rather than guest computation.
wat_compute = """
(module
(func $nop (export "nop"))
(func $add (export "add") (param $a i32) (param $b i32) (result i32)
local.get $a
local.get $b
i32.add
)
)
"""
# WAT module for the I/O-bound test (Test 4): imports a host-provided
# `sleep_ms` function (wired up to Process.sleep/1 later in this script) so
# guest calls block on the host, exposing async-vs-thread-blocking behavior.
wat_io = """
(module
(import "env" "sleep_ms" (func $sleep (param i32)))
(func $io_operation (export "io_operation") (param $delay i32)
local.get $delay
call $sleep
)
(func $mixed_operation (export "mixed_operation") (param $delay i32) (result i32)
;; Do some computation
i32.const 42
i32.const 58
i32.add
drop ;; Drop the computation result
;; Then sleep
local.get $delay
call $sleep
;; Return result
i32.const 100
)
)
"""
# Print the banner, the current git branch (so results can be attributed to a
# branch when diffing runs), and an overview of the methodology.
IO.puts("\nWASMEX PERFORMANCE BENCHMARK")
IO.puts("=" <> String.duplicate("=", 60))
# The {_, 0} match asserts the git command exited successfully.
{branch, 0} = System.cmd("git", ["branch", "--show-current"])
IO.puts("Branch: #{String.trim(branch)}")
IO.puts("")
IO.puts("METHODOLOGY")
IO.puts("-" <> String.duplicate("-", 60))
# BUGFIX: the original said "three scenarios" but four are listed below.
IO.puts("This benchmark measures WebAssembly execution performance in four scenarios:")
IO.puts("")
IO.puts("1. CONCURRENT CALLS: Multiple async tasks calling a single instance")
IO.puts(" - Tests how well the runtime handles concurrent access to one instance")
IO.puts(" - Measures coordination overhead and potential lock contention")
IO.puts("")
IO.puts("2. PARALLEL INSTANCES: Multiple pre-created instances executing in parallel")
IO.puts(" - Tests how well the runtime scales across multiple instances")
IO.puts(" - Each instance receives one call per round")
IO.puts("")
IO.puts("3. SUSTAINED THROUGHPUT: Continuous execution over fixed time period")
IO.puts(" - Tests maximum sustained operations per second")
IO.puts(" - Removes coordination overhead to show raw performance")
IO.puts("")
IO.puts("4. I/O-BOUND WORKLOAD: Operations with host-provided sleep")
IO.puts(" - Tests how runtime handles blocking/waiting operations")
IO.puts(" - Reveals async (Tokio) vs thread-blocking (OS threads) behavior")
IO.puts(" - Shows scaling limits when operations wait for I/O")
IO.puts("")
IO.puts("WASM FUNCTIONS:")
IO.puts("- Tests 1-3: Simple integer addition (CPU-bound, ~3 WASM instructions)")
IO.puts("- Test 4: Host sleep import (I/O-bound, tests async handling)")
IO.puts("")
# Test 1: a single Wasmex instance serving many concurrent callers.
IO.puts("TEST 1: Single Instance Concurrent Calls")
IO.puts("-" <> String.duplicate("-", 40))
{:ok, pid} = Wasmex.start_link(%{bytes: wat_compute})
# Warm the instance before any measurement.
Enum.each(1..100, fn _ -> Wasmex.call_function(pid, :add, [1, 2]) end)

# Fire `n` async calls at the shared instance and wait for all of them.
# 5_000 ms is Task.await_many/2's default timeout, passed explicitly here.
run_concurrent = fn n, timeout ->
  1..n
  |> Enum.map(fn _ ->
    Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
  end)
  |> Task.await_many(timeout)
end

suite1 =
  Benchee.run(
    %{
      "1 call" => fn -> Wasmex.call_function(pid, :add, [1, 2]) end,
      "10 concurrent" => fn -> run_concurrent.(10, 5_000) end,
      "100 concurrent" => fn -> run_concurrent.(100, 5_000) end,
      "1000 concurrent" => fn -> run_concurrent.(1000, 5_000) end,
      # 10k tasks can legitimately take a while — allow a 30s await.
      "10000 concurrent" => fn -> run_concurrent.(10000, 30_000) end
    },
    time: 3,
    warmup: 1,
    formatters: [{Benchee.Formatters.Console, comparison: false}],
    print: [benchmarking: false, configuration: false]
  )
# Calculate and display ops/sec for each Test 1 scenario.
#
# BUGFIX: Benchee (>= 1.0) reports run-time statistics in NANOSECONDS. The
# original divided the average by 1_000_000 (treating it as microseconds),
# which inflated every reported ops/sec figure by 1000x. Convert from ns.
IO.puts("\nOperations per second:")
for scenario <- suite1.scenarios do
  # Number of Wasmex calls performed per benchmark iteration; the case is
  # intentionally exhaustive so an unknown scenario name crashes loudly.
  count =
    case scenario.name do
      "1 call" -> 1
      "10 concurrent" -> 10
      "100 concurrent" -> 100
      "1000 concurrent" -> 1000
      "10000 concurrent" -> 10000
    end

  avg_time_ns = scenario.run_time_data.statistics.average
  ops_per_sec = count / (avg_time_ns / 1_000_000_000)
  name = String.pad_trailing(scenario.name, 20)
  IO.puts(" #{name} #{round(ops_per_sec)} ops/sec")
end

GenServer.stop(pid)
# Test 2: Multiple pre-created instances, one parallel call each per round.
IO.puts("\nTEST 2: Multiple Instance Parallel Execution")
IO.puts("-" <> String.duplicate("-", 40))
# Pre-create instances; elem/2 unwraps the {:ok, pid} tuple from start_link.
instances_10 = for _ <- 1..10, do: elem(Wasmex.start_link(%{bytes: wat_compute}), 1)
instances_100 = for _ <- 1..100, do: elem(Wasmex.start_link(%{bytes: wat_compute}), 1)
# BUGFIX: the original warmed only instances_10, so the 100-instance scenario
# paid first-call costs inside the measured region. Warm both pools.
for pid <- instances_10, do: Wasmex.call_function(pid, :add, [1, 2])
for pid <- instances_100, do: Wasmex.call_function(pid, :add, [1, 2])
suite2 = Benchee.run(
%{
"10 instances" => fn ->
tasks = for pid <- instances_10 do
Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
end
Task.await_many(tasks)
end,
"100 instances" => fn ->
tasks = for pid <- instances_100 do
Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
end
Task.await_many(tasks)
end
},
time: 3,
warmup: 1,
formatters: [{Benchee.Formatters.Console, comparison: false}],
print: [benchmarking: false, configuration: false]
)
# Calculate and display ops/sec for each Test 2 scenario.
#
# BUGFIX: Benchee (>= 1.0) reports run-time statistics in NANOSECONDS; the
# original divided by 1_000_000 as if the average were microseconds,
# overstating throughput by 1000x. Convert from ns.
IO.puts("\nOperations per second:")
for scenario <- suite2.scenarios do
  # One call per instance per iteration.
  count =
    case scenario.name do
      "10 instances" -> 10
      "100 instances" -> 100
    end

  avg_time_ns = scenario.run_time_data.statistics.average
  ops_per_sec = count / (avg_time_ns / 1_000_000_000)
  name = String.pad_trailing(scenario.name, 20)
  IO.puts(" #{name} #{round(ops_per_sec)} ops/sec total")
end

# Cleanup
for pid <- instances_10, do: GenServer.stop(pid)
for pid <- instances_100, do: GenServer.stop(pid)
# Test 3: sustained throughput — each instance is hammered continuously for
# 3 seconds and we report aggregate (and per-instance) ops/sec.
IO.puts("\nTEST 3: Sustained Throughput (3 seconds)")
IO.puts("-" <> String.duplicate("-", 40))
Enum.each([1, 10, 50, 100], fn instance_count ->
  pids =
    Enum.map(1..instance_count, fn _ ->
      elem(Wasmex.start_link(%{bytes: wat_compute}), 1)
    end)

  # All workers share one wall-clock deadline computed up front.
  started_at = System.monotonic_time(:millisecond)
  deadline = started_at + 3000

  # One task per instance, each counting its completed calls until the
  # deadline passes.
  workers =
    Enum.map(pids, fn pid ->
      Task.async(fn ->
        Stream.repeatedly(fn -> :tick end)
        |> Enum.reduce_while(0, fn _, done ->
          if System.monotonic_time(:millisecond) < deadline do
            # Assertive match: any call failure should crash the worker.
            {:ok, _} = Wasmex.call_function(pid, :add, [1, 2])
            {:cont, done + 1}
          else
            {:halt, done}
          end
        end)
      end)
    end)

  call_counts = Task.await_many(workers, 10_000)
  total_ops = Enum.sum(call_counts)
  elapsed = System.monotonic_time(:millisecond) - started_at
  total_ops_per_sec = total_ops * 1000 / elapsed
  per_instance_ops = total_ops_per_sec / instance_count

  instance_str = String.pad_trailing("#{instance_count} instance(s)", 15)
  total_str = String.pad_leading("#{round(total_ops_per_sec)}", 8)

  per_str =
    if instance_count > 1 do
      " (#{round(per_instance_ops)} per instance)"
    else
      ""
    end

  IO.puts(" #{instance_str} #{total_str} ops/sec#{per_str}")

  # Cleanup
  Enum.each(pids, &GenServer.stop/1)
end)
# Test 4: I/O-bound workload — each guest call sleeps 10ms in the host, so a
# perfectly async runtime finishes N concurrent calls in ~10ms wall time.
IO.puts("\nTEST 4: I/O-Bound Workload (with 10ms sleep)")
IO.puts("-" <> String.duplicate("-", 40))
# Host import backing the WAT `env.sleep_ms` function: blocks the calling
# BEAM process for the requested number of milliseconds, returns no values.
imports = %{
env: %{
sleep_ms: {:fn, [:i32], [], fn _context, milliseconds ->
Process.sleep(milliseconds)
{:ok, []}
end}
}
}
IO.puts("Testing concurrent sleeping operations...")
# Test with different concurrency levels
for concurrent_count <- [10, 50, 100, 200] do
# Create instance with imports
{:ok, pid} = Wasmex.start_link(%{bytes: wat_io, imports: imports})
start_time = System.monotonic_time(:millisecond)
# Start concurrent sleeping operations
tasks = for _ <- 1..concurrent_count do
Task.async(fn ->
# Each operation sleeps for 10ms
Wasmex.call_function(pid, :io_operation, [10])
end)
end
# Wait for all to complete
Task.await_many(tasks, 30_000)
elapsed = System.monotonic_time(:millisecond) - start_time
ops_per_sec = concurrent_count * 1000 / elapsed
concurrent_str = String.pad_trailing("#{concurrent_count} concurrent", 15)
time_str = String.pad_leading("#{elapsed}ms", 8)
ops_str = String.pad_leading("#{round(ops_per_sec)}", 6)
# With perfect async handling, any number of concurrent 10ms sleeps should
# complete in ~10ms wall time, so the ideal elapsed time is a constant.
# BUGFIX: the original wrote `if concurrent_count <= 10, do: 10, else: 10`
# — a dead conditional whose branches were identical.
expected_time = 10
blocking_factor = elapsed / expected_time
IO.puts(" #{concurrent_str} #{time_str} (#{ops_str} ops/sec, #{Float.round(blocking_factor, 1)}x expected)")
GenServer.stop(pid)
end
# Closing notes: how to read the blocking factor, and which metrics are the
# most meaningful to diff between the main and tokio branches.
Enum.each(
  [
    "",
    "Note: Lower blocking factor = better async handling",
    " 1x = perfect async, higher = thread blocking",
    "\nSUMMARY",
    "-" <> String.duplicate("-", 40),
    "Key metrics to compare between branches:",
    "- Single instance handling 1000 concurrent calls",
    "- Sustained throughput with 100 instances",
    "- I/O-bound: 200 concurrent sleeping operations",
    "- Scaling efficiency from 1 to 100 instances"
  ],
  &IO.puts/1
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment