#!/usr/bin/env elixir

# Clean, focused benchmark for comparing main vs tokio branch

Mix.install([
  {:wasmex, path: "."},
  {:benchee, "~> 1.3"}
])
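# Note: {:wasmex, path: "."} assumes this script is run from the root of the
# wasmex checkout under test (the main or tokio branch); on first run
# Mix.install compiles the local dependency.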
wat_compute = """
(module
  (func $nop (export "nop"))
  (func $add (export "add") (param $a i32) (param $b i32) (result i32)
    local.get $a
    local.get $b
    i32.add
  )
)
"""
wat_io = """
(module
  (import "env" "sleep_ms" (func $sleep (param i32)))
  (func $io_operation (export "io_operation") (param $delay i32)
    local.get $delay
    call $sleep
  )
  (func $mixed_operation (export "mixed_operation") (param $delay i32) (result i32)
    ;; Do some computation
    i32.const 42
    i32.const 58
    i32.add
    drop ;; Drop the computation result
    ;; Then sleep
    local.get $delay
    call $sleep
    ;; Return result
    i32.const 100
  )
)
"""
IO.puts("\nWASMEX PERFORMANCE BENCHMARK") | |
IO.puts("=" <> String.duplicate("=", 60)) | |
{branch, 0} = System.cmd("git", ["branch", "--show-current"]) | |
IO.puts("Branch: #{String.trim(branch)}") | |
IO.puts("") | |
IO.puts("METHODOLOGY") | |
IO.puts("-" <> String.duplicate("-", 60)) | |
IO.puts("This benchmark measures WebAssembly execution performance in three scenarios:") | |
IO.puts("") | |
IO.puts("1. CONCURRENT CALLS: Multiple async tasks calling a single instance") | |
IO.puts(" - Tests how well the runtime handles concurrent access to one instance") | |
IO.puts(" - Measures coordination overhead and potential lock contention") | |
IO.puts("") | |
IO.puts("2. PARALLEL INSTANCES: Multiple pre-created instances executing in parallel") | |
IO.puts(" - Tests how well the runtime scales across multiple instances") | |
IO.puts(" - Each instance receives one call per round") | |
IO.puts("") | |
IO.puts("3. SUSTAINED THROUGHPUT: Continuous execution over fixed time period") | |
IO.puts(" - Tests maximum sustained operations per second") | |
IO.puts(" - Removes coordination overhead to show raw performance") | |
IO.puts("") | |
IO.puts("4. I/O-BOUND WORKLOAD: Operations with host-provided sleep") | |
IO.puts(" - Tests how runtime handles blocking/waiting operations") | |
IO.puts(" - Reveals async (Tokio) vs thread-blocking (OS threads) behavior") | |
IO.puts(" - Shows scaling limits when operations wait for I/O") | |
IO.puts("") | |
IO.puts("WASM FUNCTIONS:") | |
IO.puts("- Tests 1-3: Simple integer addition (CPU-bound, ~3 WASM instructions)") | |
IO.puts("- Test 4: Host sleep import (I/O-bound, tests async handling)") | |
IO.puts("") | |
# Test 1: Single instance handling concurrent calls
IO.puts("TEST 1: Single Instance Concurrent Calls")
IO.puts("-" <> String.duplicate("-", 40))

{:ok, pid} = Wasmex.start_link(%{bytes: wat_compute})

# Warmup
for _ <- 1..100, do: Wasmex.call_function(pid, :add, [1, 2])

suite1 = Benchee.run(
  %{
    "1 call" => fn ->
      Wasmex.call_function(pid, :add, [1, 2])
    end,
    "10 concurrent" => fn ->
      tasks = for _ <- 1..10 do
        Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
      end
      Task.await_many(tasks)
    end,
    "100 concurrent" => fn ->
      tasks = for _ <- 1..100 do
        Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
      end
      Task.await_many(tasks)
    end,
    "1000 concurrent" => fn ->
      tasks = for _ <- 1..1000 do
        Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
      end
      Task.await_many(tasks)
    end,
    "10000 concurrent" => fn ->
      tasks = for _ <- 1..10000 do
        Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
      end
      Task.await_many(tasks, 30_000)
    end
  },
  time: 3,
  warmup: 1,
  formatters: [{Benchee.Formatters.Console, comparison: false}],
  print: [benchmarking: false, configuration: false]
)
# Calculate and display ops/sec
IO.puts("\nOperations per second:")

for scenario <- suite1.scenarios do
  count = case scenario.name do
    "1 call" -> 1
    "10 concurrent" -> 10
    "100 concurrent" -> 100
    "1000 concurrent" -> 1000
    "10000 concurrent" -> 10000
  end

  # Benchee reports run times in nanoseconds
  avg_time_ns = scenario.run_time_data.statistics.average
  ops_per_sec = count / (avg_time_ns / 1_000_000_000)
  name = String.pad_trailing(scenario.name, 20)
  IO.puts("  #{name} #{round(ops_per_sec)} ops/sec")
end

GenServer.stop(pid)
# Test 2: Multiple pre-created instances
IO.puts("\nTEST 2: Multiple Instance Parallel Execution")
IO.puts("-" <> String.duplicate("-", 40))

# Pre-create instances
instances_10 = for _ <- 1..10, do: elem(Wasmex.start_link(%{bytes: wat_compute}), 1)
instances_100 = for _ <- 1..100, do: elem(Wasmex.start_link(%{bytes: wat_compute}), 1)

# Warmup
for pid <- instances_10, do: Wasmex.call_function(pid, :add, [1, 2])

suite2 = Benchee.run(
  %{
    "10 instances" => fn ->
      tasks = for pid <- instances_10 do
        Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
      end
      Task.await_many(tasks)
    end,
    "100 instances" => fn ->
      tasks = for pid <- instances_100 do
        Task.async(fn -> Wasmex.call_function(pid, :add, [1, 2]) end)
      end
      Task.await_many(tasks)
    end
  },
  time: 3,
  warmup: 1,
  formatters: [{Benchee.Formatters.Console, comparison: false}],
  print: [benchmarking: false, configuration: false]
)
# Calculate and display ops/sec
IO.puts("\nOperations per second:")

for scenario <- suite2.scenarios do
  count = case scenario.name do
    "10 instances" -> 10
    "100 instances" -> 100
  end

  # Benchee reports run times in nanoseconds
  avg_time_ns = scenario.run_time_data.statistics.average
  ops_per_sec = count / (avg_time_ns / 1_000_000_000)
  name = String.pad_trailing(scenario.name, 20)
  IO.puts("  #{name} #{round(ops_per_sec)} ops/sec total")
end

# Cleanup
for pid <- instances_10, do: GenServer.stop(pid)
for pid <- instances_100, do: GenServer.stop(pid)
# Test 3: Sustained throughput
IO.puts("\nTEST 3: Sustained Throughput (3 seconds)")
IO.puts("-" <> String.duplicate("-", 40))

for instance_count <- [1, 10, 50, 100] do
  instances = for _ <- 1..instance_count, do: elem(Wasmex.start_link(%{bytes: wat_compute}), 1)

  # Run all instances continuously for 3 seconds
  start_time = System.monotonic_time(:millisecond)
  end_time = start_time + 3000

  # Start a task for each instance that runs continuously
  tasks = Enum.map(instances, fn pid ->
    Task.async(fn ->
      Stream.cycle([1])
      |> Enum.reduce_while(0, fn _, acc ->
        if System.monotonic_time(:millisecond) < end_time do
          {:ok, _} = Wasmex.call_function(pid, :add, [1, 2])
          {:cont, acc + 1}
        else
          {:halt, acc}
        end
      end)
    end)
  end)

  counts = Task.await_many(tasks, 10_000)
  total_ops = Enum.sum(counts)
  elapsed = System.monotonic_time(:millisecond) - start_time

  total_ops_per_sec = total_ops * 1000 / elapsed
  per_instance_ops = total_ops_per_sec / instance_count

  instance_str = String.pad_trailing("#{instance_count} instance(s)", 15)
  total_str = String.pad_leading("#{round(total_ops_per_sec)}", 8)

  per_str = if instance_count > 1 do
    " (#{round(per_instance_ops)} per instance)"
  else
    ""
  end

  IO.puts("  #{instance_str} #{total_str} ops/sec#{per_str}")

  # Cleanup
  for pid <- instances, do: GenServer.stop(pid)
end
# Test 4: I/O-bound workload
IO.puts("\nTEST 4: I/O-Bound Workload (with 10ms sleep)")
IO.puts("-" <> String.duplicate("-", 40))

# Define imports for sleep function
imports = %{
  env: %{
    sleep_ms: {:fn, [:i32], [], fn _context, milliseconds ->
      Process.sleep(milliseconds)
      {:ok, []}
    end}
  }
}
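# Wasmex host imports are declared as {:fn, param_types, result_types, callback},
# where the callback receives the call context as its first argument followed by
# the WASM arguments. Process.sleep/1 blocks the host callback for the requested
# number of milliseconds, which is what lets Test 4 distinguish async scheduling
# from thread blocking.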
IO.puts("Testing concurrent sleeping operations...") | |
# Test with different concurrency levels | |
for concurrent_count <- [10, 50, 100, 200] do | |
# Create instance with imports | |
{:ok, pid} = Wasmex.start_link(%{bytes: wat_io, imports: imports}) | |
start_time = System.monotonic_time(:millisecond) | |
# Start concurrent sleeping operations | |
tasks = for _ <- 1..concurrent_count do | |
Task.async(fn -> | |
# Each operation sleeps for 10ms | |
Wasmex.call_function(pid, :io_operation, [10]) | |
end) | |
end | |
# Wait for all to complete | |
Task.await_many(tasks, 30_000) | |
elapsed = System.monotonic_time(:millisecond) - start_time | |
ops_per_sec = concurrent_count * 1000 / elapsed | |
concurrent_str = String.pad_trailing("#{concurrent_count} concurrent", 15) | |
time_str = String.pad_leading("#{elapsed}ms", 8) | |
ops_str = String.pad_leading("#{round(ops_per_sec)}", 6) | |
# Expected time: ~10ms if perfectly async, longer if blocking | |
expected_time = if concurrent_count <= 10, do: 10, else: 10 | |
blocking_factor = elapsed / expected_time | |
IO.puts(" #{concurrent_str} #{time_str} (#{ops_str} ops/sec, #{Float.round(blocking_factor, 1)}x expected)") | |
GenServer.stop(pid) | |
end | |
IO.puts("") | |
IO.puts("Note: Lower blocking factor = better async handling") | |
IO.puts(" 1x = perfect async, higher = thread blocking") | |
IO.puts("\nSUMMARY") | |
IO.puts("-" <> String.duplicate("-", 40)) | |
IO.puts("Key metrics to compare between branches:") | |
IO.puts("- Single instance handling 1000 concurrent calls") | |
IO.puts("- Sustained throughput with 100 instances") | |
IO.puts("- I/O-bound: 200 concurrent sleeping operations") | |
IO.puts("- Scaling efficiency from 1 to 100 instances") |