Benchmarking different ways to dispatch to a trait method, using static dispatch (compile time polymorphism), dynamic dispatch (runtime polymorphism) and dynamic dispatch backed by an arena allocator.
cargo.toml
[dependencies]
bumpalo = "3.13.0"
criterion = "0.5.1"
[[bench]]
name = "my_benchmark"
harness = false
benches/my_benchmark.rs
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use bumpalo::Bump;
const ITEMS_PER_TYPE: usize = 1_000_000;
trait Hittable {
fn test_method(&self) -> u64;
}
#[derive(Default)]
struct Point {
x: f32,
y: f32,
}
struct Sphere {
radius: u64,
center: Point,
}
impl Hittable for Sphere {
fn test_method(&self) -> u64 {
self.radius
}
}
impl Default for Sphere {
fn default() -> Self {
Self { radius: 5, center: Default::default() }
}
}
struct Cube {
width: u64,
height: u64,
center: Point,
}
impl Hittable for Cube {
fn test_method(&self) -> u64 {
self.width
}
}
impl Default for Cube {
fn default() -> Self {
Self { width: 5, height: Default::default(), center: Default::default() }
}
}
struct Triangle {
p1: Point,
p2: Point,
p3: Point,
}
impl Hittable for Triangle {
fn test_method(&self) -> u64 {
self.p1.x.floor() as u64
}
}
impl Default for Triangle {
fn default() -> Self {
Self { p1: Point { x: 5., y: 0. }, p2: Default::default(), p3: Default::default() }
}
}
/// Saves a list for each type into a HashMap<TypeId, Vec<T>>
/// and uses some macro magic to make the types easily configurable at compile time
fn static_dispatch(c: &mut Criterion) {
struct HittableList<T:Hittable>(Vec<T>);
impl<T:Hittable + Default> Default for HittableList<T> {
fn default() -> Self {
HittableList((0..ITEMS_PER_TYPE).map(|_| T::default()).collect())
}
}
macro_rules! generate_lists {
($($T:ty),*) => {{
use std::collections::HashMap;
use std::any::{TypeId, Any};
let mut shapes: HashMap<TypeId, Box<dyn Any>> = HashMap::new();
// dynamic dispatch is only once per list
$(shapes.insert(TypeId::of::<$T>(), Box::<HittableList<$T>>::default());)*
shapes
}}
}
macro_rules! call_method {
($list:ident, $($T:ty),*) => {{
let mut tot = 0;
$(tot += $list
.get(&std::any::TypeId::of::<$T>())
.unwrap()
.downcast_ref::<HittableList<$T>>() // warning: T is not bound to Hittable. Might compile and fail at runtime
.unwrap()
.0.iter()
.map(|s| s.test_method()) // here the static dispatch happens
.sum::<u64>();
)*
tot
}};
}
let shapes = generate_lists!(Sphere, Cube, Triangle);
c.bench_function("static dispatch", |b| b.iter(|| {
black_box(call_method!(shapes, Sphere, Cube, Triangle));
}));
}
/// Saves all shapes into a Vec<Box<dyn Hittable>> and uses runtime polymorphism
fn dynamic_dispatch(c: &mut Criterion) {
fn create_shapes<T:Hittable + Default + 'static>() -> Vec<Box<dyn Hittable>> {
(0..ITEMS_PER_TYPE).map(|_| {
Box::<T>::default() as Box<dyn Hittable>
}).collect()
}
let mut shapes: Vec<Box<dyn Hittable>> = vec![];
shapes.append(&mut create_shapes::<Cube>());
shapes.append(&mut create_shapes::<Sphere>());
shapes.append(&mut create_shapes::<Triangle>());
c.bench_function("dynamic dispatch", |b| b.iter(|| {
black_box(shapes.iter().map(|h| h.test_method()).sum::<u64>());
}));
}
/// Same as above, but will allocate all objects in an arena (bumpalo crate)
/// so there are less cache misses since the objects are close in memory
fn dynamic_dispatch_arena(c: &mut Criterion) {
let arena = Bump::new();
macro_rules! insert_shapes {
($T:tt) => {
(0..ITEMS_PER_TYPE).map(|_| {
arena.alloc($T::default()) as &mut dyn Hittable
}).collect::<Vec<&mut dyn Hittable>>()
};
}
let mut shapes = vec![];
shapes.append(&mut insert_shapes!(Cube));
shapes.append(&mut insert_shapes!(Sphere));
shapes.append(&mut insert_shapes!(Triangle));
c.bench_function("dynamic dispatch arena", |b| b.iter(|| {
black_box(shapes.iter().map(|h| h.test_method()).sum::<u64>());
}));
}
criterion_group!(benches, static_dispatch, dynamic_dispatch, dynamic_dispatch_arena);
criterion_main!(benches);
Results
Note that test_method
is very simple, so this is mostly measuring overhead. With a real method the differences would be more negligible.
static dispatch time: [4.2641 ms 4.2899 ms 4.3189 ms]
dynamic dispatch time: [7.2664 ms 7.2894 ms 7.3206 ms]
dynamic dispatch arena time: [6.6548 ms 6.6741 ms 6.6982 ms]