Skip to content

Instantly share code, notes, and snippets.

@Urgau
Created September 26, 2024 08:37
Show Gist options
  • Save Urgau/98edfc2b86f0ea72b0639dee1bd34ae0 to your computer and use it in GitHub Desktop.
Save Urgau/98edfc2b86f0ea72b0639dee1bd34ae0 to your computer and use it in GitHub Desktop.
Comparisons of proposed transpose functions
Benchmark name urgau_transpose grigorenkopv_transpose grigorenkopv_transpose3
2*None 763.50 ps 718.60 ps 674.90 ps
2*u64 1.1840 ns 1.1829 ns 1.0371 ns
10*u64 4.6656 ns 4.1235 ns 4.1598 ns
9*u64+None 3.7074 ns 3.5550 ns 3.4683 ns
2*String 2.0212 ns 2.0284 ns 1.5549 ns
10*String 23.318 ns 25.383 ns 22.737 ns
#![feature(maybe_uninit_array_assume_init)]
#![feature(maybe_uninit_uninit_array)]
#![feature(array_try_map)]
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::mem::MaybeUninit;
/// Transposes an array of `Option`s into an `Option` of an array.
///
/// Returns `Some([T; N])` when every element of `this` is `Some`, and `None`
/// as soon as the first `None` element is encountered.
///
/// Elements that were already moved into the output buffer before a `None`
/// was found are dropped before returning, so no value is leaked on the
/// early-exit path. Elements after the `None` are dropped by the array
/// iterator itself.
pub fn urgau_transpose<T, const N: usize>(this: [Option<T>; N]) -> Option<[T; N]> {
    let mut array: MaybeUninit<[T; N]> = MaybeUninit::uninit();
    // Pointer to the first element slot of the (still uninitialized) output.
    let base = array.as_mut_ptr().cast::<T>();
    for (i, e) in this.into_iter().enumerate() {
        match e {
            // SAFETY: `i` is always in bounds because the iterator yields
            // exactly `N` items and the buffer has room for `N` values.
            Some(e) => unsafe { base.add(i).write(e) },
            None => {
                // SAFETY: slots `0..i` were initialized by the previous
                // iterations and are not accessed again after this point.
                // `MaybeUninit` never drops its contents on its own, so
                // without this the already-moved values would leak.
                unsafe {
                    std::ptr::drop_in_place(std::ptr::slice_from_raw_parts_mut(base, i));
                }
                return None;
            }
        }
    }
    // SAFETY: the loop ran to completion, so all `N` elements are initialized.
    Some(unsafe { array.assume_init() })
}
/// Registers the Criterion benchmarks exercising `urgau_transpose`.
///
/// Each input array is wrapped in `black_box` before being handed to the
/// function under test. The number of elements in every array matches the
/// count advertised in the benchmark name.
fn urgau_benchmark(c: &mut Criterion) {
    c.bench_function("urgau_transpose 2*None", |b| {
        b.iter(|| urgau_transpose(black_box([None::<u64>, None])))
    });
    c.bench_function("urgau_transpose 2*u64", |b| {
        b.iter(|| urgau_transpose(black_box([Some(1111111u64), Some(8)])))
    });
    // Ten `Some` values, as the name says.
    c.bench_function("urgau_transpose 10*u64", |b| {
        b.iter(|| {
            urgau_transpose(black_box([
                Some(1111111u64),
                Some(8),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
            ]))
        })
    });
    // Nine `Some` values plus one `None`; the `None` stays second-to-last so
    // that one trailing element is still discarded on the early exit.
    c.bench_function("urgau_transpose 9*u64+None", |b| {
        b.iter(|| {
            urgau_transpose(black_box([
                Some(1111111u64),
                Some(8),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                None,
                Some(5),
            ]))
        })
    });
    c.bench_function("urgau_transpose 2*String", |b| {
        b.iter(|| urgau_transpose(black_box([Some(String::new()), Some(String::new())])))
    });
    c.bench_function("urgau_transpose 10*String", |b| {
        b.iter(|| {
            urgau_transpose(black_box([
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
            ]))
        })
    });
}
/// Transposes an array of `Option`s into an `Option` of an array.
///
/// Returns `Some([T; N])` when every element of `val` is `Some`, and `None`
/// if any element is `None`. Conversion stops at the first `None`.
///
/// Values that were already moved into the output buffer before a `None`
/// was found are dropped before returning, so nothing is leaked on the
/// early-exit path. Only stable `MaybeUninit` APIs are used.
fn grigorenkopv_transpose<T, const N: usize>(val: [Option<T>; N]) -> Option<[T; N]> {
    let mut arr: [MaybeUninit<T>; N] = [const { MaybeUninit::uninit() }; N];
    // Number of leading slots of `arr` that currently hold a live `T`.
    let mut filled = 0usize;
    for (from, to) in val.into_iter().zip(arr.iter_mut()) {
        match from {
            Some(from) => {
                to.write(from);
                filled += 1;
            }
            None => break,
        }
    }
    // Both sides of the zip have length `N`, so `filled` falls short of `N`
    // exactly when a `None` interrupted the loop.
    if filled != N {
        for slot in &mut arr[..filled] {
            // SAFETY: the first `filled` slots were written in the loop above
            // and are never read again. `MaybeUninit` does not drop its
            // contents by itself, so they must be dropped here.
            unsafe { slot.assume_init_drop() };
        }
        return None;
    }
    // SAFETY: all `N` slots are initialized, and `[MaybeUninit<T>; N]` has the
    // same layout as `[T; N]`. `arr` itself never drops its contents, so the
    // values are moved out exactly once.
    Some(unsafe { arr.as_ptr().cast::<[T; N]>().read() })
}
/// Registers the Criterion benchmarks exercising `grigorenkopv_transpose`.
///
/// Each input array is wrapped in `black_box` before being handed to the
/// function under test. The number of elements in every array matches the
/// count advertised in the benchmark name.
fn grigorenkopv_benchmark(c: &mut Criterion) {
    c.bench_function("grigorenkopv_transpose 2*None", |b| {
        b.iter(|| grigorenkopv_transpose(black_box([None::<u64>, None])))
    });
    c.bench_function("grigorenkopv_transpose 2*u64", |b| {
        b.iter(|| grigorenkopv_transpose(black_box([Some(1111111u64), Some(8)])))
    });
    // Ten `Some` values, as the name says.
    c.bench_function("grigorenkopv_transpose 10*u64", |b| {
        b.iter(|| {
            grigorenkopv_transpose(black_box([
                Some(1111111u64),
                Some(8),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
            ]))
        })
    });
    // Nine `Some` values plus one `None`; the `None` stays second-to-last so
    // that one trailing element is still discarded on the early exit.
    c.bench_function("grigorenkopv_transpose 9*u64+None", |b| {
        b.iter(|| {
            grigorenkopv_transpose(black_box([
                Some(1111111u64),
                Some(8),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                None,
                Some(5),
            ]))
        })
    });
    c.bench_function("grigorenkopv_transpose 2*String", |b| {
        b.iter(|| grigorenkopv_transpose(black_box([Some(String::new()), Some(String::new())])))
    });
    c.bench_function("grigorenkopv_transpose 10*String", |b| {
        b.iter(|| {
            grigorenkopv_transpose(black_box([
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
            ]))
        })
    });
}
/// Transposes an array of `Option`s into an `Option` of an array by
/// delegating to the array `try_map` method (gated behind the
/// `array_try_map` feature enabled at the top of this file).
///
/// Each element is passed through unchanged, so the result is `Some` only
/// when every element of `val` is `Some`.
fn grigorenkopv_transpose3<T, const N: usize>(val: [Option<T>; N]) -> Option<[T; N]> {
    val.try_map(|element| element)
}
/// Registers the Criterion benchmarks exercising `grigorenkopv_transpose3`.
///
/// Each input array is wrapped in `black_box` before being handed to the
/// function under test. The number of elements in every array matches the
/// count advertised in the benchmark name.
fn grigorenkopv_benchmark3(c: &mut Criterion) {
    c.bench_function("grigorenkopv_transpose3 2*None", |b| {
        b.iter(|| grigorenkopv_transpose3(black_box([None::<u64>, None])))
    });
    c.bench_function("grigorenkopv_transpose3 2*u64", |b| {
        b.iter(|| grigorenkopv_transpose3(black_box([Some(1111111u64), Some(8)])))
    });
    // Ten `Some` values, as the name says.
    c.bench_function("grigorenkopv_transpose3 10*u64", |b| {
        b.iter(|| {
            grigorenkopv_transpose3(black_box([
                Some(1111111u64),
                Some(8),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
            ]))
        })
    });
    // Nine `Some` values plus one `None`; the `None` stays second-to-last so
    // that one trailing element is still discarded on the early exit.
    c.bench_function("grigorenkopv_transpose3 9*u64+None", |b| {
        b.iter(|| {
            grigorenkopv_transpose3(black_box([
                Some(1111111u64),
                Some(8),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                Some(5),
                None,
                Some(5),
            ]))
        })
    });
    c.bench_function("grigorenkopv_transpose3 2*String", |b| {
        b.iter(|| grigorenkopv_transpose3(black_box([Some(String::new()), Some(String::new())])))
    });
    c.bench_function("grigorenkopv_transpose3 10*String", |b| {
        b.iter(|| {
            grigorenkopv_transpose3(black_box([
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
                Some(String::new()),
            ]))
        })
    });
}
// Bundle the three benchmark functions into a single Criterion group named
// `benches`.
criterion_group!(benches, urgau_benchmark, grigorenkopv_benchmark, grigorenkopv_benchmark3);
// Hand the `benches` group to Criterion's entry-point macro.
criterion_main!(benches);
urgau_transpose 2*None time: [749.56 ps 763.50 ps 778.26 ps]
change: [+2.6355% +4.4712% +6.7391%] (p = 0.00 < 0.05)
Performance has regressed.
Found 9 outliers among 100 measurements (9.00%)
6 (6.00%) high mild
3 (3.00%) high severe
urgau_transpose 2*u64 time: [1.1796 ns 1.1840 ns 1.1889 ns]
change: [+5.4191% +6.7016% +7.9648%] (p = 0.00 < 0.05)
Performance has regressed.
Found 17 outliers among 100 measurements (17.00%)
2 (2.00%) low severe
5 (5.00%) low mild
9 (9.00%) high mild
1 (1.00%) high severe
urgau_transpose 10*u64 time: [4.6203 ns 4.6656 ns 4.7209 ns]
change: [-5.7290% -3.4875% -1.4283%] (p = 0.00 < 0.05)
Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe
Benchmarking urgau_transpose 9*u64+None: Collecting 100 samples in estimated 5.0000 s (1.3B iterationsurgau_transpose 9*u64+None
time: [3.6887 ns 3.7074 ns 3.7270 ns]
change: [+1.0291% +2.0576% +3.2427%] (p = 0.00 < 0.05)
Performance has regressed.
Found 6 outliers among 100 measurements (6.00%)
4 (4.00%) high mild
2 (2.00%) high severe
urgau_transpose 2*String
time: [2.0117 ns 2.0212 ns 2.0314 ns]
change: [+11.942% +12.728% +13.483%] (p = 0.00 < 0.05)
Performance has regressed.
Found 6 outliers among 100 measurements (6.00%)
4 (4.00%) high mild
2 (2.00%) high severe
urgau_transpose 10*String
time: [23.256 ns 23.318 ns 23.387 ns]
change: [+9.6406% +10.528% +11.439%] (p = 0.00 < 0.05)
Performance has regressed.
Found 8 outliers among 100 measurements (8.00%)
7 (7.00%) high mild
1 (1.00%) high severe
Benchmarking grigorenkopv_transpose 2*None: Collecting 100 samples in estimated 5.0000 s (6.9B iteratigrigorenkopv_transpose 2*None
time: [715.11 ps 718.60 ps 722.32 ps]
Found 10 outliers among 100 measurements (10.00%)
4 (4.00%) high mild
6 (6.00%) high severe
Benchmarking grigorenkopv_transpose 2*u64: Collecting 100 samples in estimated 5.0000 s (4.2B iteratiogrigorenkopv_transpose 2*u64
time: [1.1781 ns 1.1829 ns 1.1885 ns]
Found 18 outliers among 100 measurements (18.00%)
3 (3.00%) low severe
4 (4.00%) low mild
5 (5.00%) high mild
6 (6.00%) high severe
Benchmarking grigorenkopv_transpose 10*u64: Collecting 100 samples in estimated 5.0000 s (1.1B iteratigrigorenkopv_transpose 10*u64
time: [4.1067 ns 4.1235 ns 4.1423 ns]
Found 6 outliers among 100 measurements (6.00%)
6 (6.00%) high severe
Benchmarking grigorenkopv_transpose 9*u64+None: Collecting 100 samples in estimated 5.0000 s (1.4B itegrigorenkopv_transpose 9*u64+None
time: [3.5388 ns 3.5550 ns 3.5751 ns]
Found 9 outliers among 100 measurements (9.00%)
2 (2.00%) low severe
2 (2.00%) low mild
2 (2.00%) high mild
3 (3.00%) high severe
Benchmarking grigorenkopv_transpose 2*String: Collecting 100 samples in estimated 5.0000 s (2.5B iteragrigorenkopv_transpose 2*String
time: [2.0137 ns 2.0284 ns 2.0450 ns]
Found 16 outliers among 100 measurements (16.00%)
5 (5.00%) high mild
11 (11.00%) high severe
Benchmarking grigorenkopv_transpose 10*String: Collecting 100 samples in estimated 5.0001 s (216M itergrigorenkopv_transpose 10*String
time: [25.210 ns 25.383 ns 25.584 ns]
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe
Benchmarking grigorenkopv_transpose3 2*None: Collecting 100 samples in estimated 5.0000 s (7.4B iteratgrigorenkopv_transpose3 2*None
time: [669.34 ps 674.90 ps 681.59 ps]
change: [+1.7041% +2.8146% +4.0420%] (p = 0.00 < 0.05)
Performance has regressed.
Found 11 outliers among 100 measurements (11.00%)
5 (5.00%) high mild
6 (6.00%) high severe
Benchmarking grigorenkopv_transpose3 2*u64: Collecting 100 samples in estimated 5.0000 s (4.7B iteratigrigorenkopv_transpose3 2*u64
time: [1.0344 ns 1.0371 ns 1.0400 ns]
change: [-4.5764% -3.7887% -2.9787%] (p = 0.00 < 0.05)
Performance has improved.
Found 11 outliers among 100 measurements (11.00%)
4 (4.00%) low severe
6 (6.00%) high mild
1 (1.00%) high severe
Benchmarking grigorenkopv_transpose3 10*u64: Collecting 100 samples in estimated 5.0000 s (1.1B iteratgrigorenkopv_transpose3 10*u64
time: [4.1465 ns 4.1598 ns 4.1743 ns]
change: [-8.2830% -7.5203% -6.7958%] (p = 0.00 < 0.05)
Performance has improved.
Found 11 outliers among 100 measurements (11.00%)
1 (1.00%) low mild
4 (4.00%) high mild
6 (6.00%) high severe
Benchmarking grigorenkopv_transpose3 9*u64+None: Collecting 100 samples in estimated 5.0000 s (1.5B itgrigorenkopv_transpose3 9*u64+None
time: [3.4591 ns 3.4683 ns 3.4787 ns]
change: [+1.5662% +2.2997% +2.9964%] (p = 0.00 < 0.05)
Performance has regressed.
Found 14 outliers among 100 measurements (14.00%)
2 (2.00%) low severe
2 (2.00%) low mild
4 (4.00%) high mild
6 (6.00%) high severe
Benchmarking grigorenkopv_transpose3 2*String: Collecting 100 samples in estimated 5.0000 s (3.2B itergrigorenkopv_transpose3 2*String
time: [1.5481 ns 1.5549 ns 1.5641 ns]
change: [-2.8840% -1.9346% -1.0750%] (p = 0.00 < 0.05)
Performance has improved.
Found 11 outliers among 100 measurements (11.00%)
7 (7.00%) high mild
4 (4.00%) high severe
Benchmarking grigorenkopv_transpose3 10*String: Collecting 100 samples in estimated 5.0001 s (233M itegrigorenkopv_transpose3 10*String
time: [22.658 ns 22.737 ns 22.831 ns]
Found 11 outliers among 100 measurements (11.00%)
6 (6.00%) high mild
5 (5.00%) high severe
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment