Last active
July 27, 2021 20:46
-
-
Save zommiommy/7b9ce34391ae73519cc6e9633d5b0e7d to your computer and use it in GitHub Desktop.
Benchmarks of different split implementations in Rust.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
test bench_naive_split ... bench: 2,265,338 ns/iter (+/- 136,512)
test bench_naive_split_no_escaping ... bench: 2,287,644 ns/iter (+/- 309,993)
test bench_naive_split_no_escaping_hardcoded ... bench: 2,650,552 ns/iter (+/- 27,290)
test bench_rust_default_split ... bench: 4,118,575 ns/iter (+/- 153,705)
test bench_simd_split_no_escaping ... bench: 1,707,197 ns/iter (+/- 83,440)
test bench_simd_split_no_escaping_hardcoded ... bench: 1,688,714 ns/iter (+/- 11,317)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(test, asm)] | |
#![feature(option_result_unwrap_unchecked)] | |
#![allow(clippy::unreadable_literal)] | |
extern crate test; | |
use test::{Bencher, black_box}; | |
/// Number of split calls each benchmark performs inside one timed iteration.
const TRIALS: usize = 10_000;

/// Input line shared by every benchmark: eight comma-separated fields when
/// split on every comma, two of which sit inside double-quoted sections.
const TEST_STR: &str = "aaaaaaaaaa,\"bbbbbbbbbb,cccccccccc\",dddddddddd,aaaaaaaaaa,\"bbbbbbbbbb,cccccccccc\",dddddddddd";
/// Splits `val` on every `,` byte and returns the fields as owned strings.
///
/// Quotes are not interpreted: a comma inside a quoted section still ends a
/// field. The result always holds at least one element; an empty input yields
/// a single empty string, and a trailing comma yields a trailing empty field.
fn naive_split_no_escaping_hardcoded(val: &str) -> Vec<String> {
    let mut result = Vec::with_capacity(4);
    let mut start_idx = 0;
    for (idx, &byte) in val.as_bytes().iter().enumerate() {
        if byte == b',' {
            // `,` is ASCII, so `idx` and `idx + 1` are always char boundaries.
            result.push(val[start_idx..idx].to_string());
            start_idx = idx + 1;
        }
    }
    // Keep everything after the last delimiter, including the final byte.
    result.push(val[start_idx..].to_string());
    result
}
/// Splits `val` on every occurrence of the byte `delim` and returns the fields
/// as owned strings.
///
/// Quotes are not interpreted. The result always holds at least one element;
/// an empty input yields a single empty string.
///
/// # Panics
///
/// Panics if `delim` is not an ASCII byte and a match falls inside a
/// multi-byte UTF-8 sequence, because the field boundaries would then not be
/// `char` boundaries of `val`.
fn naive_split_no_escaping(val: &str, delim: u8) -> Vec<String> {
    let mut result = Vec::with_capacity(4);
    let mut start_idx = 0;
    for (idx, &byte) in val.as_bytes().iter().enumerate() {
        if byte == delim {
            result.push(val[start_idx..idx].to_string());
            start_idx = idx + 1;
        }
    }
    // Keep everything after the last delimiter, including the final byte.
    result.push(val[start_idx..].to_string());
    result
}
/// Splits `val` on the byte `delim`, ignoring delimiters that appear between
/// a pair of double quotes, and returns the fields as owned strings.
///
/// Every `"` toggles the quoted state; the quote characters themselves are
/// kept in the returned fields. Because quotes are matched first, a `delim`
/// of `b'"'` never splits anything. The result always holds at least one
/// element; an empty input yields a single empty string.
///
/// # Panics
///
/// Panics if `delim` is not an ASCII byte and a match falls inside a
/// multi-byte UTF-8 sequence, because the field boundaries would then not be
/// `char` boundaries of `val`.
fn naive_split(val: &str, delim: u8) -> Vec<String> {
    let mut result = Vec::with_capacity(4);
    let mut start_idx = 0;
    let mut in_quotes = false;
    for (idx, &byte) in val.as_bytes().iter().enumerate() {
        match byte {
            b'"' => in_quotes = !in_quotes,
            x if x == delim && !in_quotes => {
                result.push(val[start_idx..idx].to_string());
                start_idx = idx + 1;
            }
            _ => {}
        }
    }
    // Keep everything after the last delimiter, including the final byte.
    result.push(val[start_idx..].to_string());
    result
}
use std::arch::x86_64::{ | |
__m256i, | |
_mm256_loadu_si256, | |
_mm256_set1_epi8, | |
_mm256_cmpeq_epi8, | |
_mm256_movemask_epi8, | |
}; | |
/// Splits `val` on every `,` byte using 32-byte AVX2 comparisons and returns
/// the fields as owned strings.
///
/// Full 32-byte chunks are scanned with one vector compare each; the remaining
/// tail (fewer than 32 bytes) is scanned byte by byte. Quotes are not
/// interpreted. The result always holds at least one element; an empty input
/// yields a single empty string.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2, for example by
/// checking `is_x86_feature_detected!("avx2")` first.
#[target_feature(enable = "avx2")]
unsafe fn simd_split_no_escaping_hardcoded(val: &str) -> Vec<String> {
    const LANES: usize = 32;

    let bytes = val.as_bytes();
    let mut result = Vec::with_capacity(4);
    let cmp_vec = _mm256_set1_epi8(b',' as i8);
    let mut start_idx: usize = 0;
    let mut base_idx: usize = 0;

    while base_idx + LANES <= bytes.len() {
        // SAFETY: the loop condition guarantees `LANES` readable bytes starting
        // at `base_idx`, and `_mm256_loadu_si256` has no alignment requirement.
        let data = _mm256_loadu_si256(bytes.as_ptr().add(base_idx).cast::<__m256i>());
        // Work on the mask as `u32` so clearing the lowest set bit cannot
        // overflow when only the top lane matched.
        let mut mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(data, cmp_vec)) as u32;
        while mask != 0 {
            let idx = base_idx + mask.trailing_zeros() as usize;
            result.push(val[start_idx..idx].to_string());
            start_idx = idx + 1;
            // Clear the lowest set bit.
            mask &= mask - 1;
        }
        base_idx += LANES;
    }

    // Scalar scan over the tail that does not fill a whole vector.
    for (offset, &byte) in bytes[base_idx..].iter().enumerate() {
        if byte == b',' {
            let idx = base_idx + offset;
            result.push(val[start_idx..idx].to_string());
            start_idx = idx + 1;
        }
    }

    // Keep everything after the last delimiter, including the final byte.
    result.push(val[start_idx..].to_string());
    result
}
/// Splits `val` on every occurrence of the byte `delim` using 32-byte AVX2
/// comparisons and returns the fields as owned strings.
///
/// Full 32-byte chunks are scanned with one vector compare each; the remaining
/// tail (fewer than 32 bytes) is scanned byte by byte. Quotes are not
/// interpreted. The result always holds at least one element; an empty input
/// yields a single empty string.
///
/// # Panics
///
/// Panics if `delim` is not an ASCII byte and a match falls inside a
/// multi-byte UTF-8 sequence, because the field boundaries would then not be
/// `char` boundaries of `val`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2, for example by
/// checking `is_x86_feature_detected!("avx2")` first.
#[target_feature(enable = "avx2")]
unsafe fn simd_split_no_escaping(val: &str, delim: u8) -> Vec<String> {
    const LANES: usize = 32;

    let bytes = val.as_bytes();
    let mut result = Vec::with_capacity(4);
    let cmp_vec = _mm256_set1_epi8(delim as i8);
    let mut start_idx: usize = 0;
    let mut base_idx: usize = 0;

    while base_idx + LANES <= bytes.len() {
        // SAFETY: the loop condition guarantees `LANES` readable bytes starting
        // at `base_idx`, and `_mm256_loadu_si256` has no alignment requirement.
        let data = _mm256_loadu_si256(bytes.as_ptr().add(base_idx).cast::<__m256i>());
        // Work on the mask as `u32` so clearing the lowest set bit cannot
        // overflow when only the top lane matched.
        let mut mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(data, cmp_vec)) as u32;
        while mask != 0 {
            let idx = base_idx + mask.trailing_zeros() as usize;
            result.push(val[start_idx..idx].to_string());
            start_idx = idx + 1;
            // Clear the lowest set bit.
            mask &= mask - 1;
        }
        base_idx += LANES;
    }

    // Scalar scan over the tail that does not fill a whole vector.
    for (offset, &byte) in bytes[base_idx..].iter().enumerate() {
        if byte == delim {
            let idx = base_idx + offset;
            result.push(val[start_idx..idx].to_string());
            start_idx = idx + 1;
        }
    }

    // Keep everything after the last delimiter, including the final byte.
    result.push(val[start_idx..].to_string());
    result
}
#[bench] | |
fn bench_naive_split_no_escaping_hardcoded(b: &mut Bencher) { | |
b.iter(|| { | |
for _ in 0..TRIALS { | |
let _ = naive_split_no_escaping_hardcoded(black_box(TEST_STR)); | |
} | |
}) | |
} | |
#[bench] | |
fn bench_naive_split_no_escaping(b: &mut Bencher) { | |
b.iter(|| { | |
for _ in 0..TRIALS { | |
let _ = naive_split_no_escaping(black_box(TEST_STR), b','); | |
} | |
}) | |
} | |
#[bench] | |
fn bench_naive_split(b: &mut Bencher) { | |
b.iter(|| { | |
for _ in 0..TRIALS { | |
let _ = naive_split(black_box(TEST_STR), b','); | |
} | |
}) | |
} | |
#[bench] | |
fn bench_rust_default_split(b: &mut Bencher) { | |
b.iter(|| { | |
for _ in 0..TRIALS { | |
let _ = black_box(TEST_STR).split(',').map(str::to_string).collect::<Vec<String>>(); | |
} | |
}) | |
} | |
#[bench] | |
fn bench_simd_split_no_escaping_hardcoded(b: &mut Bencher) { | |
b.iter(|| unsafe { | |
for _ in 0..TRIALS { | |
let _ = simd_split_no_escaping_hardcoded(black_box(TEST_STR)); | |
} | |
}) | |
} | |
#[bench] | |
fn bench_simd_split_no_escaping(b: &mut Bencher) { | |
b.iter(|| unsafe { | |
for _ in 0..TRIALS { | |
let _ = simd_split_no_escaping(black_box(TEST_STR), b','); | |
} | |
}) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment