Skip to content

Instantly share code, notes, and snippets.

@kmcallister
Last active August 29, 2015 13:58
Show Gist options
  • Save kmcallister/9986166 to your computer and use it in GitHub Desktop.
Save kmcallister/9986166 to your computer and use it in GitHub Desktop.
Scanning for ASCII whitespace with SSE 4.2
#[feature(asm, macro_rules)];
extern crate test;
use test::BenchHarness;
// Set of bytes the SSE scanners look for. As written the literal holds
// CR, LF, TAB, space, NUL and one more space.
// NOTE(review): both SSE routines load a full 16 bytes from this address with
// `movdqu`, and `has_space_sse` passes 16 as the set length in edx. The
// literal shown here is shorter than 16 bytes -- confirm whether trailing
// padding was intended (it may have been lost when the file was copied).
static whitespace: &'static [u8] = bytes!("\r\n\t \0 ");
/// Reports whether `s` contains any byte of the `whitespace` set, scanning
/// `s` in 16-byte steps with the SSE 4.2 `pcmpestri` instruction
/// (explicit-length packed string compare).
///
/// The returned `bool` is the carry flag at the end of the assembly: set when
/// a `pcmpestri` reported a match, explicitly cleared when the loop ran out
/// of input.
///
/// NOTE(review): every `movdqu` loads 16 bytes regardless of how many bytes
/// of `s` remain, and when `s.len()` is a multiple of 16 (including 0) the
/// loop makes one more pass with eax == 0. The loads can therefore touch up
/// to 16 bytes beyond the end of `s` -- confirm this is acceptable for the
/// callers.
/// NOTE(review): `s.len() as u32` discards the upper bits of the length on
/// targets where the length is wider than 32 bits.
#[inline(never)]
fn has_space_sse(s: &str) -> bool {
let mut res: bool;
unsafe {
// Operand map for the template below:
//   $0  = res                  (output, written by `setb`)
//   $1  = whitespace.as_ptr()  (address of the byte set)
//   $2  = s.as_ptr()           (address of the current 16-byte chunk)
//   eax = s.len() as u32       (bytes of `s` still to be scanned)
//   edx = 16                   (length declared for the byte set)
//
// Control flow:
//   - the byte set is loaded once into xmm1;
//   - label 2: the next 16 bytes at $2 are loaded into xmm2 and compared
//     with `pcmpestri $$0`; `jb` (carry set) leaves the loop for label 1;
//   - otherwise $2 advances by 16, eax drops by 16, and `jns` repeats the
//     loop while the subtraction result is not negative;
//   - on fall-through `clc` clears carry so that `setb` stores false;
//   - label 1: `setb` copies the carry flag into res.
//
// The immediate 0 should select unsigned-byte "equal any" matching with the
// match reported through the carry flag -- see the Intel SDM entry for
// PCMPESTRI to confirm.
//
// NOTE(review): the template modifies $2 (`add $$0x10, $2`) although it is
// declared as an input-only "r" operand.
asm!("
movdqu ($1), %xmm1
2: movdqu ($2), %xmm2
pcmpestri $$0, %xmm1, %xmm2
jb 1f
add $$0x10, $2
sub $$0x10, %eax
jns 2b
clc
1: setb $0"
// Outputs, then inputs (eax and edx are pinned to specific registers).
: "=r"(res) : "r"(whitespace.as_ptr()), "r"(s.as_ptr()),
"{eax}"(s.len() as u32), "{edx}"(16)
// Registers the template is declared to overwrite.
: "xmm1", "xmm2", "eax", "ecx", "edx");
}
res
}
/// Reports whether `s` contains any byte of the `whitespace` set.
///
/// Whole 16-byte chunks of `s` are scanned with `pcmpistri` (implicit-length
/// packed string compare); the remaining `s.len() % 16` bytes are scanned
/// with a single `pcmpestri` (explicit length).
///
/// `res` is pre-loaded with 1 through the tied `"0"(1)` input. The chunk loop
/// exits early via `jbe` straight to label 3, leaving that 1 in place; only
/// the tail path overwrites `res`, with the carry flag of the final
/// `pcmpestri`.
///
/// NOTE(review): `jbe` is taken when either the carry or the zero flag is
/// set. For `pcmpistri` the zero flag should indicate a NUL byte inside the
/// chunk being scanned, which is how NUL is detected in the implicit-length
/// loop -- see the Intel SDM entry for PCMPISTRI to confirm.
/// NOTE(review): the tail `movdqu ($2)` always loads 16 bytes starting at the
/// first byte after the last whole chunk, so it can read up to 16 bytes
/// beyond the end of `s` -- confirm this is acceptable for the callers.
/// NOTE(review): the template modifies $3 (`add $$0x10, $3`) although it is
/// declared as an input-only "r" operand.
#[inline(never)]
fn has_space_sse_implicit(s: &str) -> bool {
    let len = s.len();
    // Bytes left after the last whole 16-byte chunk; passed in edx as the
    // explicit length for the tail compare.
    let tail_len = (len % 16) as u32;
    let mut res: bool;
    unsafe {
        let ptr_start = s.as_ptr();
        // Address just past the last whole chunk, i.e. where the tail begins.
        let ptr_end = ptr_start.offset((len as int) - (tail_len as int)) as uint;
        // Operand map for the template below:
        //   $0  = res                   (early-clobber output, starts at 1)
        //   $1  = whitespace.as_ptr()   (address of the byte set)
        //   $2  = ptr_end               (base address for every load)
        //   $3  = ptr_start - ptr_end   (zero, or a negative offset that
        //                                counts up towards zero in steps of 16)
        //   edx = tail_len
        //
        // Control flow:
        //   - the byte set is loaded once into xmm1;
        //   - `test $3, $3` / `jns 2f` skips the chunk loop when the offset is
        //     not negative, i.e. when there is no whole chunk;
        //   - label 1: load the chunk at $2 + $3, compare with `pcmpistri`,
        //     leave through label 3 on `jbe`, otherwise step the offset by 16
        //     and repeat until it reaches zero;
        //   - label 2: load the tail at $2, set eax to 16 (the explicit length
        //     of the byte set), compare with `pcmpestri`, and store the carry
        //     flag into res.
        //
        // The template text itself is kept exactly as it was.
        asm!("
movdqu ($1), %xmm1
test $3, $3
jns 2f
1: movdqu ($2,$3), %xmm2
pcmpistri $$0, %xmm2, %xmm1
jbe 3f
add $$0x10, $3
jnz 1b
2: movdqu ($2), %xmm2
mov $$0x10, %eax
pcmpestri $$0, %xmm2, %xmm1
setb $0
3:"
            : "=&r"(res)
            : "r"(whitespace.as_ptr()), "r"(ptr_end), "r"(ptr_start as uint - ptr_end),
              "{edx}"(tail_len), "0"(1)
            // eax must be declared: the template overwrites it with
            // `mov $$0x10, %eax`, and without the clobber the compiler is free
            // to keep a live value or another operand in that register.
            : "xmm1", "xmm2", "eax", "ecx");
    }
    res
}
/// Character-by-character reference scanner.
///
/// Returns `true` as soon as `s` yields a NUL, tab, line feed, carriage
/// return or space character, and `false` if none is found.
#[inline(never)]
fn has_space_naive(s: &str) -> bool {
    s.chars().any(|ch| match ch {
        ' ' | '\r' | '\n' | '\t' | '\0' => true,
        _ => false,
    })
}
/// Byte-by-byte reference scanner.
///
/// Returns `true` as soon as `s` contains one of the bytes 0x00 (NUL),
/// 0x09 (tab), 0x0A (line feed), 0x0D (carriage return) or 0x20 (space),
/// and `false` if none is present.
#[inline(never)]
fn has_space_byte(s: &str) -> bool {
    s.as_bytes().iter().any(|&byte| match byte {
        0x20 | 0x0D | 0x0A | 0x09 | 0x00 => true,
        _ => false,
    })
}
static str_with_space: &'static str
= "hereisareallylongstringthatdoes!eventually!contain!a!spacehereisareallylongstringthatdoes!eventually!contain!a!spacehereisareallylongstringthatdoes!eventually!contain a spacehereisareallylongstringthatdoes eventually contain a spacehereisareallylongstringthatdoes eventually contain a spacehereisareallylongstringthatdoes eventually contain a spacehereisareallylongstringthatdoes eventually contain a spacehereisareallylongstringthatdoes eventually contain a space";
static thirtytwo: &'static str
= "0123456789ABCDEF0123456789ABCDEF";
static thirtyone: &'static str
= "0123456789ABCDEF0123456789ABCDE";
static end_space: &'static str
= "0123456789ABCDEF0123456789ABCD ";
static str_without_space: &'static str
= "hereisareallylongstringthatdoesn'tcontainanyspacesatall!!!hereisareallylongstringthatdoesn'tcontainanyspacesatall!!!hereisareallylongstringthatdoesn'tcontainanyspacesatall!!!hereisareallylongstringthatdoesn'tcontainanyspacesatall!!!hereisareallylongstringthatdoesn'tcontainanyspacesatall!!!hereisareallylongstringthatdoesn'tcontainanyspacesatall!!!hereisareallylongstringthatdoesn'tcontainanyspacesatall!!!hereisareallylongstringthatdoesn'tcontainanyspacesatall!!!";
static str_with_null: &'static str
= "here's\0astringwithanullinit";
/// Checks that the scanners agree on the sample strings: positives first,
/// then negatives, with the SSE variants additionally exercised on inputs
/// whose lengths sit on or around the 16-byte chunk size.
#[test]
fn smoke_test() {
    // Inputs that contain a target byte: every scanner must report it.
    for s in [str_with_space, str_with_null].iter() {
        assert!(has_space_naive(*s));
        assert!(has_space_byte(*s));
        assert!(has_space_sse(*s));
        assert!(has_space_sse_implicit(*s));
    }

    // Target byte in the very last position: SSE variants only.
    assert!(has_space_sse(end_space));
    assert!(has_space_sse_implicit(end_space));

    // Long input without any target byte: every scanner must say no.
    assert!(!has_space_naive(str_without_space));
    assert!(!has_space_byte(str_without_space));
    assert!(!has_space_sse(str_without_space));
    assert!(!has_space_sse_implicit(str_without_space));

    // Short and boundary-length inputs without a target byte: SSE variants only.
    for s in [thirtytwo, thirtyone, "foo", ""].iter() {
        assert!(!has_space_sse(*s));
        assert!(!has_space_sse_implicit(*s));
    }
}
// Generates a `#[bench]` function called `$name` whose timed closure calls
// the scanner `$f` on the expression `$s`.
macro_rules! mk_bench ( ($name:ident, $f:ident, $s:expr) => (
#[bench]
fn $name(bh: &mut BenchHarness) {
bh.iter(|| $f($s));
}
))
// One benchmark per scanner for each of the two long sample strings
// (`str_with_space` and `str_without_space`).
mk_bench!(naive_space, has_space_naive, str_with_space)
mk_bench!(naive_no_space, has_space_naive, str_without_space)
mk_bench!(byte_space, has_space_byte, str_with_space)
mk_bench!(byte_no_space, has_space_byte, str_without_space)
mk_bench!(sse_space, has_space_sse, str_with_space)
mk_bench!(sse_no_space, has_space_sse, str_without_space)
mk_bench!(sse_implicit_space, has_space_sse_implicit, str_with_space)
mk_bench!(sse_implicit_no_space, has_space_sse_implicit, str_without_space)
// Generates a `#[bench]` function called `$name` that runs the scanner `$f`
// over a 100 MiB string built from repeated copies of `str_without_space`.
macro_rules! mk_huge ( ($name:ident, $f:ident) => (
#[bench]
fn $name(bh: &mut BenchHarness) {
// Target size in bytes: 100 * 1024 * 1024.
let sz = 100*1024*1024;
let mut s: ~str = ~"";
// Append whole copies until the target is reached or passed, then cut the
// string back to exactly `sz` bytes. The string is built once, outside the
// closure handed to `bh.iter`.
while s.len() < sz {
s.push_str(str_without_space);
}
s.truncate(sz);
bh.iter(|| $f(s))
}
))
// Large-input benchmarks for the two SSE scanners only.
mk_huge!(huge_pcmpestri, has_space_sse)
mk_huge!(huge_pcmpistri, has_space_sse_implicit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment