Skip to content

Instantly share code, notes, and snippets.

@x100ex
Created June 2, 2017 13:38
Show Gist options
  • Save x100ex/fea1c6bd5c35754503e4c31aab496766 to your computer and use it in GitHub Desktop.
Save x100ex/fea1c6bd5c35754503e4c31aab496766 to your computer and use it in GitHub Desktop.
// https://schani.wordpress.com/2010/04/30/linear-vs-binary-search/
static int
linear_sentinel_sse2_nobranch (const int *arr, int n, int key) {
v4si *in_data = (v4si*)arr;
v4si key4 = { key, key, key, key };
int i = 0;
for (;;) {
v4si cmp0 = __builtin_ia32_pcmpgtd128 (key4, in_data [i + 0]);
v4si cmp1 = __builtin_ia32_pcmpgtd128 (key4, in_data [i + 1]);
v4si cmp2 = __builtin_ia32_pcmpgtd128 (key4, in_data [i + 2]);
v4si cmp3 = __builtin_ia32_pcmpgtd128 (key4, in_data [i + 3]);
v8hi pack01 = __builtin_ia32_packssdw128 (cmp0, cmp1);
v8hi pack23 = __builtin_ia32_packssdw128 (cmp2, cmp3);
v16qi pack0123 = __builtin_ia32_packsswb128 (pack01, pack23);
int res = __builtin_ia32_pmovmskb128 (pack0123);
if (res != 0xffff)
break;
i += 4;
}
return i * 4 + __builtin_ctz (~res);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment