Created
June 2, 2017 13:38
-
-
Save x100ex/fea1c6bd5c35754503e4c31aab496766 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://schani.wordpress.com/2010/04/30/linear-vs-binary-search/
static int | |
linear_sentinel_sse2_nobranch (const int *arr, int n, int key) { | |
v4si *in_data = (v4si*)arr; | |
v4si key4 = { key, key, key, key }; | |
int i = 0; | |
for (;;) { | |
v4si cmp0 = __builtin_ia32_pcmpgtd128 (key4, in_data [i + 0]); | |
v4si cmp1 = __builtin_ia32_pcmpgtd128 (key4, in_data [i + 1]); | |
v4si cmp2 = __builtin_ia32_pcmpgtd128 (key4, in_data [i + 2]); | |
v4si cmp3 = __builtin_ia32_pcmpgtd128 (key4, in_data [i + 3]); | |
v8hi pack01 = __builtin_ia32_packssdw128 (cmp0, cmp1); | |
v8hi pack23 = __builtin_ia32_packssdw128 (cmp2, cmp3); | |
v16qi pack0123 = __builtin_ia32_packsswb128 (pack01, pack23); | |
int res = __builtin_ia32_pmovmskb128 (pack0123); | |
if (res != 0xffff) | |
break; | |
i += 4; | |
} | |
return i * 4 + __builtin_ctz (~res); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment