Last active
August 29, 2015 14:03
-
-
Save hktechn0/60f050d0ba2392f2e49f to your computer and use it in GitHub Desktop.
AVX2 strlen()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Size in bytes of the buffer that main() allocates and hands to fread(). */
#define LENGTH 1000000010
/*
 * strlen() implementations for the COINS "syspro" course quiz:
 * http://www.coins.tsukuba.ac.jp/~syspro/2015/shui/quizzes2.html#exam207
 */
/*
 * Forward declarations of the three strlen() variants that main() times.
 * Each is marked noinline -- presumably so that every timed call remains a
 * real out-of-line call rather than being inlined into main(); confirm intent.
 */
size_t my_strlen1(const char *s) __attribute__((noinline));
size_t my_strlen2(const char *s) __attribute__((noinline));
size_t my_strlen3(const char *s) __attribute__((noinline));
/*
 * Byte-at-a-time strlen().
 *
 * s: pointer to a NUL-terminated string (must not be NULL).
 * Returns the number of bytes that precede the first '\0'.
 *
 * The counter is a size_t, matching the return type, so it cannot wrap on
 * strings of 2^32 bytes or more the way an unsigned int index would.
 */
size_t my_strlen1(const char *s)
{
    size_t len = 0;

    while (s[len] != '\0') {
        len++;
    }
    return len;
}
/*
 * Word-at-a-time strlen(): examines four bytes per iteration.
 *
 * s: pointer to a NUL-terminated string (must not be NULL).
 * Returns the number of bytes that precede the first '\0'.
 *
 * Differences from a plain "cast to unsigned int * and mask" approach:
 *  - leading bytes are scanned one at a time until the pointer is 4-byte
 *    aligned, so the word loads are never misaligned;
 *  - words are loaded with memcpy() instead of through an unsigned int *,
 *    which avoids a strict-aliasing violation;
 *  - the zero-byte test does not depend on byte order;
 *  - the length is computed in size_t, so it cannot wrap at 2^32.
 *
 * NOTE: an aligned word load may still touch up to three bytes after the
 * terminator. Those bytes lie in the same aligned 4-byte word, so the load
 * cannot cross into another page, but it is outside what ISO C guarantees.
 */
size_t my_strlen2(const char *s)
{
    const char *p = s;

    /* Head: advance byte-wise until p sits on a 4-byte boundary. */
    while (((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0) {
        if (*p == '\0') {
            return (size_t)(p - s);
        }
        p++;
    }

    for (;;) {
        uint32_t word;

        memcpy(&word, p, sizeof word);

        /*
         * Non-zero exactly when at least one byte of word is 0x00:
         * subtracting 1 from each byte sets its top bit only if the byte
         * was zero (or >= 0x81, which "& ~word" filters out).
         */
        if (((word - 0x01010101u) & ~word & 0x80808080u) != 0) {
            /* The terminator is one of these four bytes; locate it. */
            while (*p != '\0') {
                p++;
            }
            return (size_t)(p - s);
        }
        p += sizeof word;
    }
}
/*
 * AVX2 strlen(): examines 32 bytes per iteration.
 *
 * s: pointer to a NUL-terminated string (must not be NULL).
 * Returns the number of bytes that precede the first '\0'.
 *
 * Leading bytes are scanned one at a time until the pointer is 32-byte
 * aligned; from there on every vector load is an aligned load. This removes
 * the requirement that the caller pass a 32-byte aligned pointer, and an
 * aligned 32-byte load can never straddle a page boundary.
 * The position of the terminator inside the final vector comes from the
 * compare mask directly, and the length is computed in size_t so it cannot
 * wrap at 2^32.
 *
 * The target attribute lets this function be compiled without a global
 * -mavx2 flag; the CPU must still support AVX2 when it is called.
 *
 * NOTE: the final aligned load may touch bytes after the terminator (within
 * the same 32-byte block), which is outside what ISO C guarantees.
 */
__attribute__((target("avx2")))
size_t my_strlen3(const char *s)
{
    const char *p = s;

    /* Head: advance byte-wise until p sits on a 32-byte boundary. */
    while (((uintptr_t)p & 31u) != 0) {
        if (*p == '\0') {
            return (size_t)(p - s);
        }
        p++;
    }

    const __m256i zero = _mm256_setzero_si256();

    for (;;) {
        const __m256i chunk = _mm256_load_si256((const __m256i *)p);
        /* Bit k of hits is set when byte k of chunk equals 0x00. */
        const unsigned int hits =
            (unsigned int)_mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, zero));

        if (hits != 0) {
            /* Lowest set bit = offset of the first terminator byte. */
            return (size_t)(p - s) + (size_t)__builtin_ctz(hits);
        }
        p += 32;
    }
}
int main(void) | |
{ | |
char *str; | |
FILE *fp; | |
clock_t c1, c2, c3, c4, t; | |
unsigned int l1, l2, l3, l4; | |
if ((str = calloc(LENGTH, sizeof(char))) == NULL) { | |
perror("calloc"); | |
exit(1); | |
} | |
if ((fp = fopen("samplestring", "r")) == NULL) { | |
perror("fopen"); | |
exit(1); | |
} | |
fread(str, sizeof(char), LENGTH, fp); | |
fclose(fp); | |
t = clock(); | |
l1 = strlen(str); | |
c1 = clock() - t; | |
t = clock(); | |
l2 = my_strlen1(str); | |
c2 = clock() - t; | |
t = clock(); | |
l3 = my_strlen2(str); | |
c3 = clock() - t; | |
t = clock(); | |
l4 = my_strlen3(str); | |
c4 = clock() - t; | |
free(str); | |
printf("strlen(): %u, %u\n", l1, (unsigned int)c1); | |
printf("my_strlen1(): %u, %u\n", l2, (unsigned int)c2); | |
printf("my_strlen2(): %u, %u\n", l3, (unsigned int)c3); | |
printf("my_strlen3(): %u, %u\n", l4, (unsigned int)c4); | |
return 0; | |
} | |
/*
Sample run; each line prints the measured length followed by the elapsed clock() ticks.

[s0911454@borage08]~$ clang -mavx2 -O3 strlen.c
[s0911454@borage08]~$ ./a.out
strlen(): 1000000000, 61636
my_strlen1(): 1000000000, 391436
my_strlen2(): 1000000000, 177409
my_strlen3(): 1000000000, 57086
[s0911454@borage08]~$ uname -a
Darwin borage08.coins.tsukuba.ac.jp 13.3.0 Darwin Kernel Version 13.3.0: Tue Jun 3 21:27:35 PDT 2014; root:xnu-2422.110.17~1/RELEASE_X86_64 x86_64 i386 iMac14,2 Darwin
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment