Skip to content

Instantly share code, notes, and snippets.

@toddlipcon
Created December 8, 2011 01:23
Show Gist options
  • Select an option

  • Save toddlipcon/1445662 to your computer and use it in GitHub Desktop.

Select an option

Save toddlipcon/1445662 to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xmmintrin.h>
/*
 * All-zero MMX constant.  File-scope objects are zero-initialised and main()
 * also stores zero here.  It is kept for that assignment; atol_chunk() below
 * builds its own zero vector and does not read this variable.
 */
__m64 zero ;

/*
 * Convert exactly four ASCII decimal digits starting at str (most significant
 * digit first) to their numeric value, e.g. "1234" -> 1234.
 *
 * str does not have to be aligned or NUL-terminated, but four bytes must be
 * readable.  Every byte is reduced with `& 0x0f`, so bytes outside '0'..'9'
 * produce a meaningless number rather than an error.  The lane/weight pairing
 * below relies on a little-endian load (str[0] lands in the lowest byte).
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition under C99/C11 rules, which breaks linking whenever the compiler
 * decides not to inline the call (e.g. at -O0).
 */
static inline long atol_chunk(const char *str) {
    /* memcpy avoids the aliasing/alignment problems of *(uint32_t *)str. */
    uint32_t chunk;
    memcpy(&chunk, str, sizeof chunk);

    /* "1234" loads as 0x34333231; masking leaves the digits 0x04030201. */
    __m64 digits = _mm_cvtsi32_si64((int)(chunk & 0x0f0f0f0fu));

    /* Interleave with zero bytes to widen each digit to a 16-bit lane:
     * lanes (low to high) = d0, d1, d2, d3 where d0 is str[0]. */
    __m64 in = _mm_unpacklo_pi8(digits, _mm_setzero_si64());

    /* _mm_set_pi16 lists the highest lane first, so lane 0 gets 1000. */
    __m64 mult = _mm_set_pi16(1, 10, 100, 1000);

    /* Multiply-add adjacent lanes:
     *   low  32 bits = d0 * 1000 + d1 * 100
     *   high 32 bits = d2 * 10   + d3 */
    __m64 sums = _mm_madd_pi16(in, mult);

    long lo = _mm_cvtsi64_si32(sums);
    long hi = _mm_cvtsi64_si32(_mm_srli_si64(sums, 32));

    /* Leave MMX state so later x87 floating-point code is not corrupted. */
    _mm_empty();

    return hi + lo;
}
/*
 * Parse a NUL-terminated string of decimal digits into a long.
 *
 * Digits are consumed four at a time through atol_chunk(); the remaining
 * zero to three digits are folded in one by one.
 *
 * Unlike the standard atol(), this performs no validation: there is no
 * handling of leading whitespace, a sign, or non-digit characters, and no
 * overflow detection.  Results for such input are meaningless.
 *
 * Every path returns a value; the previous `default: assert(0)` branch fell
 * off the end of the function when assertions were compiled out.
 */
long my_atol(char *arg) {
    /* size_t, not int: strlen() returns size_t and must not be narrowed. */
    size_t len = strlen(arg);
    long ret = 0;

    /* Bulk part: each step shifts the total by four decimal places. */
    while (len >= 4) {
        ret = ret * 10000 + atol_chunk(arg);
        arg += 4;
        len -= 4;
    }

    /* Tail: at most three digits are left at this point. */
    while (len > 0) {
        ret = ret * 10 + (*arg - '0');
        arg++;
        len--;
    }

    return ret;
}
/*
 * Read the CPU time-stamp counter (x86-64 only).
 *
 * A CPUID with EAX = 0 is executed first as a serialising instruction so that
 * earlier instructions have completed before RDTSC samples the counter.
 * Both instructions live in a single asm statement because the compiler is
 * not required to keep separate asm statements adjacent.
 *
 * Returns the 64-bit counter assembled from EDX:EAX.
 *
 * Declared `static`: a plain inline definition provides no external
 * definition under C99/C11 rules and fails to link when not inlined.
 */
static __inline__ uint64_t rdtsc(void) {
    uint32_t lo, hi;
    /* "=A" is not usable here: on x86-64 it would select %rax only and
     * yield just the low 32 bits of the TSC. */
    __asm__ __volatile__ (
        "xorl %%eax,%%eax \n\t"
        "cpuid \n\t"
        "rdtsc"
        : "=a" (lo), "=d" (hi)
        :
        /* CPUID also overwrites EBX and ECX; EAX/EDX are the outputs. */
        : "%rbx", "%rcx");
    return (uint64_t)hi << 32 | lo;
}
/*
 * Benchmark driver: converts the decimal text of every integer in
 * [0, ITERATIONS) back to a number, first with the C library atol() and then
 * with my_atol(), printing the running sum and the elapsed time-stamp-counter
 * ticks for each pass.  The measured time includes the snprintf() formatting.
 *
 * When built with -DCHECK, each my_atol() result is compared against the
 * source integer and the program exits with status 1 on the first mismatch.
 */
int main(void) {
    enum { ITERATIONS = 10000000 };
    char buf[100];
    long sum;
    int i;

    /* Redundant with static zero-initialisation, kept to be explicit. */
    zero = _mm_set_pi16(0,0,0,0);

    /* Pass 1: library atol() as the baseline. */
    uint64_t st = rdtsc();
    sum = 0;
    for (i = 0; i < ITERATIONS; i++) {
        snprintf(buf, sizeof buf, "%d", i);
        sum += atol(buf);
    }
    uint64_t et = rdtsc();
    /* PRIu64 matches the uint64_t tick count; %ld did not. */
    printf("sum : %ld took %" PRIu64 " ticks\n", sum, et - st);

    /* Pass 2: the SIMD-assisted my_atol(). */
    st = rdtsc();
    sum = 0;
    for (i = 0; i < ITERATIONS; i++) {
        snprintf(buf, sizeof buf, "%d", i);
        long ret = my_atol(buf);
        sum += ret;
#ifdef CHECK
        if (ret != i) {
            printf("fail at %d != %ld\n", i, ret);
            exit(1);
        }
#endif
    }
    et = rdtsc();
    printf("sum : %ld took %" PRIu64 " ticks\n", sum, et - st);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment