Skip to content

Instantly share code, notes, and snippets.

@danielealbano
Last active December 16, 2019 09:52
Show Gist options
  • Save danielealbano/233c5f56dc7d7a315431407d63db24b0 to your computer and use it in GitHub Desktop.
Save danielealbano/233c5f56dc7d7a315431407d63db24b0 to your computer and use it in GitHub Desktop.
## INSTRUCTIONS
##
## compile using
## gcc -O3 -funroll-loops -std=c11 -march=native -mavx2 -msse -msse2 -msse3 -msse4 -o test-ht-cmp-methods.c test-ht-cmp-methods.c
##
## run with
## ./test-ht-cmp-methods.c
##
## Output (comoare method, key idx, ht size rounded up per cache line, cpu cycles, clock ticks)
## simple,7,8,220,2
## simple,15,16,242,3
## simple,999,1000,1587,3
## simple,9999,10000,12524,6
## simple,99999,100000,139767,51
## simple,999999,1000000,1721598,595
## simple,9999999,10000000,18302077,6297
## simple,99999999,100000000,152492410,52420
## sse,7,8,109,1
## sse,15,16,115,1
## sse,999,1000,1331,2
## sse,9999,10000,14477,8
## sse,99999,100000,195084,68
## sse,999999,1000000,2232959,771
## sse,9999999,10000000,23281896,7856
## sse,99999999,100000000,198084171,68021
## avx2,7,8,178,1
## avx2,15,16,183,2
## avx2,999,1000,1732,3
## avx2,9999,10000,15230,7
## avx2,99999,100000,230672,71
## avx2,999999,1000000,1960848,677
## avx2,9999999,10000000,24492360,8331
## avx2,99999999,100000000,166761353,57420
##
## CODE BELOW
##
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <xmmintrin.h>
#include <immintrin.h>
volatile uint64_t* ht_keys;
static inline unsigned long rdtscp_start(void)
{
unsigned long var;
unsigned int hi, lo;
__asm volatile ("cpuid\n\t"
"rdtsc\n\t" : "=a" (lo), "=d" (hi)
:: "%rbx", "%rcx");
var = ((unsigned long)hi << 32) | lo;
return (var);
}
static inline unsigned long rdtscp_end(void)
{
unsigned long var;
unsigned int hi, lo;
__asm volatile ("rdtscp\n\t"
"mov %%edx, %1\n\t"
"mov %%eax, %0\n\t"
"cpuid\n\t" : "=r" (lo), "=r" (hi)
:: "%rax", "%rbx", "%rcx", "%rdx");
var = ((unsigned long)hi << 32) | lo;
return (var);
}
int ht_search_key_simple(volatile uint64_t* ht_keys, uint64_t ht_size, uint64_t key, uint64_t* bucket_index)
{
uint8_t found_in_ht = 0;
for(uint64_t i = 0; i < ht_size; i+=4)
{
if (ht_keys[i] == key)
{
found_in_ht = 1;
*bucket_index = i;
}
else if (ht_keys[i + 1] == key)
{
found_in_ht = 1;
*bucket_index = i + 1;
}
else if (ht_keys[i + 2] == key)
{
found_in_ht = 1;
*bucket_index = i + 2;
}
else if (ht_keys[i + 3] == key)
{
found_in_ht = 1;
*bucket_index = i + 3;
}
if (found_in_ht)
{
break;
}
}
return found_in_ht;
}
int ht_search_key_sse(volatile uint64_t* ht_keys, uint64_t ht_size, uint64_t key, uint64_t* bucket_index)
{
uint64_t found_in_ht = 0;
uint32_t __attribute__ ((aligned(16))) search_vector[4] = {
(uint32_t)((key & 0xFFFFFFFF00000000LL) >> 32), (uint32_t)(key & 0xFFFFFFFFLL),
(uint32_t)((key & 0xFFFFFFFF00000000LL) >> 32), (uint32_t)(key & 0xFFFFFFFFLL)
};
uint32_t __attribute__ ((aligned(16))) result[4];
__m128i v1 = _mm_load_si128((__m128i *)search_vector);
for(uint64_t i = 0; i < ht_size; i+=2)
{
__m128i v2 = _mm_load_si128((__m128i *)&ht_keys[i]);
__m128i vcmp = _mm_cmpeq_epi32(v1, v2);
_mm_store_si128((__m128i *)result, vcmp);
if (((uint64_t*)result)[0] == 0xFFFFFFFFFFFFFFFF)
{
found_in_ht = 1;
*bucket_index = i + 0;
}
else if (((uint64_t*)result)[1] == 0xFFFFFFFFFFFFFFFF)
{
found_in_ht = 1;
*bucket_index = i + 1;
}
if (found_in_ht)
{
break;
}
}
return found_in_ht;
}
int ht_search_key_avx2(volatile uint64_t* ht_keys, uint64_t ht_size, uint64_t key, uint64_t* bucket_index)
{
uint64_t found_in_ht = 0;
uint64_t __attribute__ ((aligned(16))) search_vector[4] = { key, key, key, key };
uint64_t __attribute__ ((aligned(16))) result[4];
__m256i v1 = _mm256_loadu_si256((__m256i *)search_vector);
for(uint64_t i = 0; i < ht_size; i+=4)
{
__m256i v2 = _mm256_loadu_si256((__m256i *)&ht_keys[i]);
__m256i vcmp = _mm256_cmpeq_epi64(v1, v2);
_mm256_store_si256((__m256i *)result, vcmp);
if (result[0] == 0xFFFFFFFFFFFFFFFF)
{
found_in_ht = 1;
*bucket_index = i + 0;
break;
}
else if (result[1] == 0xFFFFFFFFFFFFFFFF)
{
found_in_ht = 1;
*bucket_index = i + 1;
break;
}
else if (result[2] == 0xFFFFFFFFFFFFFFFF)
{
found_in_ht = 1;
*bucket_index = i + 2;
break;
}
else if (result[3] == 0xFFFFFFFFFFFFFFFF)
{
found_in_ht = 1;
*bucket_index = i + 3;
break;
}
}
return found_in_ht;
}
typedef int (*ht_search_key_fn_t)(volatile uint64_t*, uint64_t, uint64_t, uint64_t*);
typedef struct {
char* name;
ht_search_key_fn_t fn;
} compare_method_t;
int main(int argc, char** argv)
{
compare_method_t compare_methods[] = {
{
.name = "simple",
.fn = &ht_search_key_simple
},
{
.name = "sse",
.fn = &ht_search_key_sse
},
{
.name = "avx2",
.fn = &ht_search_key_avx2
},
{
.name = NULL,
.fn = NULL
}
};
uint64_t ht_sizes[] = {1, 9, 999, 9999, 99999, 999999, 9999999, 99999999};
compare_method_t* compare_method = &compare_methods[0];
while(compare_method->name != NULL)
{
for(uint8_t ht_sizes_idx = 0; ht_sizes_idx < sizeof(ht_sizes) / sizeof(uint64_t); ht_sizes_idx++)
{
uint64_t ht_size = ht_sizes[ht_sizes_idx];
// Ensure that ht_size is big N times a cacheline
ht_size = ht_size + ((64 - ((ht_size * sizeof(uint64_t)) % 64)) / sizeof(uint64_t));
uint64_t key = ht_size - 1;
// Allocate memory
ht_keys = _mm_malloc(ht_size * sizeof(uint64_t), 64 * 8);
if (ht_keys == NULL)
{
perror("Unable to allocate memory");
return -1;
}
// fill the ht_keys
for(uint64_t i = 0; i < ht_size; i++)
{
ht_keys[i] = i;
}
//struct timespec start, end;
clock_t tick, tock;
uint64_t total_cpu_cycles;
uint64_t found_in_ht_bucket_index;
printf("%s,%lld,%lld,", compare_method->name, key, ht_size);
for(uint8_t loop = 0; loop < 5; loop++)
{
tick = clock();
total_cpu_cycles = rdtscp_start();
(*compare_method->fn)(ht_keys, ht_size, key, &found_in_ht_bucket_index);
total_cpu_cycles = rdtscp_end() - total_cpu_cycles;
tock = clock();
}
clock_t ticks = tock - tick;
//double dt = (double)ticks;
printf("%lld,", total_cpu_cycles);
printf("%lu\n", ticks);
_mm_free((void*)ht_keys);
}
compare_method++;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment