Created
October 13, 2016 19:26
-
-
Save monocasa/1d44a03cbd0170bfffc6a4a5c37b2210 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Compile with | |
* gcc -O2 -Wall -Wextra -pedantic -o test_bits test_bits.c | |
*/ | |
#define _GNU_SOURCE | |
#include <sys/types.h> | |
#include <sys/stat.h> | |
#include <fcntl.h> | |
#include <sched.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <time.h> | |
#include <unistd.h> | |
static const unsigned char BitsSetTable256[256] = | |
{ | |
# define B2(n) n, n+1, n+1, n+2 | |
# define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2) | |
# define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2) | |
B6(0), B6(1), B6(1), B6(2) | |
}; | |
static inline int bit_count_lookup(uint64_t buffer) | |
{ | |
return BitsSetTable256[(buffer >> 0) & 0xFF] + | |
BitsSetTable256[(buffer >> 8) & 0xFF] + | |
BitsSetTable256[(buffer >> 16) & 0xFF] + | |
BitsSetTable256[(buffer >> 24) & 0xFF] + | |
BitsSetTable256[(buffer >> 32) & 0xFF] + | |
BitsSetTable256[(buffer >> 40) & 0xFF] + | |
BitsSetTable256[(buffer >> 48) & 0xFF] + | |
BitsSetTable256[(buffer >> 56) & 0xFF]; | |
} | |
static inline int bit_count_bitwise(uint64_t value) | |
{ | |
uint64_t c; /* store the total here */ | |
static const uint64_t S[] = {1, 2, 4, 8, 16, 32}; /* Magic Binary Numbers */ | |
static const uint64_t B[] = {0x5555555555555555, | |
0x3333333333333333, | |
0x0F0F0F0F0F0F0F0F, | |
0x00FF00FF00FF00FF, | |
0x0000FFFF0000FFFF, | |
0x00000000FFFFFFFF}; | |
c = value - ((value >> 1) & B[0]); | |
c = ((c >> S[1]) & B[1]) + (c & B[1]); | |
c = ((c >> S[2]) + c) & B[2]; | |
c = ((c >> S[3]) + c) & B[3]; | |
c = ((c >> S[4]) + c) & B[4]; | |
c = ((c >> S[5]) + c) & B[5]; | |
return c; | |
} | |
void read_random(void* buffer, size_t len) | |
{ | |
int fd = open("/dev/urandom", O_RDONLY); | |
if(fd < 0) { | |
perror("Unable to open /dev/urandom"); | |
exit(1); | |
} | |
if(((ssize_t)len) != read(fd, buffer, len)) { | |
perror("Unable to read random"); | |
exit(1); | |
} | |
close(fd); | |
} | |
static inline uint64_t RDTSC() | |
{ | |
unsigned int hi, lo; | |
__asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi)); | |
return ((uint64_t)hi << 32) | lo; | |
} | |
const int NANO_SECONDS_IN_SEC = 1000000000; | |
/* returns a static buffer of struct timespec with the time difference of ts1 and ts2 | |
ts1 is assumed to be greater than ts2 */ | |
struct timespec *timespec_diff(struct timespec *ts1, struct timespec *ts2) | |
{ | |
static struct timespec ts; | |
ts.tv_sec = ts1->tv_sec - ts2->tv_sec; | |
ts.tv_nsec = ts1->tv_nsec - ts2->tv_nsec; | |
if (ts.tv_nsec < 0) { | |
ts.tv_sec--; | |
ts.tv_nsec += NANO_SECONDS_IN_SEC; | |
} | |
return &ts; | |
} | |
double ticks_per_nanosec; | |
static void calibrate_ticks() | |
{ | |
struct timespec begints, endts; | |
uint64_t begin = 0, end = 0; | |
volatile uint64_t ii; | |
struct timespec *tmpts = NULL; | |
uint64_t nsecElapsed = 0; | |
clock_gettime(CLOCK_MONOTONIC, &begints); | |
begin = RDTSC(); | |
for (ii = 0; ii < 1000000; ii++) | |
{ | |
} | |
end = RDTSC(); | |
clock_gettime(CLOCK_MONOTONIC, &endts); | |
tmpts = timespec_diff(&endts, &begints); | |
nsecElapsed = tmpts->tv_sec * (uint64_t)1000000000 + tmpts->tv_nsec; | |
ticks_per_nanosec = (double)(end - begin)/(double)nsecElapsed; | |
} | |
void init_rdtsc() | |
{ | |
unsigned long cpuMask = 2; /* bind to cpu 1*/ | |
sched_setaffinity(0, sizeof(cpuMask), (cpu_set_t*)&cpuMask); | |
calibrate_ticks(); | |
} | |
uint64_t data[16 * 1024 * 1024]; | |
int main() | |
{ | |
int ii = 0; | |
int lookup_count = 0; | |
int bitwise_count = 0; | |
uint64_t pre_lookup; | |
uint64_t post_lookup; | |
uint64_t pre_bitwise; | |
uint64_t post_bitwise; | |
init_rdtsc(); | |
read_random(data, sizeof(data)); | |
pre_lookup = RDTSC(); | |
for(ii = 0; ii < (16 * 1024 * 1024); ii++) { | |
lookup_count += bit_count_lookup(data[ii]); | |
} | |
post_lookup = RDTSC(); | |
pre_bitwise = RDTSC(); | |
for(ii = 0; ii < (16 * 1024 * 1024); ii++) { | |
bitwise_count += bit_count_bitwise(data[ii]); | |
} | |
post_bitwise = RDTSC(); | |
printf("lookup_count = %d, bitwise_count = %d\n", lookup_count, bitwise_count); | |
printf("lookup time = %ld cycles\n", post_lookup - pre_lookup); | |
printf("bitwise time = %ld cycles\n", post_bitwise - pre_bitwise); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment