Skip to content

Instantly share code, notes, and snippets.

@tanakamura
Last active March 4, 2017 17:17
Show Gist options
  • Save tanakamura/d660454d24a455658a2662acb2d6081c to your computer and use it in GitHub Desktop.
Save tanakamura/d660454d24a455658a2662acb2d6081c to your computer and use it in GitHub Desktop.
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <cpuid.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <hugetlbfs.h>
/*
 * Read CLOCK_MONOTONIC and return its value in seconds as a double
 * (whole seconds plus the nanosecond part scaled down by 1e9).
 */
double sec(void)
{
    struct timespec now;
    double whole;
    double frac;

    clock_gettime(CLOCK_MONOTONIC, &now);
    whole = now.tv_sec;
    frac = now.tv_nsec / 1e9;
    return whole + frac;
}
/* Element type of the benchmark buffer; each element also serves as an index into it. */
typedef unsigned elem_t;

/* Size in bytes of the benchmark buffer (512 MiB). */
size_t size_max = 512ULL * 1024 * 1024;
/*
 * Print one result line in the form
 *   "<label> rand read : <ns>[ns/read], range=<n>[B|KB|MB]"
 * The range is shown in bytes below 16 KiB, in KB below 16 MiB and in MB
 * from there on.
 */
static void
report_rand_read(const char *label, double ns_per_read, size_t range_bytes)
{
    const char *unit = "B";
    size_t shown = range_bytes;

    if (range_bytes >= 16*1024ULL*1024ULL) {
        unit = "MB";
        shown = range_bytes / (1024*1024);
    } else if (range_bytes >= 16*1024) {
        unit = "KB";
        shown = range_bytes / 1024;
    }
    printf("%s rand read : %f[ns/read], range=%zu[%s]\n",
           label, ns_per_read, shown, unit);
}

/*
 * Measure random-read latency over working sets that double from 64 KiB up
 * to size_max bytes, printing one line per working-set size.
 *
 * mem      : buffer of at least size_max bytes; it is overwritten.
 * full_dep : non-zero -> every load address depends on the previous load
 *            (pointer chase starting at index 0);
 *            zero     -> loads are independent of each other
 *            (mem[mem[i]] for consecutive i).
 *
 * Returns nothing; a NULL buffer is reported on stderr and skipped.
 */
void
do_test(elem_t *mem, int full_dep)
{
    if (mem == NULL) {
        fprintf(stderr, "do_test: buffer is NULL, skipping\n");
        return;
    }

    /* Write the whole buffer once up front (presumably so that every page
     * is already faulted in before anything is timed). */
    const size_t total_elems = size_max / sizeof(elem_t);
    for (size_t i = 0; i < total_elems; i++) {
        mem[i] = (elem_t)i;
    }

    const char *label = full_dep ? "dep" : "parallel";

    /* Loop condition keeps the working set inside the buffer even when
     * size_max is smaller than the first step. */
    for (size_t cur_size = 64*1024; cur_size <= size_max; cur_size *= 2) {
        const size_t range_elems = cur_size / sizeof(elem_t);

        /* Fill the working set with the identity mapping, then shuffle it
         * by swapping each slot with a randomly chosen slot.
         * NOTE(review): this yields an arbitrary permutation, so the
         * dependent chain that starts at index 0 can close into a cycle
         * shorter than the working set; a single-cycle (Sattolo) shuffle
         * would rule that out. */
        for (size_t i = 0; i < range_elems; i++) {
            mem[i] = (elem_t)i;
        }
        for (size_t i = 0; i < range_elems; i++) {
            elem_t pos = (elem_t)(drand48() * range_elems);
            elem_t t = mem[i];
            mem[i] = mem[pos];
            mem[pos] = t;
        }

        /* Repeat small working sets up to 16 times and scale the repeat
         * count down as the set grows.  The divisor is parenthesised
         * (the old `nelem / 64*1024` parsed as `(nelem/64)*1024`, which
         * always forced a single pass) and guarded because it is 0 for
         * working sets below 64Ki elements. */
        size_t niter = 16;
        const size_t scale = range_elems / (64*1024);
        if (scale > 1) {
            niter /= scale;
        }
        if (niter < 1) {
            niter = 1;
        }

        /* Cap the number of timed reads per pass; the stored indices still
         * span the full working set. */
        size_t read_elems = range_elems;
        if (read_elems > 16*1024*1024) {
            read_elems = 16*1024*1024;
        }

        double t0 = sec();
        elem_t sum = 0;
        if (full_dep) {
            for (size_t ii = 0; ii < niter; ii++) {
                elem_t prev = 0;
                for (size_t ei = 0; ei < read_elems; ei++) {
                    elem_t v = mem[prev];
                    sum += v;
                    prev = v;
                    /* Compiler barrier: keeps each load in the loop. */
                    __asm__ __volatile__ (" " ::: "memory");
                }
            }
        } else {
            for (size_t ii = 0; ii < niter; ii++) {
                for (size_t ei = 0; ei < read_elems; ei++) {
                    sum += mem[mem[ei]];
                    __asm__ __volatile__ (" " ::: "memory");
                }
            }
        }
        /* Consume sum so the accumulation cannot be discarded. */
        __asm__ __volatile__ (" " ::"r"(sum));
        double t1 = sec();

        const double total_reads = (double)niter * (double)read_elems;
        const double ns_per_read = ((t1 - t0) / total_reads) * (1e9);

        report_rand_read(label, ns_per_read, cur_size);
    }
}
/*
 * Entry point: print the CPU brand string, run both read benchmarks on a
 * malloc'ed buffer, then (when size_max is below 1 GiB) run them again on a
 * buffer mapped from a 1 GiB hugetlbfs page.
 *
 * Returns 0 on success, 1 when a buffer cannot be allocated or mapped.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

#ifdef _WIN32
#define x_cpuid(p,eax) __cpuid(p, eax)
    typedef int cpuid_t;
#else
#define x_cpuid(p,eax) __get_cpuid(eax, &(p)[0], &(p)[1], &(p)[2], &(p)[3])
    typedef unsigned int cpuid_t;
#endif
    {
        /* Leaves 0x80000002..4 each return 16 bytes of the brand string;
         * the extra word is the terminator.  Zero-initialised so that an
         * unsupported leaf prints an empty/short string, not garbage. */
        cpuid_t data[4*3+1] = {0};
        x_cpuid(data+4*0, 0x80000002);
        x_cpuid(data+4*1, 0x80000003);
        x_cpuid(data+4*2, 0x80000004);
        data[12] = 0;
        puts((char*)data);
    }

    elem_t *mem = malloc(size_max);
    if (mem == NULL) {
        fprintf(stderr, "error : cannot allocate %zu bytes\n", size_max);
        return 1;
    }
    puts("4k page");
    do_test(mem, 0);
    do_test(mem, 1);
    free(mem);
    mem = NULL;

    if (size_max < (1024*1024*1024ULL)) {
        const size_t huge_len = 1024*1024*1024ULL;

        /* NOTE(review): the returned sizes are never used; the call is
         * kept only to preserve the original call sequence. */
        long list[4];
        (void)gethugepagesizes(list, 4);

        int fd = hugetlbfs_unlinked_fd_for_size(huge_len);
        if (fd < 0) {
            fprintf(stderr, "warn : cannot allocate 1gb page\n");
            return 1;
        }

        void *p = mmap(0, huge_len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
            /* mmap reports failure as MAP_FAILED, not NULL. */
            perror("mmap");
            close(fd);
            return 1;
        }

        puts("1G page");
        do_test(p, 0);
        do_test(p, 1);

        munmap(p, huge_len);
        close(fd);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment