Last active
March 4, 2017 17:17
-
-
Save tanakamura/d660454d24a455658a2662acb2d6081c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#include <stdio.h> | |
#include <sys/time.h> | |
#include <time.h> | |
#include <stdlib.h> | |
#include <cpuid.h> | |
#include <unistd.h> | |
#include <fcntl.h> | |
#include <string.h> | |
#include <sys/mman.h> | |
#include <hugetlbfs.h> | |
/*
 * Read CLOCK_MONOTONIC and return it as a single value in seconds.
 *
 * The whole-second and nanosecond fields of the timespec are folded into
 * one double, so subtracting two readings yields an elapsed time in
 * seconds.  Returns 0.0 if the clock cannot be read, so that the timespec
 * is never used uninitialised.
 */
double sec(void) {
    struct timespec ts;

    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0.0;
    }
    return (double)ts.tv_sec + (double)ts.tv_nsec / 1000000000.0;
}
/*
 * Element type stored in the benchmark buffer.  do_test() fills the buffer
 * with values of this type and then uses them as indices back into it.
 */
typedef unsigned int elem_t;

/* Size in bytes of the buffer handed to do_test() (512 MiB). */
size_t size_max = 1024 * 1024 * 512ULL;
void | |
do_test(elem_t *mem, int full_dep) | |
{ | |
size_t cur_size = 64*1024; | |
for (size_t i=0; i<size_max/sizeof(elem_t); i++) { | |
mem[i] = i; | |
} | |
while (1) { | |
size_t nelem = cur_size / sizeof(elem_t); | |
size_t niter = 16; | |
for (size_t i=0; i<nelem; i++) { | |
mem[i] = i; | |
} | |
for (size_t i=0; i<nelem; i++) { | |
elem_t pos = (elem_t)(drand48()*nelem); | |
elem_t t = mem[i]; | |
mem[i] = mem[pos]; | |
mem[pos] = t; | |
} | |
niter /= (nelem / 64*1024); | |
if (niter < 1) { | |
niter = 1; | |
} | |
if (nelem > 16*1024*1024) { | |
nelem = 16*1024*1024; | |
} | |
double t0 = sec(); | |
elem_t sum = 0; | |
if (full_dep) { | |
for (size_t ii=0; ii<niter; ii++) { | |
elem_t prev = 0; | |
for (size_t ei=0; ei<nelem; ei++) { | |
elem_t v = mem[prev]; | |
sum += v; | |
prev = v; | |
__asm__ __volatile__ (" " ::: "memory"); | |
} | |
} | |
} else { | |
for (size_t ii=0; ii<niter; ii++) { | |
for (size_t ei=0; ei<nelem; ei++) { | |
sum += mem[mem[ei]]; | |
__asm__ __volatile__ (" " ::: "memory"); | |
} | |
} | |
} | |
__asm__ __volatile__ (" " ::"r"(sum)); | |
double t1 = sec(); | |
long long total_transaction = niter * nelem; | |
double ns_per_read = ((t1-t0) / total_transaction) * (1e9); | |
if (full_dep) { | |
if (cur_size < 16*1024) { | |
printf("dep rand read : %f[ns/read], range=%zd[B]\n", ns_per_read, cur_size); | |
} else if (cur_size < 16*1024ULL*1024ULL) { | |
printf("dep rand read : %f[ns/read], range=%zd[KB]\n", ns_per_read, cur_size/1024); | |
} else { | |
printf("dep rand read : %f[ns/read], range=%zd[MB]\n", ns_per_read, cur_size/(1024*1024)); | |
} | |
} else { | |
if (cur_size < 16*1024) { | |
printf("parallel rand read : %f[ns/read], range=%zd[B]\n", ns_per_read, cur_size); | |
} else if (cur_size < 16*1024ULL*1024ULL) { | |
printf("parallel rand read : %f[ns/read], range=%zd[KB]\n", ns_per_read, cur_size/1024); | |
} else { | |
printf("parallel rand read : %f[ns/read], range=%zd[MB]\n", ns_per_read, cur_size/(1024*1024)); | |
} | |
} | |
cur_size *= 2; | |
if (cur_size > size_max) { | |
break; | |
} | |
} | |
} | |
int main(int argc, char **argv) | |
{ | |
#ifdef _WIN32 | |
#define x_cpuid(p,eax) __cpuid(p, eax) | |
typedef int cpuid_t; | |
#else | |
#define x_cpuid(p,eax) __get_cpuid(eax, &(p)[0], &(p)[1], &(p)[2], &(p)[3]); | |
typedef unsigned int cpuid_t; | |
#endif | |
{ | |
cpuid_t data[4*3+1]; | |
x_cpuid(data+4*0, 0x80000002); | |
x_cpuid(data+4*1, 0x80000003); | |
x_cpuid(data+4*2, 0x80000004); | |
data[12] = 0; | |
puts((char*)data); | |
} | |
elem_t *mem = (elem_t*)malloc(size_max); | |
puts("4k page"); | |
do_test(mem, 0); | |
do_test(mem, 1); | |
free(mem); | |
if (size_max < (1024*1024*1024ULL)) { | |
long list[4]; | |
gethugepagesizes(list, 4); | |
int fd = hugetlbfs_unlinked_fd_for_size(1024*1024*1024ULL); | |
if (fd < 0) { | |
fprintf(stderr, "warn : cannot allocate 1gb page\n"); | |
return 1; | |
} | |
void *p = mmap(0, 1024*1024*1024ULL, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); | |
puts("1G page"); | |
do_test(p, 0); | |
do_test(p, 1); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment