This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <time.h> | |
#include <sys/time.h> | |
/*
 * Return the current CLOCK_MONOTONIC reading in seconds as a double.
 *
 * The whole-second and nanosecond fields of the timespec are combined into
 * one floating-point value, so the difference between two calls gives an
 * elapsed time in seconds.
 */
double getsec(void)
{
    struct timespec tv;

    clock_gettime(CLOCK_MONOTONIC, &tv);

    /* tv_nsec counts nanoseconds; dividing by 1e9 converts it to seconds. */
    return tv.tv_sec + tv.tv_nsec / 1e9;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Issuing a store right after clzero performs far worse than memset.
 *
 *  * clzero only    : 30 [GB/s]
 *  * clzero + store : 3 [GB/s]
 *  * memset         : 12.5 [GB/s]
 */
void *amd_clzero(void *dst, const void *src, size_t sz) | |
{ | |
size_t line_size = 64; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
num_thread = 1 | |
libc-memset : 1024[ B] 42.105008[GB/s] | |
libc-memset : 2048[ B] 42.338436[GB/s] | |
libc-memset : 4096[ B] 49.077192[GB/s] | |
libc-memset : 8192[ B] 48.563454[GB/s] | |
libc-memset : 16[KB] 50.124791[GB/s] | |
libc-memset : 32[KB] 49.960671[GB/s] | |
libc-memset : 64[KB] 49.425556[GB/s] | |
libc-memset : 128[KB] 49.101868[GB/s] | |
libc-memset : 256[KB] 47.760674[GB/s] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <unistd.h> | |
#include <time.h> | |
#include <sys/mman.h> | |
#include <getopt.h> | |
#include <sys/time.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <immintrin.h> | |
#include <pthread.h> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Read CLOCK_MONOTONIC and return it as fractional seconds.
 *
 * The nanosecond field is scaled to seconds and added to the whole-second
 * field; subtract two readings to obtain an elapsed interval in seconds.
 */
static double getsec(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);

    /* tv_nsec is in nanoseconds, hence the 1e9 divisor. */
    return (ts.tv_nsec / 1e9) + ts.tv_sec;
}
static double get_sec() { | |
struct timespec ts; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <x86intrin.h> | |
#include <math.h> | |
#include <unistd.h> | |
#include <time.h> | |
#include <sys/time.h> | |
static double calib_ops() { | |
long long n = 1024*1024*4; | |
long long core_cycle = 64 * n; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#include <stdio.h> | |
#include <sys/time.h> | |
#include <time.h> | |
#include <stdlib.h> | |
#include <cpuid.h> | |
#include <unistd.h> | |
#include <fcntl.h> | |
#include <string.h> | |
#include <sys/mman.h> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AMD Ryzen 7 1700X Eight-Core Processor | |
4k page | |
parallel rand read : 0.464111[ns/read], range=64[KB] | |
parallel rand read : 0.494720[ns/read], range=128[KB] | |
parallel rand read : 0.567612[ns/read], range=256[KB] | |
parallel rand read : 0.642448[ns/read], range=512[KB] | |
parallel rand read : 0.713230[ns/read], range=1024[KB] | |
parallel rand read : 0.737040[ns/read], range=2048[KB] | |
parallel rand read : 0.773684[ns/read], range=4096[KB] | |
parallel rand read : 2.559083[ns/read], range=8192[KB] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz | |
4k page | |
parallel rand read : 0.475830[ns/read], range=64[KB] | |
parallel rand read : 0.582672[ns/read], range=128[KB] | |
parallel rand read : 0.691971[ns/read], range=256[KB] | |
parallel rand read : 0.981163[ns/read], range=512[KB] | |
parallel rand read : 1.212128[ns/read], range=1024[KB] | |
parallel rand read : 1.309196[ns/read], range=2048[KB] | |
parallel rand read : 1.353122[ns/read], range=4096[KB] | |
parallel rand read : 2.068208[ns/read], range=8192[KB] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DDR4 2400 - 2ch, 4thread (theoretical peak = 38.6GB/s) | |
num_thread = 1 | |
libc-memset : 1024[ B] 43.663790[GB/s] | |
libc-memset : 2048[ B] 46.885033[GB/s] | |
libc-memset : 4096[ B] 48.524845[GB/s] | |
libc-memset : 8192[ B] 48.603032[GB/s] | |
libc-memset : 16[KB] 46.270248[GB/s] | |
libc-memset : 32[KB] 49.114524[GB/s] | |
libc-memset : 64[KB] 47.925331[GB/s] |