Skip to content

Instantly share code, notes, and snippets.

View tanakamura's full-sized avatar

Takashi Nakamura tanakamura

View GitHub Profile
AMD Ryzen 7 1700X Eight-Core Processor
4k page
parallel rand read : 0.464111[ns/read], range=64[KB]
parallel rand read : 0.494720[ns/read], range=128[KB]
parallel rand read : 0.567612[ns/read], range=256[KB]
parallel rand read : 0.642448[ns/read], range=512[KB]
parallel rand read : 0.713230[ns/read], range=1024[KB]
parallel rand read : 0.737040[ns/read], range=2048[KB]
parallel rand read : 0.773684[ns/read], range=4096[KB]
parallel rand read : 2.559083[ns/read], range=8192[KB]
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <cpuid.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <stdio.h>
#include <x86intrin.h>
#include <math.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
static double calib_ops() {
long long n = 1024*1024*4;
long long core_cycle = 64 * n;
/*
 * Read CLOCK_MONOTONIC and return the reading as seconds in a double
 * (whole seconds plus the nanosecond field scaled by 1e9).
 * The return value of clock_gettime() is not inspected.
 */
static double getsec()
{
    struct timespec now;
    double frac_sec;
    double whole_sec;

    clock_gettime(CLOCK_MONOTONIC, &now);

    frac_sec = now.tv_nsec / 1e9;   /* nanoseconds -> fractional seconds */
    whole_sec = now.tv_sec;

    return frac_sec + whole_sec;
}
static double get_sec() {
struct timespec ts;
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <sys/mman.h>
#include <getopt.h>
#include <sys/time.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
#include <pthread.h>
num_thread = 1
libc-memset : 1024[ B] 42.105008[GB/s]
libc-memset : 2048[ B] 42.338436[GB/s]
libc-memset : 4096[ B] 49.077192[GB/s]
libc-memset : 8192[ B] 48.563454[GB/s]
libc-memset : 16[KB] 50.124791[GB/s]
libc-memset : 32[KB] 49.960671[GB/s]
libc-memset : 64[KB] 49.425556[GB/s]
libc-memset : 128[KB] 49.101868[GB/s]
libc-memset : 256[KB] 47.760674[GB/s]
/*
* Issuing a store after clzero is far slower than plain memset
*
* * clzero only :30[GB/s]
* * clzero + store :3[GB/s]
* * memset :12.5[GB/s]
*/
void *amd_clzero(void *dst, const void *src, size_t sz)
{
size_t line_size = 64;
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
/*
 * Return the current CLOCK_MONOTONIC time in seconds as a double:
 * the seconds field plus the nanoseconds field divided by 1e9.
 * The return value of clock_gettime() is not inspected.
 */
double getsec() {
    struct timespec stamp;
    double seconds;

    clock_gettime(CLOCK_MONOTONIC, &stamp);

    seconds = stamp.tv_sec;
    seconds += stamp.tv_nsec / 1e9;   /* add the sub-second part */
    return seconds;
}
Ryzen 7 3700x vs i7-6700
============= LATENCY ==============================================================================
instruction | IPC ( rel[%]), CPI ( rel[%])
------------------------------------------+---------------------------------------------------------
m128 addps | 0.33-0.25 ( 33.3[%]), 3.00-4.00 ( -25.0[%])
m128 aesdec | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%])
m128 aesdeclast | 0.25-0.25 ( -0.0[%]), 4.00-4.00 ( 0.0[%])
m128 aesenc | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%])
m128 aesenclast | 0.25-0.25 ( -0.0[%]), 4.00-4.00 ( 0.0[%])
num_thread = 1
libc-memset : 1024[ B] 109.797060[GB/s]
libc-memset : 2048[ B] 119.469974[GB/s]
libc-memset : 4096[ B] 124.585380[GB/s]
libc-memset : 8192[ B] 127.374256[GB/s]
libc-memset : 16[KB] 126.546911[GB/s]
libc-memset : 32[KB] 125.730241[GB/s]
libc-memset : 64[KB] 120.244104[GB/s]
libc-memset : 128[KB] 118.669685[GB/s]
libc-memset : 256[KB] 112.864779[GB/s]