Skip to content

Instantly share code, notes, and snippets.

View tanakamura's full-sized avatar

Takashi Nakamura tanakamura

View GitHub Profile
#include <omp.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <x86intrin.h>
#define LINE_SIZE 64
#define UNICODE
#include <windows.h>
#include <stdio.h>
double f_freq;
double
getsec(void)
{
#define _GNU_SOURCE
#include <time.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
double f_freq;
double
getsec(void)
#define _GNU_SOURCE
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
double
getsec(void)
{
#define UNICODE
#include <windows.h>
#include <stdio.h>
double f_freq;
double
getsec(void)
.data
.text
.globl run_4fmaps
// extern void run_4vnniw(
// unsigned short dst[16], // rdi
// unsigned short src0[64], // rsi
// unsigned short src1[4]); // rdx
run_4fmaps:
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <sys/mman.h>
#include <getopt.h>
#include <sys/time.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
#include <pthread.h>
# DDR3 1600 - 2ch, 4thread (theoretical peak = 25.6GB/s)
num_thread = 1
libc-memset : 1024[ B] 97.502683[GB/s]
libc-memset : 2048[ B] 106.117820[GB/s]
libc-memset : 4096[ B] 97.793161[GB/s]
libc-memset : 8192[ B] 106.070275[GB/s]
libc-memset : 16[KB] 110.699407[GB/s]
libc-memset : 32[KB] 111.365322[GB/s]
libc-memset : 64[KB] 52.301605[GB/s]
# DDR4 2400 - 2ch, 4thread (theoretical peak = 38.4GB/s)
num_thread = 1
libc-memset : 1024[ B] 43.663790[GB/s]
libc-memset : 2048[ B] 46.885033[GB/s]
libc-memset : 4096[ B] 48.524845[GB/s]
libc-memset : 8192[ B] 48.603032[GB/s]
libc-memset : 16[KB] 46.270248[GB/s]
libc-memset : 32[KB] 49.114524[GB/s]
libc-memset : 64[KB] 47.925331[GB/s]
Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz
4k page
parallel rand read : 0.475830[ns/read], range=64[KB]
parallel rand read : 0.582672[ns/read], range=128[KB]
parallel rand read : 0.691971[ns/read], range=256[KB]
parallel rand read : 0.981163[ns/read], range=512[KB]
parallel rand read : 1.212128[ns/read], range=1024[KB]
parallel rand read : 1.309196[ns/read], range=2048[KB]
parallel rand read : 1.353122[ns/read], range=4096[KB]
parallel rand read : 2.068208[ns/read], range=8192[KB]