Skip to content

Instantly share code, notes, and snippets.

View tanakamura's full-sized avatar

Takashi Nakamura tanakamura

View GitHub Profile
# DDR3 1600 - 2ch, 4thread (theoretical peak = 25.6GB/s)
num_thread = 1
libc-memset : 1024[ B] 97.502683[GB/s]
libc-memset : 2048[ B] 106.117820[GB/s]
libc-memset : 4096[ B] 97.793161[GB/s]
libc-memset : 8192[ B] 106.070275[GB/s]
libc-memset : 16[KB] 110.699407[GB/s]
libc-memset : 32[KB] 111.365322[GB/s]
libc-memset : 64[KB] 52.301605[GB/s]
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <sys/mman.h>
#include <getopt.h>
#include <sys/time.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
#include <pthread.h>
.data
.text
.globl run_4fmaps
// extern void run_4vnniw(
// unsigned short dst[16], // rdi
// unsigned short src0[64], // rsi
// unsigned short src1[4]); // rdx
run_4fmaps:
#define UNICODE
#include <windows.h>
#include <stdio.h>
double f_freq;
double
getsec(void)
#define _GNU_SOURCE
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
double
getsec(void)
{
#define _GNU_SOURCE
#include <time.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
double f_freq;
double
getsec(void)
#define UNICODE
#include <windows.h>
#include <stdio.h>
double f_freq;
double
getsec(void)
{
#include <omp.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <x86intrin.h>
#define LINE_SIZE 64
#include <immintrin.h>
#include <x86intrin.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
static inline
__m256i mm256_u8gather_epu8(const uint8_t* lut, __m256i vindex, __m256i andMask) {
__m256i lo = _mm256_unpacklo_epi8(vindex, _mm256_setzero_si256());
# Build flow that turns top.v into the bitstream top.bin using yosys,
# arachne-pnr and icepack.
# NOTE(review): the recipe lines below start at column 0 in this copy; make
# requires each recipe line to begin with a tab -- presumably the indentation
# was lost in extraction, confirm against the original file.

# Default target: build the final binary.
all: top.bin
# Synthesis: run yosys' synth_ice40 pass on top.v, emitting top.blif; the same
# invocation also runs `clean` and writes the synthesized netlist to synth.v.
top.blif: top.v
yosys -p "synth_ice40 -blif top.blif; clean ; write_verilog synth.v" top.v
# Place and route: arachne-pnr consumes top.blif together with the constraints
# file top.pcf and writes top.txt. The -d 8k / -P ct256 flags presumably select
# the 8k device in the ct256 package -- verify against the target board.
top.txt: top.blif top.pcf
arachne-pnr -p top.pcf top.blif -o top.txt -d 8k -P ct256
# Packing: icepack converts top.txt into top.bin.
top.bin: top.txt
icepack top.txt top.bin