This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://github.com/bittnkr/uniq/issues/2
// Compile with: g++ -m32 -O1 -pthread -Wall
// Based on sandbox/hello.cpp, but parameterized for qtype instead of int,
// and queuing values that have non-zero high halves (and that aren't all the same as each other!)
#include <pthread.h> | |
#include <stdio.h> | |
#include <thread> | |
#include <vector> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; 3 versions; only one is enabled at a time using the NASM/YASM preprocessor's %if 1 (like C's #if 0 / #if 1):
;;   - shuffle on the fly
;;   - copy + in-place
;;   - read-only up/down/bidir
;;; ~/bin/asm-link loop-up-down.asm && disas loop-up-down && ocperf.py stat -etask-clock,page-faults,cycles,L1-dcache-loads,L1-dcache-load-misses,LLC-loads,LLC-load-misses,instructions,dtlb_store_misses.miss_causes_a_walk -r4 ./loop-up-down | |
;;;~/bin/asm-link loop-up-down.asm && rm loop-up-down.o && disas loop-up-down && nice ocperf.py stat -etask-clock,page-faults,cycles,L1-dcache-loads,LLC-loads,LLC-load-misses,instructions,dtlb_store_misses.miss_causes_a_walk,dtlb_load_misses.stlb_hit,dtlb_load_misses.miss_causes_a_walk -r3 ./loop-up-down | |
default rel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// x86 SIMD string to uppercase
// See http://stackoverflow.com/questions/735204/convert-a-string-in-c-to-upper-case
#include <stdio.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <strings.h> // for ffs | |
#include <ctype.h> | |
#include <immintrin.h> |