This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Absolute path of the CSV file holding the source data.
// Written as a raw string literal, so the backslashes of the Windows path need no escaping.
// NOTE(review): the path is hard-coded and machine-specific — adjust it before running elsewhere.
static const char* const sourceDataPath = R"(C:\Temp\2remove\vectors.csv)"; | |
#define _CRT_SECURE_NO_WARNINGS | |
#include <iostream> | |
#include <chrono> | |
#include <immintrin.h> | |
#include <assert.h> | |
using namespace std; | |
using namespace std::chrono; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Absolute path of the CSV file holding the source data.
// Written as a raw string literal, so the backslashes of the Windows path need no escaping.
// NOTE(review): the path is hard-coded and machine-specific — adjust it before running elsewhere.
static const char* const sourceDataPath = R"(C:\Temp\2remove\vectors.csv)"; | |
#define _CRT_SECURE_NO_WARNINGS | |
#include <iostream> | |
#include <chrono> | |
#include <immintrin.h> | |
using namespace std; | |
using namespace std::chrono; | |
// Compile-time integer constant.
// NOTE(review): the code that reads it is outside this excerpt — presumably an element count; confirm against its users.
constexpr int SIZE = 640000; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <emmintrin.h> // SSE 2 | |
#include <tmmintrin.h> // SSSE 3 | |
#include <smmintrin.h> // SSE 4.1 | |
// Vector constants for dot4Sse function | |
struct ConstantVectorsSse | |
{ | |
__m128i abcd; | |
__m128i lowNibbleMask; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <vector> | |
#include <set> | |
// File-local logging switch: message() checks this flag before printing its argument with printf.
// Initialized to false, so that output is disabled unless the flag is set elsewhere.
static bool s_log = false; | |
void message( const char* what ) | |
{ | |
if( s_log ) | |
{ | |
printf( "%s\n", what ); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
std::vector<std::string> someFunction( const Invocation& invocation ) | |
{ | |
// Define hash and comparison for string pointers, by value | |
struct StringPtrTraits | |
{ | |
size_t operator()( const std::string* rsi ) const | |
{ | |
return std::hash<std::string>()( *rsi ); | |
} | |
bool operator()( const std::string* a, const std::string* b ) const |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h> | |
#include <stdint.h> | |
// 1 = use `vpgatherdq` to load 4 numbers with 1 instruction, 0 = load them with scalar loads | |
// It seems on AMD CPUs scalar loads are slightly faster | |
// NOTE(review): the macro name is misspelled ("INSTUCTIONS" instead of "INSTRUCTIONS").
// It is left unchanged here because code outside this excerpt may test the macro by this exact name.
#define USE_GATHER_INSTUCTIONS 0 | |
// Inclusive prefix sum of unsigned bytes = offsets of the end of the numbers | |
// When the sum of all bytes exceeds 0xFF, the output is garbage | |
// Which is fine here because our bytes are in [0..8] interval |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <immintrin.h> | |
#include <intrin.h> | |
#include <stdio.h> | |
// Count of set bits in `plus` minus count of set bits in `minus` | |
// The result is in [ -32 .. +32 ] interval | |
inline int popCntDiff( uint32_t plus, uint32_t minus ) | |
{ | |
plus = __popcnt( plus ); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Transform 4 inputs with 4 lookup tables, making 4 outputs | |
// The 4 inputs are packed in uint32_t value, each byte is expected to be in [ 0 .. 15 ] interval | |
// The 4 tables are in a single AVX2 vector | |
uint32_t applyLookup4( uint32_t i4, __m256i tables4 ) | |
{ | |
// Move 4 bytes into SSE vector | |
__m128i bytes = _mm_cvtsi32_si128( (int)i4 ); | |
// Expand bytes into uint64_t lanes | |
__m256i v = _mm256_cvtepu8_epi64( bytes ); | |
// Multiply them by 4 to get shift amounts in bits |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <random> | |
#include <vector> | |
#include <unordered_map> | |
#include <algorithm> | |
#include <optional> | |
#include <intrin.h> | |
#include <inttypes.h> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Linq.Expressions; | |
using System.Reflection; | |
using System.Runtime.CompilerServices; | |
static class ReflectTest | |
{ | |
/// <summary>Generic method to call</summary> | |
public static T GetValue<T>( T value ) | |
{ | |
return value; |
NewerOlder