Last active
July 15, 2022 03:20
-
-
Save axman6/11ef82813b0c86b64315b8f91df6abfa to your computer and use it in GitHub Desktop.
pop count experiments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
// #include <benchmark/benchmark.h> | |
const size_t popcount(uint16_t x) { | |
return __builtin_popcount(x); | |
} | |
const size_t popcount16_5(uint16_t x) { | |
uint64_t xx = x; | |
// uint64_t = (xx << 48) | (xx << 32) | (xx << 16) | xx; | |
uint64_t a = xx * 0x0001000100010001; | |
uint64_t b = a & 0x8888444422221111; // 0x8888444422221111 | |
uint64_t c = b + (b >> 34); | |
uint16_t d = (uint16_t)(c + (c >> 17)); | |
uint8_t e = (uint8_t)(d + (d >> 8)); | |
uint8_t f = (uint8_t)(e + (e >> 4)); | |
return (size_t) f & 0x0F; | |
} | |
const size_t popcount16_6(uint16_t x) { | |
uint64_t xx = x; | |
// uint64_t = (xx << 48) | (xx << 32) | (xx << 16) | xx; | |
uint64_t a = xx * 0x0001000100010001; | |
uint64_t b = a & 0x8888444422221111; // 0x8888444422221111 | |
uint64_t c = b + (b >> 34); | |
uint64_t d = c + (c >> 17); | |
uint64_t e = d + (d >> 8); | |
uint64_t f = e + (e >> 4); | |
return (size_t) f & 0x0F; | |
} | |
const size_t popcount16_7(uint16_t x) { | |
const uint64_t spread = 0x8040201008040201; | |
const uint64_t select = 0x8080808080808080; | |
uint64_t xx = (spread * ((uint64_t)x & 0xFF)) & select; | |
uint64_t yy = (spread * (((uint64_t)x >> 8) & 0xFF)) & select; | |
xx >>= 7; | |
yy >>= 7; | |
const uint64_t mulMask = 0x0101010101010101; | |
xx *= mulMask; | |
yy *= mulMask; | |
return (xx >> 56) + (yy >> 56); | |
} | |
const size_t popcount16(uint16_t x) { | |
size_t i; | |
size_t count = 0; | |
for (i = 0; i < 16; i++) { | |
count += x & 1; | |
x >>= 1; | |
} | |
return count; | |
} | |
const size_t popcount16_2(uint16_t x) { | |
size_t i; | |
size_t count = 0; | |
while (x) { | |
count += x & 1; | |
x >>= 1; | |
} | |
return count; | |
} | |
const size_t popcount16_3(uint16_t x) { | |
#define STEP(a,b,n,pat) size_t a = (b & pat) + ((b >> n) & pat) | |
STEP(a,((size_t)x),1,0x5555); | |
STEP(b,a,2,0x3333); | |
STEP(c,b,4,0x0F0F); | |
STEP(d,c,8,0x00FF); | |
return d; | |
#undef STEP | |
} | |
#ifndef BENCHMARK | |
#define TARGET popcount16_7 | |
int main(int charc, char** argv) { | |
for(int i = 0; i < 65535; i++){ | |
int p5 = TARGET(i); | |
if(popcount16(i) != p5){ | |
printf("%4d: %4lu %4u\n", i, popcount16(i), TARGET(i)); | |
} | |
} | |
} | |
#else | |
static void BM_popcount16(benchmark::State& state) { | |
uint16_t x = 0; | |
volatile size_t y = 0; | |
// Perform setup here | |
for (auto _ : state) { | |
// This code gets timed | |
y += popcount16(x); | |
x++; | |
} | |
y; | |
} | |
// Register the function as a benchmark | |
BENCHMARK(BM_popcount16); | |
static void BM_popcount16_2(benchmark::State& state) { | |
uint16_t x = 0; | |
volatile size_t y = 0; | |
// Perform setup here | |
for (auto _ : state) { | |
// This code gets timed | |
y += popcount16_2(x); | |
x++; | |
} | |
y; | |
} | |
// Register the function as a benchmark | |
BENCHMARK(BM_popcount16_2); | |
static void BM_popcount16_3(benchmark::State& state) { | |
uint16_t x = 0; | |
volatile size_t y = 0; | |
// Perform setup here | |
for (auto _ : state) { | |
// This code gets timed | |
y += popcount16_3(x); | |
x++; | |
} | |
y; | |
} | |
// Register the function as a benchmark | |
BENCHMARK(BM_popcount16_3); | |
static void BM_popcount16_5(benchmark::State& state) { | |
uint16_t x = 0; | |
volatile size_t y = 0; | |
// Perform setup here | |
for (auto _ : state) { | |
// This code gets timed | |
y += popcount16_5(x); | |
x++; | |
} | |
y; | |
} | |
// Register the function as a benchmark | |
BENCHMARK(BM_popcount16_5); | |
static void BM_popcount16_6(benchmark::State& state) { | |
uint16_t x = 0; | |
volatile size_t y = 0; | |
// Perform setup here | |
for (auto _ : state) { | |
// This code gets timed | |
y += popcount16_6(x); | |
x++; | |
} | |
y; | |
} | |
// Register the function as a benchmark | |
BENCHMARK(BM_popcount16_6); | |
static void BM_popcount16_7(benchmark::State& state) { | |
uint16_t x = 0; | |
volatile size_t y = 0; | |
// Perform setup here | |
for (auto _ : state) { | |
// This code gets timed | |
y += popcount16_7(x); | |
x++; | |
} | |
y; | |
} | |
// Register the function as a benchmark | |
BENCHMARK(BM_popcount16_7); | |
static void BM_popcount(benchmark::State& state) { | |
uint16_t x = 0; | |
volatile size_t y = 0; | |
// Perform setup here | |
for (auto _ : state) { | |
// This code gets timed | |
y += popcount(x); | |
x++; | |
} | |
y; | |
} | |
// Register the function as a benchmark | |
BENCHMARK(BM_popcount); | |
#endif | |
/* | |
6 5 4 4 3 2 1 | |
4 6 8 0 2 4 6 8 0 | |
1000100010001000010001000100010000100010001000100001000100010001 | |
100010001000100001000100010001 | |
1000100010001 | |
10001 | |
0001 | |
0b1000100010001000010001000100010000100010001000100001000100010001 | |
>> 34 | |
10001000_01000100_00100010_00010001 | |
+ 00000000_00000000_00100010_00010001 >> 18 | |
= 10001000_01000100_01000100_00100010 | |
+ 00000000_00000000_00000000_00010001 >> 10 | |
= 10001000010001000100010000110011 | |
+ 00000000000000000000000000000001 >> 4 | |
= 10001000010001000100010000110100 | |
// 1000000001000000001000000001000000001000000001000000001000000001 | |
spread | |
1000000010000000100000001000000010000000100000001000000010000000 | |
mul | |
0000000100000001000000010000000100000001000000010000000100000001 | |
00000100 | |
mul^2 | |
0000001110000011000000101000001000000001100000010000000010000000 | |
100000010000000110000010000000101000001100000011 | |
10000100000000111000001100000010100000100000000110000001000000001 | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment