Created
January 3, 2016 18:04
-
-
Save simonwagner/387cfd7e96ca03c9c71a to your computer and use it in GitHub Desktop.
Calculate how many numbers have the bit i set in the range 0-n (excluding n)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <limits> | |
#include <functional> | |
#include <chrono> | |
using namespace std; | |
#define likely(x) __builtin_expect (!!(x), 1) | |
#define unlikely(x) __builtin_expect (!!(x), 0) | |
struct verification_result { | |
bool valid; | |
uint32_t n; | |
uint32_t i; | |
uint32_t expected; | |
uint32_t result; | |
}; | |
void VerificationFail() | |
{ | |
} | |
#define TESTED_BIT_RANGE 32 | |
template<uint32_t (*F)(uint32_t, uint32_t)> | |
static verification_result verify() | |
{ | |
uint32_t expected_value_for_bit[TESTED_BIT_RANGE]; | |
//initialize expected values with 0 | |
for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) { | |
expected_value_for_bit[i] = 0; | |
} | |
//Test for all values of n | |
uint32_t n = 1; | |
while(true) { | |
//verify result | |
for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) { | |
uint32_t result = F(n, i); | |
if(unlikely(result != expected_value_for_bit[i])) { | |
VerificationFail(); | |
return {false, n, i, expected_value_for_bit[i], result}; | |
} | |
} | |
//now update the expected values | |
for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) { | |
expected_value_for_bit[i] += (n & (1 << i)) > 0 ? 1 : 0; | |
} | |
if(unlikely(n % (numeric_limits<uint32_t>::max()/100) == 0)) { | |
cout << "."; | |
cout.flush(); | |
} | |
if(likely(n < numeric_limits<uint32_t>::max())) { | |
++n; | |
} | |
else { | |
break; | |
} | |
} | |
return {true, 0, 0, 0, 0}; | |
} | |
template<uint32_t (*F)(uint32_t, uint32_t)> | |
static void verify_algorithm(const char* name) | |
{ | |
cout << "Verifiying " << name; | |
verification_result verify_result = verify<F>(); | |
if(verify_result.valid) { | |
cout << "ok" << endl; | |
} | |
else { | |
cout << "FAIL!" << endl; | |
cout << "\t failed for n = " << verify_result.n << " i = " << verify_result.i << endl; | |
cout << "\t expected " << verify_result.expected << ", but got " << verify_result.result << " instead" << endl; | |
} | |
} | |
//SIMON | |
/* | |
return 0xFFFFFFFF if bit at index i is set, else return 0x0 | |
*/ | |
static inline uint32_t mask_from_bit_at_index(uint32_t source, uint32_t i) | |
{ | |
//(~source >> i) & 1 will be 1 if bit at i is 0, else it will be 0 | |
//then we subtract 1, so 0 will be 0xFFFFFFFF, and 1 will be 0x0 | |
return ((~source >> i) & 1) - 1; | |
} | |
static inline uint32_t mod_power_of_2(uint32_t x, uint32_t power) | |
{ | |
return x & ((1ull << power) - 1); | |
} | |
static inline uint32_t unset_bit_at(uint32_t x, uint32_t i) | |
{ | |
uint32_t mask = ~(1 << i); | |
return x & mask; | |
} | |
/* | |
Basic calculation: | |
f(n, i) = (n+1)//2^(i+1)*2^i + zeta((n+1) % 2^(i+1) - 2^i) | |
with zeta(x) = x >= 0 ? x : 0 | |
*/ | |
static uint32_t simon_i_bit_set_in_range(uint32_t n, uint32_t i) | |
{ | |
uint32_t N = n; | |
//uint32_t count_i_bit_set_in_period = ((N >> i) >> 1) << i; //fancy version of n//2^(i+1)*2^i | |
uint32_t count_i_bit_set_in_period = (N >> 1) & (0xFFFFFFFF << i); | |
uint32_t remaining = mod_power_of_2(N, i + 1); | |
//We want to subtract 2^i from remaining, but if the result is negative we want 0 | |
//To achieve this, we first set the bit at i to 0 and then AND the result with a mask that is 0xFFFFFFFF if bit i is set and 0 if not | |
uint32_t count_i_bit_set_in_remaining_part = unset_bit_at(remaining, i) & mask_from_bit_at_index(remaining, i); | |
return count_i_bit_set_in_period + count_i_bit_set_in_remaining_part; | |
} | |
//Now that I understand how the multiply is optimized away... | |
//But now it's nearly the same as Johannes' code | |
//Just n//2^(i+1)*2^i seems to be calculated a little bit faster | |
static uint32_t simon_i_bit_set_in_range2(uint32_t n, uint32_t i) | |
{ | |
uint32_t N = n; | |
uint32_t count_i_bit_set_in_period = (N >> 1) & (0xFFFFFFFF << i); | |
uint32_t remaining = mod_power_of_2(N, i); | |
uint32_t count_i_bit_set_in_remaining_part = remaining & mask_from_bit_at_index(N, i); | |
return count_i_bit_set_in_period + count_i_bit_set_in_remaining_part; | |
} | |
//SIMON END | |
//JOHANNES | |
static uint32_t johannes_i_bit_set_in_range(uint32_t n, uint32_t bit) | |
{ | |
return (((uint64_t)n >> (bit + 1)) << bit) + (n & ((1ull << bit)-1)) * ((n >> bit) & 1); | |
} | |
//JOHANNES END | |
template<uint32_t (*F)(uint32_t, uint32_t)> | |
static void benchmark(const char* name) __attribute__ ((optnone)) | |
{ | |
const uint32_t N = 0x00ffffff; | |
const uint32_t I = 31; | |
uint64_t calls = 0; | |
volatile uint32_t result; | |
auto start = chrono::high_resolution_clock::now(); | |
for(uint32_t n = 0; n < N; ++n) { | |
for(uint32_t i = 0; i < I; ++i) { | |
result = F(n, i); | |
++calls; | |
} | |
} | |
auto end = chrono::high_resolution_clock::now(); | |
auto diff = end - start; | |
double time_ms = chrono::duration<double, milli>(diff).count(); | |
cout << "Running time for " << name << ": " << endl | |
<< "\t" << time_ms << "ms" << endl | |
<< "\t" << (uint32_t)(calls/time_ms*1000.0) << " ops/s" << endl; | |
} | |
int main(int argc, char* argv[]) | |
{ | |
benchmark<johannes_i_bit_set_in_range>("johannes_i_bit_set_in_range"); | |
benchmark<simon_i_bit_set_in_range>("simon_i_bit_set_in_range"); | |
benchmark<simon_i_bit_set_in_range2>("simon_i_bit_set_in_range2"); | |
//verify_algorithm<johannes_i_bit_set_in_range>("johannes_i_bit_set_in_range"); | |
verify_algorithm<simon_i_bit_set_in_range>("simon_i_bit_set_in_range"); | |
verify_algorithm<simon_i_bit_set_in_range2>("simon_i_bit_set_in_range2"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment