Skip to content

Instantly share code, notes, and snippets.

@simonwagner
Created January 3, 2016 18:04
Show Gist options
  • Save simonwagner/387cfd7e96ca03c9c71a to your computer and use it in GitHub Desktop.
Save simonwagner/387cfd7e96ca03c9c71a to your computer and use it in GitHub Desktop.
Calculate how many numbers have the bit i set in the range 0-n (excluding n)
#include <iostream>
#include <limits>
#include <functional>
#include <chrono>
using namespace std;
#define likely(x) __builtin_expect (!!(x), 1)
#define unlikely(x) __builtin_expect (!!(x), 0)
struct verification_result {
bool valid;
uint32_t n;
uint32_t i;
uint32_t expected;
uint32_t result;
};
void VerificationFail()
{
}
#define TESTED_BIT_RANGE 32
template<uint32_t (*F)(uint32_t, uint32_t)>
static verification_result verify()
{
uint32_t expected_value_for_bit[TESTED_BIT_RANGE];
//initialize expected values with 0
for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
expected_value_for_bit[i] = 0;
}
//Test for all values of n
uint32_t n = 1;
while(true) {
//verify result
for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
uint32_t result = F(n, i);
if(unlikely(result != expected_value_for_bit[i])) {
VerificationFail();
return {false, n, i, expected_value_for_bit[i], result};
}
}
//now update the expected values
for(uint8_t i = 0; i < TESTED_BIT_RANGE; ++i) {
expected_value_for_bit[i] += (n & (1 << i)) > 0 ? 1 : 0;
}
if(unlikely(n % (numeric_limits<uint32_t>::max()/100) == 0)) {
cout << ".";
cout.flush();
}
if(likely(n < numeric_limits<uint32_t>::max())) {
++n;
}
else {
break;
}
}
return {true, 0, 0, 0, 0};
}
template<uint32_t (*F)(uint32_t, uint32_t)>
static void verify_algorithm(const char* name)
{
cout << "Verifiying " << name;
verification_result verify_result = verify<F>();
if(verify_result.valid) {
cout << "ok" << endl;
}
else {
cout << "FAIL!" << endl;
cout << "\t failed for n = " << verify_result.n << " i = " << verify_result.i << endl;
cout << "\t expected " << verify_result.expected << ", but got " << verify_result.result << " instead" << endl;
}
}
//SIMON
/*
return 0xFFFFFFFF if bit at index i is set, else return 0x0
*/
static inline uint32_t mask_from_bit_at_index(uint32_t source, uint32_t i)
{
//(~source >> i) & 1 will be 1 if bit at i is 0, else it will be 0
//then we subtract 1, so 0 will be 0xFFFFFFFF, and 1 will be 0x0
return ((~source >> i) & 1) - 1;
}
static inline uint32_t mod_power_of_2(uint32_t x, uint32_t power)
{
return x & ((1ull << power) - 1);
}
static inline uint32_t unset_bit_at(uint32_t x, uint32_t i)
{
uint32_t mask = ~(1 << i);
return x & mask;
}
/*
Basic calculation:
f(n, i) = (n+1)//2^(i+1)*2^i + zeta((n+1) % 2^(i+1) - 2^i)
with zeta(x) = x >= 0 ? x : 0
*/
static uint32_t simon_i_bit_set_in_range(uint32_t n, uint32_t i)
{
uint32_t N = n;
//uint32_t count_i_bit_set_in_period = ((N >> i) >> 1) << i; //fancy version of n//2^(i+1)*2^i
uint32_t count_i_bit_set_in_period = (N >> 1) & (0xFFFFFFFF << i);
uint32_t remaining = mod_power_of_2(N, i + 1);
//We want to subtract 2^i from remaining, but if the result is negative we want 0
//To achieve this, we first set the bit at i to 0 and then AND the result with a mask that is 0xFFFFFFFF if bit i is set and 0 if not
uint32_t count_i_bit_set_in_remaining_part = unset_bit_at(remaining, i) & mask_from_bit_at_index(remaining, i);
return count_i_bit_set_in_period + count_i_bit_set_in_remaining_part;
}
//Now that I understand how the multiply is optimized away...
//But now it's nearly the same as Johannes' code
//Just n//2^(i+1)*2^i seems to be calculated a little bit faster
static uint32_t simon_i_bit_set_in_range2(uint32_t n, uint32_t i)
{
uint32_t N = n;
uint32_t count_i_bit_set_in_period = (N >> 1) & (0xFFFFFFFF << i);
uint32_t remaining = mod_power_of_2(N, i);
uint32_t count_i_bit_set_in_remaining_part = remaining & mask_from_bit_at_index(N, i);
return count_i_bit_set_in_period + count_i_bit_set_in_remaining_part;
}
//SIMON END
//JOHANNES
static uint32_t johannes_i_bit_set_in_range(uint32_t n, uint32_t bit)
{
return (((uint64_t)n >> (bit + 1)) << bit) + (n & ((1ull << bit)-1)) * ((n >> bit) & 1);
}
//JOHANNES END
template<uint32_t (*F)(uint32_t, uint32_t)>
static void benchmark(const char* name) __attribute__ ((optnone))
{
const uint32_t N = 0x00ffffff;
const uint32_t I = 31;
uint64_t calls = 0;
volatile uint32_t result;
auto start = chrono::high_resolution_clock::now();
for(uint32_t n = 0; n < N; ++n) {
for(uint32_t i = 0; i < I; ++i) {
result = F(n, i);
++calls;
}
}
auto end = chrono::high_resolution_clock::now();
auto diff = end - start;
double time_ms = chrono::duration<double, milli>(diff).count();
cout << "Running time for " << name << ": " << endl
<< "\t" << time_ms << "ms" << endl
<< "\t" << (uint32_t)(calls/time_ms*1000.0) << " ops/s" << endl;
}
int main(int argc, char* argv[])
{
benchmark<johannes_i_bit_set_in_range>("johannes_i_bit_set_in_range");
benchmark<simon_i_bit_set_in_range>("simon_i_bit_set_in_range");
benchmark<simon_i_bit_set_in_range2>("simon_i_bit_set_in_range2");
//verify_algorithm<johannes_i_bit_set_in_range>("johannes_i_bit_set_in_range");
verify_algorithm<simon_i_bit_set_in_range>("simon_i_bit_set_in_range");
verify_algorithm<simon_i_bit_set_in_range2>("simon_i_bit_set_in_range2");
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment