Skip to content

Instantly share code, notes, and snippets.

@jdtournier
Last active April 18, 2019 09:56
Show Gist options
  • Select an option

  • Save jdtournier/09c2986321fd3532cc3e2a1fdb7496bc to your computer and use it in GitHub Desktop.

Select an option

Save jdtournier/09c2986321fd3532cc3e2a1fdb7496bc to your computer and use it in GitHub Desktop.
vector<bool> tests
===========================================
g++
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7f4d05b3c010
0.012332 constructor
4.54972 find_true = 268435203
5.87218 count_true = 2
11.6554 rotate_64bits
0.002081 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7f4d03b3b010
7.40615 copy_test
12.6272 swap_ranges
real 0m42.143s
user 0m42.083s
sys 0m0.027s
===========================================
g++ -O2
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7ff6810ae010
0.012357 constructor
0.298897 find_true = 268435203
0.286135 count_true = 2
0.68443 rotate_64bits
0.002139 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7ff67f0ad010
0.737079 copy_test
0.753826 swap_ranges
real 0m2.793s
user 0m2.757s
sys 0m0.033s
===========================================
g++ -O3
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7ff92a23a010
0.012148 constructor
0.2872 find_true = 268435203
0.287541 count_true = 2
0.734427 rotate_64bits
0.002044 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7ff928239010
0.667283 copy_test
0.76056 swap_ranges
real 0m2.769s
user 0m2.742s
sys 0m0.023s
===========================================
clang++
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7fc402c34010
0.012382 constructor
6.00291 find_true = 268435203
6.57037 count_true = 2
12.3009 rotate_64bits
0.00219 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7fc400c33010
7.41892 copy_test
13.1829 swap_ranges
real 0m45.508s
user 0m45.303s
sys 0m0.063s
===========================================
clang++ -O2
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7fd6b0a28010
0.012262 constructor
0.416594 find_true = 268435203
0.423148 count_true = 2
0.838044 rotate_64bits
0.002117 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7fd6aea27010
0.695481 copy_test
0.992936 swap_ranges
real 0m3.399s
user 0m3.375s
sys 0m0.020s
===========================================
clang++ -O3
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7f7d85c33010
0.012232 constructor
0.414085 find_true = 268435203
0.421944 count_true = 2
0.828335 rotate_64bits
0.002097 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7f7d83c32010
0.689993 copy_test
0.990964 swap_ranges
real 0m3.378s
user 0m3.352s
sys 0m0.023s
#!/bin/bash
function run {
echo "==========================================="
echo "$@"
echo "==========================================="
"$@" vector_bool.cpp -o vector_bool
time ./vector_bool
echo ""
}
run g++
run g++ -O2
run g++ -O3
run clang++
run clang++ -O2
run clang++ -O3
#include <vector>
#include <algorithm>
#include <chrono>
#include <iostream>
// to track memory allocations:
void * operator new(decltype(sizeof(0)) n)
{
void * p = malloc (n);
std::cerr << " <- allocating " << n << " bytes (" << (double(n)/(1<<20)) << " MB) at " << p << "\n";
return p;
}
void operator delete(void * p)
{
//std::cerr << " <- freeing memory at " << p << "\n";
free (p);
}
// for precise timings:
class Timer {
public:
explicit Timer () : from (std::chrono::high_resolution_clock::now()) { }
void start () {
from = std::chrono::high_resolution_clock::now();
}
double elapsed() const {
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - from).count() * 1.0e-6;
}
private:
std::chrono::high_resolution_clock::time_point from;
};
// A bunch of operations to be tested:
typedef std::vector<bool> VB;
#define NOINLINE __attribute__((noinline))
NOINLINE int find_true(const VB &c) {
auto pos = std::find(c.begin(), c.end(), true);
if (pos == c.end())
return -1;
return pos - c.begin();
}
NOINLINE int count_true(const VB &c) {
return std::count(c.begin(), c.end(), true);
}
NOINLINE void rotate_64bits(VB &c) {
std::rotate(c.begin(), c.begin()+64, c.end());
}
NOINLINE void fill_true(VB &c) {
std::fill(c.begin(), c.end(), true);
}
NOINLINE void copy_test(const VB &src, VB &dst) {
std::copy(src.begin(), src.end(), dst.begin());
}
NOINLINE void swap_ranges(VB &a, VB &b) {
std::swap_ranges(a.begin(), a.end(), b.begin());
}
NOINLINE VB dummy()
{
VB v(1000 * 1024);
fill_true(v);
return v;
}
int main()
{
using namespace std;
Timer t;
int ret;
const size_t numel = 1<<28;
cerr << "expected best-case memory consumption for " << numel << " bits: " << (numel+7)/8 << " bytes (" << (double((numel+7)/8)/(1<<20)) << " MB)\n\n";
VB v(numel); // optimizes away with libc++!
cout << t.elapsed() << " constructor\n";
v[v.size()-10] = true;
v[v.size()-253] = true;
t.start();
ret = find_true(v);
cout << t.elapsed() << " find_true = " << ret << "\n";
t.start();
ret = count_true(v);
cout << t.elapsed() << " count_true = " << ret << "\n";
t.start();
rotate_64bits(v);
cout << t.elapsed() << " rotate_64bits\n";
t.start();
fill_true(v);
cout << t.elapsed() << " fill_true\n";
VB d (v.size());
t.start();
copy_test(v, d);
cout << t.elapsed() << " copy_test\n";
t.start();
swap_ranges(v, d);
cout << t.elapsed() << " swap_ranges\n";
return 0;
}
@jdtournier
Copy link
Copy Markdown
Author

jdtournier commented Apr 18, 2019

Inspired from this blog post, with the code lifted (with thanks) from the code sample referenced in Peter Cordes' comment.

@jdtournier
Copy link
Copy Markdown
Author

Also tested on MSYS2 (Win10 Core i7) - results are essentially identical, for both g++ and clang++

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment