Last active
March 1, 2022 02:25
-
-
Save vittorioromeo/efa005d44ccd4ec7279181768a0c1f0b to your computer and use it in GitHub Desktop.
`libstdc++` debug performance benchmarks with `[[gnu::always_inline]]`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define NDEBUG 1 | |
#include <benchmark/benchmark.h> | |
#include <vector> | |
#include <algorithm> | |
#include <numeric> | |
// Benchmark: element access through `std::vector<int>::operator[]`.
// Every timed iteration pre-increments each of the 1000 elements once,
// reaching them via the subscript operator.
static void vector_squareop(benchmark::State& state)
{
    // Setup, outside the timed loop: 1000 value-initialized ints.
    std::vector<int> values(1000);

    for(auto _ : state)
    {
        for(int idx = 0; idx < 1000; ++idx)
        {
            // DoNotOptimize keeps the increment from being discarded.
            benchmark::DoNotOptimize(++(values[idx]));
        }
    }
}
// Benchmark: element access through the built-in subscript on a raw
// `int*`. Every timed iteration pre-increments each of the 1000
// elements once.
static void carray_squareop(benchmark::State& state)
{
    // Setup, outside the timed loop: 1000 zero-initialized ints on the heap.
    int* const values = new int[1000]{};

    for(auto _ : state)
    {
        for(int idx = 0; idx < 1000; ++idx)
        {
            // DoNotOptimize keeps the increment from being discarded.
            benchmark::DoNotOptimize(++(values[idx]));
        }
    }

    // Teardown: release the array once the timed loop has finished.
    delete[] values;
}
// Benchmark: element access through vector iterators. Every timed
// iteration pre-increments each of the 1000 elements once, reaching
// element `idx` by dereferencing `begin() + idx`.
static void vector_iter(benchmark::State& state)
{
    // Setup, outside the timed loop: 1000 value-initialized ints.
    std::vector<int> values(1000);

    for(auto _ : state)
    {
        for(int idx = 0; idx < 1000; ++idx)
        {
            // A fresh begin() iterator is built for every element access;
            // that iterator arithmetic is part of what gets timed.
            benchmark::DoNotOptimize(++(*(values.begin() + idx)));
        }
    }
}
// Benchmark: element access through plain pointer arithmetic on a
// built-in array. Every timed iteration pre-increments each of the
// 1000 elements once, reaching element `idx` via `*(array + idx)`.
static void carray_iter(benchmark::State& state)
{
    // Setup, outside the timed loop: a zero-initialized local array.
    int values[1000]{};

    for(auto _ : state)
    {
        for(int idx = 0; idx < 1000; ++idx)
        {
            // DoNotOptimize keeps the increment from being discarded.
            benchmark::DoNotOptimize(++(*(values + idx)));
        }
    }
}
// Builds the input shared by the summation benchmarks: a vector of
// 64000 unsigned ints holding the ascending sequence 0, 1, ..., 63999.
//
// Returns the vector by value.
static std::vector<unsigned int> make_test_container()
{
    // Size the vector up front and let std::iota write the sequence.
    // Starting from `0u` keeps the generated values in the element type,
    // so no std::size_t -> unsigned int narrowing happens per element.
    std::vector<unsigned int> v(64000);
    std::iota(v.begin(), v.end(), 0u);
    return v;
}
// Sums every element of `v` with `std::accumulate`, starting from an
// unsigned zero so the accumulation is done in `unsigned int`.
//
// Returns the total; an empty vector yields 0.
static unsigned int sum_vector_accumulate(const std::vector<unsigned int>& v)
{
    const unsigned int total = std::accumulate(std::begin(v), std::end(v), 0u);
    return total;
}
// Sums every element of `v` with a hand-written indexed loop over the
// vector's underlying storage, without going through iterators or
// `operator[]`.
//
// Returns the total; an empty vector yields 0.
static unsigned int sum_vector_rawloop(const std::vector<unsigned int>& v)
{
    // Fetch the raw pointer and element count once, before the loop.
    const unsigned int* const elements = v.data();
    const std::size_t count = v.size();

    unsigned int total = 0;
    for(std::size_t idx = 0; idx < count; ++idx)
    {
        total += elements[idx];
    }

    return total;
}
// Benchmark: one call to `sum_vector_accumulate` per timed iteration,
// over the container produced by `make_test_container`.
static void sumvec_accumulate(benchmark::State& state)
{
    // Setup, outside the timed loop: build the input once.
    const std::vector<unsigned int> input = make_test_container();

    for(auto _ : state)
    {
        // DoNotOptimize keeps the computed sum from being discarded.
        benchmark::DoNotOptimize(sum_vector_accumulate(input));
    }
}
// Benchmark: one call to `sum_vector_rawloop` per timed iteration,
// over the container produced by `make_test_container`.
static void sumvec_rawloop(benchmark::State& state)
{
    // Setup, outside the timed loop: build the input once.
    const std::vector<unsigned int> input = make_test_container();

    for(auto _ : state)
    {
        // DoNotOptimize keeps the computed sum from being discarded.
        benchmark::DoNotOptimize(sum_vector_rawloop(input));
    }
}
BENCHMARK(vector_squareop); | |
BENCHMARK(carray_squareop); | |
BENCHMARK(vector_iter); | |
BENCHMARK(carray_iter); | |
BENCHMARK(sumvec_accumulate); | |
BENCHMARK(sumvec_rawloop); | |
BENCHMARK_MAIN(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
On MSYS2/MinGW on Windows 10 x64 | |
gcc version 11.2.0 (Rev9, Built by MSYS2 project) | |
Run on (16 X 3600 MHz CPU s) | |
CPU Caches: | |
L1 Data 32 KiB (x8) | |
L1 Instruction 32 KiB (x8) | |
L2 Unified 256 KiB (x8) | |
L3 Unified 16384 KiB (x1) | |
CPU: Intel Core i9-9900K | |
MOBO: GIGABYTE Z390 AORUS MASTER-C | |
RAM: Corsair CMK16GX4M2B3000C15, 4x8GB | |
g++ -std=c++20 -O<X> | |
-pthread | |
./bench0.cpp | |
-lbenchmark -o bench0.exe | |
&& ./bench0.exe | |
--benchmark_repetitions=3 | |
--benchmark_min_time=1 | |
--benchmark_enable_random_interleaving=true | |
--benchmark_report_aggregates_only=true | |
=============================================================================== | |
-O0, without `[[gnu::always_inline]]` on `operator[]` | |
----------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
----------------------------------------------------------------- | |
carray_squareop_mean 1738 ns 1739 ns 3 | |
carray_squareop_median 1699 ns 1707 ns 3 | |
carray_squareop_stddev 70.8 ns 72.6 ns 3 | |
carray_squareop_cv 4.07 % 4.18 % 3 | |
vector_squareop_mean 2398 ns 2390 ns 3 | |
vector_squareop_median 2333 ns 2344 ns 3 | |
vector_squareop_stddev 114 ns 106 ns 3 | |
vector_squareop_cv 4.77 % 4.42 % 3 | |
-Og, without `[[gnu::always_inline]]` on `operator[]` | |
----------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
----------------------------------------------------------------- | |
carray_squareop_mean 440 ns 439 ns 3 | |
carray_squareop_median 439 ns 439 ns 3 | |
carray_squareop_stddev 2.53 ns 0.000 ns 3 | |
carray_squareop_cv 0.57 % 0.00 % 3 | |
vector_squareop_mean 661 ns 662 ns 3 | |
vector_squareop_median 660 ns 660 ns 3 | |
vector_squareop_stddev 2.66 ns 4.33 ns 3 | |
vector_squareop_cv 0.40 % 0.65 % 3 | |
-O0, with `[[gnu::always_inline]]` on `operator[]` | |
----------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
----------------------------------------------------------------- | |
vector_squareop_mean 1956 ns 1953 ns 3 | |
vector_squareop_median 1956 ns 1946 ns 3 | |
vector_squareop_stddev 5.65 ns 12.1 ns 3 | |
vector_squareop_cv 0.29 % 0.62 % 3 | |
carray_squareop_mean 1790 ns 1784 ns 3 | |
carray_squareop_median 1787 ns 1784 ns 3 | |
carray_squareop_stddev 9.78 ns 19.2 ns 3 | |
carray_squareop_cv 0.55 % 1.08 % 3 | |
-Og, with `[[gnu::always_inline]]` on `operator[]` | |
----------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
----------------------------------------------------------------- | |
carray_squareop_mean 440 ns 439 ns 3 | |
carray_squareop_median 440 ns 439 ns 3 | |
carray_squareop_stddev 1.18 ns 0.000 ns 3 | |
carray_squareop_cv 0.27 % 0.00 % 3 | |
vector_squareop_mean 494 ns 491 ns 3 | |
vector_squareop_median 491 ns 491 ns 3 | |
vector_squareop_stddev 5.52 ns 0.000 ns 3 | |
vector_squareop_cv 1.12 % 0.00 % 3 | |
=============================================================================== | |
-O0, without any change to `libstdc++` implementation | |
------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------- | |
vector_iter_mean 7652 ns 7633 ns 3 | |
vector_iter_median 7631 ns 7605 ns 3 | |
vector_iter_stddev 37.8 ns 49.3 ns 3 | |
vector_iter_cv 0.49 % 0.65 % 3 | |
carray_iter_mean 1894 ns 1897 ns 3 | |
carray_iter_median 1896 ns 1904 ns 3 | |
carray_iter_stddev 5.48 ns 12.1 ns 3 | |
carray_iter_cv 0.29 % 0.64 % 3 | |
-Og, without any change to `libstdc++` implementation | |
------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------- | |
carray_iter_mean 455 ns 453 ns 3 | |
carray_iter_median 454 ns 450 ns 3 | |
carray_iter_stddev 4.27 ns 6.04 ns 3 | |
carray_iter_cv 0.94 % 1.33 % 3 | |
vector_iter_mean 506 ns 506 ns 3 | |
vector_iter_median 495 ns 497 ns 3 | |
vector_iter_stddev 22.1 ns 20.5 ns 3 | |
vector_iter_cv 4.37 % 4.04 % 3 | |
-O0, mark `std::vector::begin` as `[[gnu::always_inline]]` | |
------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------- | |
carray_iter_mean 1892 ns 1889 ns 3 | |
carray_iter_median 1889 ns 1882 ns 3 | |
carray_iter_stddev 6.93 ns 13.1 ns 3 | |
carray_iter_cv 0.37 % 0.69 % 3 | |
vector_iter_mean 6827 ns 6849 ns 3 | |
vector_iter_median 6832 ns 6824 ns 3 | |
vector_iter_stddev 19.0 ns 43.3 ns 3 | |
vector_iter_cv 0.28 % 0.63 % 3 | |
-O0, mark `std::vector::begin` as `[[gnu::always_inline]]` and | |
mark `__normal_iterator` constructor, `operator+`, and `operator*` | |
as `[[gnu::always_inline]]` | |
------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------- | |
carray_iter_mean 1889 ns 1890 ns 3 | |
carray_iter_median 1890 ns 1883 ns 3 | |
carray_iter_stddev 7.14 ns 12.1 ns 3 | |
carray_iter_cv 0.38 % 0.64 % 3 | |
vector_iter_mean 5424 ns 5435 ns 3 | |
vector_iter_median 5420 ns 5455 ns 3 | |
vector_iter_stddev 22.8 ns 34.2 ns 3 | |
vector_iter_cv 0.42 % 0.63 % 3 | |
-Og, mark `std::vector::begin` as `[[gnu::always_inline]]` | |
------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------- | |
carray_iter_mean 459 ns 459 ns 3 | |
carray_iter_median 462 ns 460 ns 3 | |
carray_iter_stddev 5.70 ns 7.72 ns 3 | |
carray_iter_cv 1.24 % 1.68 % 3 | |
vector_iter_mean 495 ns 495 ns 3 | |
vector_iter_median 494 ns 495 ns 3 | |
vector_iter_stddev 2.10 ns 5.75 ns 3 | |
vector_iter_cv 0.42 % 1.16 % 3 | |
-Og, mark `std::vector::begin` as `[[gnu::always_inline]]` and | |
mark `__normal_iterator` constructor, `operator+`, and `operator*` | |
as `[[gnu::always_inline]]` | |
------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------- | |
carray_iter_mean 454 ns 455 ns 3 | |
carray_iter_median 454 ns 455 ns 3 | |
carray_iter_stddev 2.97 ns 5.06 ns 3 | |
carray_iter_cv 0.65 % 1.11 % 3 | |
vector_iter_mean 493 ns 493 ns 3 | |
vector_iter_median 492 ns 491 ns 3 | |
vector_iter_stddev 2.51 ns 3.22 ns 3 | |
vector_iter_cv 0.51 % 0.65 % 3 | |
=============================================================================== | |
-O0, without any change to `libstdc++` implementation | |
------------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------------- | |
sumvec_rawloop_mean 111015 ns 111084 ns 3 | |
sumvec_rawloop_median 111040 ns 111084 ns 3 | |
sumvec_rawloop_stddev 95.0 ns 0.000 ns 3 | |
sumvec_rawloop_cv 0.09 % 0.00 % 3 | |
sumvec_accumulate_mean 448967 ns 450040 ns 3 | |
sumvec_accumulate_median 449337 ns 450040 ns 3 | |
sumvec_accumulate_stddev 812 ns 0.000 ns 3 | |
sumvec_accumulate_cv 0.18 % 0.00 % 3 | |
-Og, without any change to `libstdc++` implementation | |
------------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------------- | |
sumvec_accumulate_mean 28104 ns 28041 ns 3 | |
sumvec_accumulate_median 28275 ns 28250 ns 3 | |
sumvec_accumulate_stddev 328 ns 362 ns 3 | |
sumvec_accumulate_cv 1.17 % 1.29 % 3 | |
sumvec_rawloop_mean 17851 ns 17787 ns 3 | |
sumvec_rawloop_median 17710 ns 17578 ns 3 | |
sumvec_rawloop_stddev 356 ns 362 ns 3 | |
sumvec_rawloop_cv 2.00 % 2.04 % 3 | |
-O0, mark `std::move` as `[[gnu::always_inline]]` | |
------------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------------- | |
sumvec_rawloop_mean 110983 ns 110677 ns 3 | |
sumvec_rawloop_median 110880 ns 110212 ns 3 | |
sumvec_rawloop_stddev 218 ns 805 ns 3 | |
sumvec_rawloop_cv 0.20 % 0.73 % 3 | |
sumvec_accumulate_mean 372487 ns 372522 ns 3 | |
sumvec_accumulate_median 372706 ns 372522 ns 3 | |
sumvec_accumulate_stddev 693 ns 0.000 ns 3 | |
sumvec_accumulate_cv 0.19 % 0.00 % 3 | |
-O0, mark `std::move` as `[[gnu::always_inline]]` and mark | |
`__normal_iterator` constructor, `operator+`, `operator*`, and | |
`operator!=` as `[[gnu::always_inline]]` | |
------------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------------- | |
sumvec_rawloop_mean 112385 ns 112305 ns 3 | |
sumvec_rawloop_median 112405 ns 112305 ns 3 | |
sumvec_rawloop_stddev 126 ns 0.000 ns 3 | |
sumvec_rawloop_cv 0.11 % 0.00 % 3 | |
sumvec_accumulate_mean 164825 ns 164339 ns 3 | |
sumvec_accumulate_median 163806 ns 163060 ns 3 | |
sumvec_accumulate_stddev 1970 ns 2215 ns 3 | |
sumvec_accumulate_cv 1.19 % 1.35 % 3 | |
-Og, mark `std::move` as `[[gnu::always_inline]]` and mark | |
`__normal_iterator` constructor, `operator+`, `operator*`, and | |
`operator!=` as `[[gnu::always_inline]]` | |
------------------------------------------------------------------- | |
Benchmark Time CPU Iterations | |
------------------------------------------------------------------- | |
sumvec_rawloop_mean 17541 ns 17520 ns 3 | |
sumvec_rawloop_median 17553 ns 17456 ns 3 | |
sumvec_rawloop_stddev 38.9 ns 111 ns 3 | |
sumvec_rawloop_cv 0.22 % 0.63 % 3 | |
sumvec_accumulate_mean 27646 ns 27768 ns 3 | |
sumvec_accumulate_median 27704 ns 27867 ns 3 | |
sumvec_accumulate_stddev 102 ns 171 ns 3 | |
sumvec_accumulate_cv 0.37 % 0.62 % 3 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment