Created
May 15, 2024 15:59
-
-
Save flaviut/b54f7827c2912f88fd8d50feb64c803f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <iomanip> | |
#include <pico/divider.h> | |
#include "fpm/fixed.hpp" | |
// from https://github.com/JoelFilho/EMB | |
#include "emb.hpp" | |
#include "smartdevice/Debug.hpp" | |
#pragma GCC optimize("O3") | |
// DECL_BENCH(suffix, conversion, count)
//
// Generates four benchmark function templates. Each one takes an EMB benchmark
// state `s`, and for every measured pass of `for (auto _: s)` runs `count`
// inner iterations with i = 0 .. count-1:
//
//   benchmark_conv_<suffix>   conversion(i)
//   benchmark_div_<suffix>    1 / conversion(i)
//   benchmark_mul_<suffix>    conversion(i) * conversion(5454.13249)
//   benchmark_add_<suffix>    conversion(i) + conversion(5454.13249)
//
// `conversion` is pasted in front of a parenthesised argument, so it may be a
// type name (functional cast) or a function such as a `from_raw_value` factory.
// Every result is handed to emb::dontOptimize.
//
// The constant right-hand operand is a function-local static named `operand`
// so that it does not shadow the benchmark state parameter `s`.
//
// NOTE(review): i starts at 0, so benchmark_div_* evaluates 1 / conversion(0)
// on its first inner iteration. For integer types that is undefined behaviour
// in C++ -- confirm the target's behaviour is acceptable for this benchmark.
#define DECL_BENCH(suffix, conversion, count)                       \
    template<typename State>                                        \
    void benchmark_conv_ ## suffix(State &s) {                      \
        for (auto _: s) {                                           \
            for (int i = 0; i < (count); i++)                       \
                emb::dontOptimize(conversion(i));                   \
        }                                                           \
    }                                                               \
    template<typename State>                                        \
    void benchmark_div_ ## suffix(State &s) {                       \
        for (auto _: s) {                                           \
            for (int i = 0; i < (count); i++)                       \
                emb::dontOptimize(1/conversion(i));                 \
        }                                                           \
    }                                                               \
    template<typename State>                                        \
    void benchmark_mul_ ## suffix(State &s) {                       \
        for (auto _: s) {                                           \
            static auto operand = conversion(5454.13249);           \
            for (int i = 0; i < (count); i++)                       \
                emb::dontOptimize(conversion(i)*operand);           \
        }                                                           \
    }                                                               \
    template<typename State>                                        \
    void benchmark_add_ ## suffix(State &s) {                       \
        for (auto _: s) {                                           \
            static auto operand = conversion(5454.13249);           \
            for (int i = 0; i < (count); i++)                       \
                emb::dontOptimize(conversion(i)+operand);           \
        }                                                           \
    }
// RUN_BENCH(suffix)
//
// Passes the four function templates produced by DECL_BENCH(suffix, ...) to
// EMB_MAKE_BENCHMARK, one call each. The expansion refers to a variable named
// `benchmarker`, which must be in scope wherever this macro is used.
#define RUN_BENCH(suffix)                                           \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_conv_ ## suffix);     \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_div_ ## suffix);      \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_mul_ ## suffix);      \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_add_ ## suffix);
typedef fpm::fixed<int16_t, int32_t, 8> fixed_8_8; | |
DECL_BENCH(i8, int8_t, 10000) | |
DECL_BENCH(i16, int16_t, 10000) | |
DECL_BENCH(i32, int32_t, 10000) | |
DECL_BENCH(i64, int64_t, 10000) | |
DECL_BENCH(f, float, 1000) | |
DECL_BENCH(d, double, 1000) | |
DECL_BENCH(fgg, fpm::fixed_16_16::from_raw_value, 10000) | |
DECL_BENCH(f88, fixed_8_8::from_raw_value, 10000) | |
template<typename State> | |
void benchmark_hdiv_i32(State &s) { | |
for (auto _: s) { | |
for (int i = 0; i < 10000; i++) | |
emb::dontOptimize(div_s32s32(1, int32_t(i))); | |
} | |
} | |
template<typename State> | |
void benchmark_hdiv_i64(State &s) { | |
for (auto _: s) { | |
for (int i = 0; i < 10000; i++) | |
emb::dontOptimize(div_s64s64(1, i)); | |
} | |
} | |
template<typename State> | |
void benchmark_uhdiv_i32(State &s) { | |
for (auto _: s) { | |
for (int i = 0; i < 10000; i++) | |
emb::dontOptimize(hw_divider_s32_quotient_inlined(1, int32_t(i))); | |
} | |
} | |
// Result sink handed to the benchmarker (see runBenchmarks<Reporter>() in main).
struct Reporter {
    // Writes one tab-separated result line to std::cout:
    //   <name, right-aligned in a 20-character field> \t <iterations> \t <mean>us \t <sd>us \n
    //
    // name        benchmark name
    // iterations  iteration count to print
    // mean, sd    values printed with a "us" suffix; streamed with operator<<,
    //             so Accumulator must be printable to std::ostream
    template<typename Accumulator>
    static void report(const char *name, size_t iterations, Accumulator mean, Accumulator sd) {
        // setw applies only to the next inserted item, i.e. the name column.
        std::cout << std::setw(20) << name;
        std::cout << '\t' << iterations;
        std::cout << '\t' << mean << "us";
        std::cout << '\t' << sd << "us\n";
    }
};
struct cpu_timer { | |
static int64_t now() { | |
return time_us_64(); | |
} | |
}; | |
// EMB benchmarker instantiated with cpu_timer as its time source.
using Benchmarker = emb::Benchmarker<cpu_timer>;
// Entry point: registers every benchmark and runs them, reporting through
// Reporter.
//
// The function is deliberately NOT marked [[noreturn]]: control reaches the
// end of main after the final message, and returning from a [[noreturn]]
// function is undefined behaviour.
int main() {
    Debug::init();
    // NOTE(review): presumably gives the host time to attach to the output
    // channel before results are printed -- confirm.
    sleep_ms(3000);

    std::cout << "Benchmarking...\n";

    // Must be named `benchmarker`: RUN_BENCH refers to it by that name.
    // NOTE(review): 1000 is presumably the number of measured passes per
    // benchmark -- confirm against emb.hpp.
    Benchmarker benchmarker(1000);

    // Explicit divider-routine benchmarks.
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_hdiv_i32)
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_hdiv_i64)
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_uhdiv_i32)

    // Conversion / division / multiplication / addition for each numeric type.
    RUN_BENCH(i8)
    RUN_BENCH(i16)
    RUN_BENCH(i32)
    RUN_BENCH(i64)
    RUN_BENCH(f)
    RUN_BENCH(d)
    RUN_BENCH(fgg)
    RUN_BENCH(f88)

    benchmarker.runBenchmarks<Reporter>();

    std::cout << "Done :D\n";
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
operation | type | Inner loops | mean μs | mean hw μs | cycles | speedup | Property | |
---|---|---|---|---|---|---|---|---|
int conversion | int8 | 10000 | 334 | 335 | 4 | 0.997 | ||
division | int8 | 10000 | 3980 | 5108 | 50 | 0.779 | ||
multiplication | int8 | 10000 | 737 | 737 | 9 | 1 | ||
addition | int8 | 10000 | 734 | 736 | 9 | 0.997 | ||
int conversion | int16 | 10000 | 332 | 330 | 4 | 1.006 | ||
division | int16 | 10000 | 2676 | 5103 | 33 | 0.524 | ||
multiplication | int16 | 10000 | 655 | 654 | 8 | 1.002 | ||
addition | int16 | 10000 | 655 | 654 | 8 | 1.002 | ||
int conversion | int32 | 10000 | 332 | 331 | 4 | 1.003 | ||
division | int32 | 10000 | 2676 | 5102 | 33 | 0.525 | ||
multiplication | int32 | 10000 | 572 | 574 | 7 | 0.997 | ||
addition | int32 | 10000 | 573 | 576 | 7 | 0.995 | ||
int conversion | int64 | 10000 | 412 | 412 | 5 | 1 | ||
division | int64 | 10000 | 17877 | 8013 | 223 | 2.231 | ||
multiplication | int64 | 10000 | 6720 | 6720 | 84 | 1 | ||
addition | int64 | 10000 | 1061 | 1060 | 13 | 1.001 | ||
int conversion | float | 1000 | 672 | 670 | 84 | 1.003 | ||
division | float | 1000 | 4436 | 4434 | 555 | 1 | ||
multiplication | float | 1000 | 1972 | 1972 | 247 | 1 | ||
addition | float | 1000 | 1552 | 1551 | 194 | 1.001 | ||
int conversion | double | 1000 | 664 | 662 | 83 | 1.003 | ||
division | double | 1000 | 6577 | 4936 | 822 | 1.332 | ||
multiplication | double | 1000 | 3541 | 3540 | 443 | 1 | ||
addition | double | 1000 | 1892 | 1892 | 237 | 1 | ||
int conversion | fixed_16_16 | 10000 | 332 | 330 | 4 | 1.006 | ||
division | fixed_16_16 | 10000 | 54347 | 11408 | 679 | 4.764 | ||
multiplication | fixed_16_16 | 10000 | 13755 | 13755 | 172 | 1 | ||
addition | fixed_16_16 | 10000 | 572 | 574 | 7 | 0.997 | ||
int conversion | fixed_8_8 | 10000 | 330 | 331 | 4 | 0.997 | ||
division | fixed_8_8 | 10000 | 6300 | 5423 | 79 | 1.162 | ||
multiplication | fixed_8_8 | 10000 | 1544 | 1545 | 19 | 0.999 | ||
addition | fixed_8_8 | 10000 | 574 | 574 | 7 | 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment