Skip to content

Instantly share code, notes, and snippets.

@BreadFish64
Created December 20, 2020 22:57
Show Gist options
  • Save BreadFish64/9dbde294bb6988ace7dea02739754bf0 to your computer and use it in GitHub Desktop.
Save BreadFish64/9dbde294bb6988ace7dea02739754bf0 to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <array>
#include <bit>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <random>
#include <string>
#include <fmt/chrono.h>
#include <fmt/ostream.h>
#include <immintrin.h>
using namespace std::literals;
/// AVX2 implementation of strlen.
///
/// Walks byte-by-byte until the cursor is aligned to the size of a 256-bit
/// vector, then scans one aligned 32-byte block at a time.
///
/// Note: the vector loads always fetch a whole aligned block, so bytes located
/// after the terminator (but inside the same aligned 32-byte block) are read as
/// well. An aligned 32-byte block cannot straddle a page boundary.
///
/// @param str  Pointer to a NUL-terminated string.
/// @return     Number of characters before the first NUL byte.
[[nodiscard]] std::size_t STRLEN(const char* str) {
    constexpr std::size_t kBlockBytes = sizeof(__m256i);

    // Scalar prologue: advance to the next block-aligned address, bailing out
    // early if the string ends before alignment is reached.
    const char* cursor = str;
    for (; reinterpret_cast<std::size_t>(cursor) % kBlockBytes != 0; ++cursor) {
        if (*cursor == '\0') {
            return static_cast<std::size_t>(cursor - str);
        }
    }

    const __m256i zeros = _mm256_setzero_si256();
    for (;; cursor += kBlockBytes) {
        const __m256i chunk = _mm256_load_si256(reinterpret_cast<const __m256i*>(cursor));
        // One mask bit per byte of the block; a set bit marks a NUL byte.
        const auto nul_mask =
            static_cast<std::uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, zeros)));
        if (nul_mask != 0) {
            // The lowest set bit is the first NUL inside this block.
            const auto offset_in_block = static_cast<std::size_t>(std::countr_zero(nul_mask));
            return static_cast<std::size_t>(cursor - str) + offset_in_block;
        }
    }
}
/// Times one call of a strlen-compatible function and adds the elapsed time to
/// a running total.
///
/// @param strlen          Function under test; called exactly once with `str`.
/// @param str             NUL-terminated string passed to the function under test.
/// @param name            Label of the function under test; only referenced by the
///                        disabled debug print below.
/// @param total_duration  Accumulator that receives the elapsed time of this call.
///
/// A bare `[[noinline]]` is not a standard attribute and is ignored by compilers,
/// so the compiler-specific spelling is selected here to really keep the timing
/// harness out-of-line.
#if defined(__GNUC__) || defined(__clang__)
[[gnu::noinline]]
#elif defined(_MSC_VER)
__declspec(noinline)
#endif
void Test(decltype(&std::strlen) strlen, const char* str, [[maybe_unused]] std::string_view name,
          std::chrono::high_resolution_clock::duration& total_duration) {
    using Clock = std::chrono::high_resolution_clock;

    const auto start = Clock::now();
    const auto len = strlen(str);
    const auto end = Clock::now();
    const auto duration = end - start;

    // Store the result in a volatile object so the return value of the timed
    // call counts as used and the call cannot be treated as dead code.
    [[maybe_unused]] volatile std::size_t sink = len;

    // fmt::print("{}:\nLength: {}, Time: {}\n", name, len, duration);
    total_duration += duration;
}
/// Benchmark driver.
///
/// For every magnitude m in [0, magnitudes) it builds `count` random-length
/// strings with lengths in [2^m, 2^(m+1)), times std::strlen and STRLEN on each
/// of them through Test(), and prints the average characters per second of both
/// plus the ratio as a percentage.
int main() {
    using Clock = std::chrono::high_resolution_clock;
    constexpr unsigned magnitudes = 22;
    constexpr unsigned count = 1024;

    // Seed the engine that is actually used below. Seeding the C generator via
    // std::srand has no effect on a std::default_random_engine, which would
    // otherwise produce the same sequence on every run.
    std::default_random_engine rng{std::random_device{}()};

    // Pool of random non-NUL bytes; every test string is a prefix of it.
    std::string template_str(std::size_t{1} << magnitudes, ' ');
    std::uniform_int_distribution<int> non_zero_byte{1, 255};
    std::ranges::generate(template_str,
                          [&] { return static_cast<char>(non_zero_byte(rng)); });

    fmt::print(R"(
Strlen Experiment
Average characters per second over {} iterations per magnitude of length
Magnitude stdlib avx2 % speed
)", count);

    for (unsigned magnitude{0}; magnitude < magnitudes; ++magnitude) {
        // Lengths for this magnitude are base_length + [0, base_length - 1],
        // which never exceeds the size of template_str.
        const std::size_t base_length = std::size_t{1} << magnitude;
        std::uniform_int_distribution<std::size_t> extra_length{0, base_length - 1};

        std::size_t total_length{0};
        Clock::duration total_standard_duration{};
        Clock::duration total_special_duration{};

        // NOTE(review): the deduced std::array element type requires both tuples
        // to have the same type, i.e. &std::strlen and &STRLEN must be the same
        // function-pointer type - confirm on toolchains whose <cstring> declares
        // strlen as noexcept.
        std::array tests{
            std::make_tuple(&std::strlen, &total_standard_duration, "Standard"),
            std::make_tuple(&STRLEN, &total_special_duration, "Special"),
        };

        for (unsigned i{0}; i < count; ++i) {
            const std::size_t length = base_length + extra_length(rng);
            total_length += length;
            const std::string str(template_str.data(), length);
            for (const auto& [func, total, name] : tests) {
                Test(func, str.data(), name, *total);
            }
            // Randomize the call order for the next string; presumably so that
            // neither implementation always runs first on a freshly built string.
            std::shuffle(tests.begin(), tests.end(), rng);
        }

        // (total_length seconds) / (measured duration) == characters per second.
        const auto avg_standard =
            total_length * std::chrono::duration<double>{1} / total_standard_duration;
        const auto avg_special =
            total_length * std::chrono::duration<double>{1} / total_special_duration;
        fmt::print(R"(2^{} {:.0f} {:.0f} {:.1f}%
)",
                   magnitude, avg_standard, avg_special, avg_special / avg_standard * 100);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment