Skip to content

Instantly share code, notes, and snippets.

@bwedding
Last active June 30, 2021 16:55
Show Gist options
  • Save bwedding/d97c6b1c61e28aca9973817b42a3e97f to your computer and use it in GitHub Desktop.
Save bwedding/d97c6b1c61e28aca9973817b42a3e97f to your computer and use it in GitHub Desktop.
#include <chrono>
#include <algorithm>
#include <functional>
#include <iostream>
#include <random>
#include <string>
// All this should be in a header, but I'm doing this for ease of use in a Gist
// Readability
using StrlenFunc = size_t(*const)(const char*);
using TimePointRef = std::chrono::time_point<std::chrono::steady_clock> &;
using TimePoint = std::chrono::high_resolution_clock::time_point;
using DurationDblRef = std::chrono::duration<double> &;
using DurationDbl = std::chrono::duration<double>;
// Prototypes
void GetTotalTime(DurationDblRef totalTime, TimePointRef startTime);
void TestStrlenFuncs(int loops, int len, std::string &bigString, StrlenFunc, char *);
std::string randString(int len);
void PrintResults(char *funcName, DurationDblRef totalTime);
// Functions to ttest
size_t strlenopt(const char *str);
size_t RickMeyerStrLen(const char *str);
size_t strlenHandrolled(const char *str);
size_t asmstrlen(const char *str);
size_t strchrAsStrlen(const char *str);
// Constants
const int MaxString = 0xfffff0;
const int LongStrLen = 0xfffff;
const int ShortStrLen = 0xff;
const int LongStrLoops = 5000;
const int ShortStrLoops = 100000;
int main()
{
// Generate 1 random string to use for all loops
// but will be indexed into at various parts for
// variability
std::cout << "Creating the big random string" << std::endl;
std::string bigString = randString(MaxString);
// Adjust loops and string len for your system
std::cout << "Long strings of len " << LongStrLen << std::endl;
TestStrlenFuncs(LongStrLoops, LongStrLen, bigString, asmstrlen, "asmstrlen");
TestStrlenFuncs(LongStrLoops, LongStrLen, bigString, strchrAsStrlen, "strchrAsStrlen");
TestStrlenFuncs(LongStrLoops, LongStrLen, bigString, std::strlen, "std::strlen");
TestStrlenFuncs(LongStrLoops, LongStrLen, bigString, strlenopt, "GLib strlen");
TestStrlenFuncs(LongStrLoops, LongStrLen, bigString, strlenHandrolled, "Handrolled");
TestStrlenFuncs(LongStrLoops, LongStrLen, bigString, RickMeyerStrLen, "RMeyerStrLen");
// Test short strings
std::cout << std::endl << "Short strings of len " << ShortStrLen << std::endl;
TestStrlenFuncs(ShortStrLoops, ShortStrLen, bigString, asmstrlen, "asmstrlen");
TestStrlenFuncs(ShortStrLoops, ShortStrLen, bigString, strchrAsStrlen, "strchrAsStrlen");
TestStrlenFuncs(ShortStrLoops, ShortStrLen, bigString, std::strlen, "std::strlen");
TestStrlenFuncs(ShortStrLoops, ShortStrLen, bigString, strlenopt, "GLib strlen");
TestStrlenFuncs(ShortStrLoops, ShortStrLen, bigString, strlenHandrolled, "Handrolled");
TestStrlenFuncs(ShortStrLoops, ShortStrLen, bigString, RickMeyerStrLen, "RMeyerStrLen");
return 0;
}
// Return time in seconds as a double.
static TimePoint GetTime(void)
{
return std::chrono::high_resolution_clock::now();
}
/* Return the length of the null-terminated string STR. Scan for
the null terminator quickly by testing four bytes at a time. */
size_t strlenopt(const char *str)
{
const char *char_ptr;
const unsigned long int *longword_ptr;
unsigned long int longword, himagic, lomagic;
/* Handle the first few characters by reading one character at a time.
Do this until CHAR_PTR is aligned on a longword boundary. */
for (char_ptr = str; ((unsigned long int) char_ptr
& (sizeof(longword) - 1)) != 0;
++char_ptr)
if (*char_ptr == '\0')
return char_ptr - str;
/* All these elucidatory comments refer to 4-byte longwords,
but the theory applies equally well to 8-byte longwords. */
longword_ptr = (unsigned long int *) char_ptr;
/* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
the "holes." Note that there is a hole just to the left of
each byte, with an extra at the end:
bits: 01111110 11111110 11111110 11111111
bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
The 1-bits make sure that carries propagate to the next 0-bit.
The 0-bits provide holes for carries to fall into. */
himagic = 0x80808080L;
lomagic = 0x01010101L;
if (sizeof(longword) > 4)
{
/* 64-bit version of the magic. */
/* Do the shift in two steps to avoid a warning if long has 32 bits. */
himagic = ((himagic << 16) << 16) | himagic;
lomagic = ((lomagic << 16) << 16) | lomagic;
}
if (sizeof(longword) > 8)
abort();
/* Instead of the traditional loop which tests each character,
we will test a longword at a time. The tricky part is testing
if *any of the four* bytes in the longword in question are zero. */
for (;;)
{
longword = *longword_ptr++;
if (((longword - lomagic) & ~longword & himagic) != 0)
{
/* Which of the bytes was the zero? If none of them were, it was
a misfire; continue the search. */
const char *cp = (const char *)(longword_ptr - 1);
if (cp[0] == 0)
return cp - str;
if (cp[1] == 0)
return cp - str + 1;
if (cp[2] == 0)
return cp - str + 2;
if (cp[3] == 0)
return cp - str + 3;
if (sizeof(longword) > 4)
{
if (cp[4] == 0)
return cp - str + 4;
if (cp[5] == 0)
return cp - str + 5;
if (cp[6] == 0)
return cp - str + 6;
if (cp[7] == 0)
return cp - str + 7;
}
}
}
}
size_t strlenHandrolled(const char *str)
{
const char *p;
if (str == NULL)
return 0;
p = str;
while (*p != '\0')
p++;
return p - str;
}
size_t strchrAsStrlen(const char *str)
{
const char *p = strchr(str, '\0');
return p - str;
}
size_t RickMeyerStrLen(const char* p)
{
auto q = p;
while (*q)
q++;
return q - p;
}
std::string randString(int len)
{
const char*deedle = "|/-\\";
std::mt19937_64 gen{ std::random_device{}() };
std::uniform_int_distribution<short> dist{ 'a', 'z' };
const unsigned length = len + dist(gen);
std::string str(length, '\0');
int i = 0, j = 0;
for (auto& c : str)
{
c = dist(gen);
// Let the user know we're working here
if (j++ > 1000)
{
std::cout << deedle[i++ % strlen(deedle)] << '\b';
j = 0;
}
}
return str;
}
void TestStrlenFuncs(int loops, int len, std::string &bigString, StrlenFunc func, char* printStr)
{
// Don't blow up
_ASSERT(len + 1000 < MaxString);
std::string t;
std::random_device rd; // Will be used to obtain a seed for the random number engine
std::mt19937 gen(rd()); // Standard mersenne_twister_engine seeded with rd()
std::uniform_int_distribution<> dis(1, 1000);
DurationDbl totalTime;
volatile size_t res; // volatile just to ensure there's no optimization
auto startTime = GetTime();
// So what we're doing here is getting a random number between 1 and 1000
// to use as an offset into the big random string we created earlier.
// Then we'll get a sub string from that offset up to the len specified
// by the caller. We'll call the function to benchmark with this substring.
// This way, every function call is getting a different string and no optimization
// can affect our results.
// Creating these substrings ads to the overall benchmark time, but every function
// uses the same code so the overhead shouldn't matter in the test results.
for (int j = 0; j < loops; j++)
{
int offset = dis(gen); // Get a random number to use as an index into bigString
t = bigString.substr(offset, len);
res = func(t.c_str());
}
GetTotalTime(totalTime, startTime);
PrintResults(printStr, totalTime);
}
void GetTotalTime(DurationDblRef totalTime, TimePointRef startTime)
{
totalTime = std::chrono::duration_cast<DurationDbl>(GetTime() - startTime);
}
void PrintResults(char *funcName, DurationDblRef totalTime)
{
std::cout << funcName << ":\t\t\t" << totalTime.count() << " Seconds\n";
}
#ifndef WORDS_BIGENDIAN
#if 0
static inline int count_bits_to_0(unsigned int x) // counting trailing zeroes
{
register int i = 0;
if (!(x & (1 << 0))) i++;
else return i;
if (!(x & (1 << 1))) i++;
else return i;
if (!(x & (1 << 2))) i++;
else return i;
if (!(x & (1 << 3))) i++;
else return i;
if (!(x & (1 << 4))) i++;
else return i;
if (!(x & (1 << 5))) i++;
else return i;
if (!(x & (1 << 6))) i++;
else return i;
if (!(x & (1 << 7))) i++;
else return i;
if (!(x & (1 << 8))) i++;
else return i;
if (!(x & (1 << 9))) i++;
else return i;
if (!(x & (1 << 10))) i++;
else return i;
if (!(x & (1 << 11))) i++;
else return i;
if (!(x & (1 << 12))) i++;
else return i;
if (!(x & (1 << 13))) i++;
else return i;
if (!(x & (1 << 14))) i++;
else return i;
if (!(x & (1 << 15))) i++;
return i;
}
#elif 0
static inline int count_bits_to_0(unsigned int x) // counting trailing zeroes
{
// http://www.hackersdelight.org/: ntz3() shortened for 16-bit mask by Peter Kankowski
register int n = 1;
if ((x & 0x000000FFU) == 0) { n += 8; x >>= 8; }
if ((x & 0x0000000FU) == 0) { n += 4; x >>= 4; }
if ((x & 0x00000003U) == 0) { n += 2; x >>= 2; }
return n - (x & 1);
}
#else
static inline int count_bits_to_0(unsigned int x) // counting trailing zeroes, by Nazo, post: 2009/07/20 03:40
{ // this is current winner for speed
static const unsigned char table[256] =
{
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
};
if ((unsigned char)x)
return table[(unsigned char)x];
return table[x >> 8] + 8; // t[x / 256] + 8
}
#endif
#else
#if 0
static inline int count_bits_to_0(unsigned int x) // counting trailing zeroes
{
register int i = 0;
if (!(x & (1 << 15))) i++;
else return i;
if (!(x & (1 << 14))) i++;
else return i;
if (!(x & (1 << 13))) i++;
else return i;
if (!(x & (1 << 12))) i++;
else return i;
if (!(x & (1 << 11))) i++;
else return i;
if (!(x & (1 << 10))) i++;
else return i;
if (!(x & (1 << 9))) i++;
else return i;
if (!(x & (1 << 8))) i++;
else return i;
if (!(x & (1 << 7))) i++;
else return i;
if (!(x & (1 << 6))) i++;
else return i;
if (!(x & (1 << 5))) i++;
else return i;
if (!(x & (1 << 4))) i++;
else return i;
if (!(x & (1 << 3))) i++;
else return i;
if (!(x & (1 << 2))) i++;
else return i;
if (!(x & (1 << 1))) i++;
else return i;
if (!(x & (1 << 0))) i++;
return i;
}
#else
static inline int count_bits_to_0(unsigned int x) // counting trailing zeroes
{
// http://www.hackersdelight.org/: nlz1() shortened for 16-bit mask
register int n = 0;
if (x <= 0x000000FFU) { n = n + 8; x = x << 8; }
if (x <= 0x00000FFFU) { n = n + 4; x = x << 4; }
if (x <= 0x00003FFFU) { n = n + 2; x = x << 2; }
if (x <= 0x00007FFFU) { n = n + 1; }
return n;
}
#endif
#endif
size_t asmstrlen(const char *str)
{
register size_t len = 0;
// align to 16 bytes
while ((((intptr_t)str) & (sizeof(__m128i) - 1)) != 0)
{
if (*str++ == 0)
return len;
++len;
}
// search for 0
__m128i xmm0 = _mm_setzero_si128();
__m128i xmm1;
int mask = 0;
for (;;)
{
xmm1 = _mm_load_si128((__m128i *)str);
xmm1 = _mm_cmpeq_epi8(xmm1, xmm0);
if ((mask = _mm_movemask_epi8(xmm1)) != 0)
{
// got 0 somewhere within 16 bytes in xmm1, or within 16 bits in mask
// find index of first set bit
#ifndef _DISABLE_ASM_BSF // define it to disable ASM
#if (_MSC_VER >= 1300) // make sure <intrin.h> is included
unsigned long pos;
_BitScanForward(&pos, mask);
len += (size_t)pos;
#elif defined(_MSC_VER) // earlier MSVC's do not have _BitScanForward, use inline asm
__asm bsf edx, mask; edx = bsf(mask)
__asm add edx, len; edx += len
__asm mov len, edx; len = edx
#elif ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))) // modern GCC has built-in __builtin_ctz
len += __builtin_ctz(mask);
#elif defined(__GNUC__) // older GCC shall use inline asm
unsigned int pos;
asm("bsf %1, %0" : "=r" (pos) : "rm" (mask));
len += (size_t)pos;
#else // none of choices exist, use local BSF implementation
len += count_bits_to_0(mask);
#endif
#else
len += count_bits_to_0(mask);
#endif
break;
}
str += sizeof(__m128i);
len += sizeof(__m128i);
}
return len;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment