Last active
August 29, 2015 14:09
-
-
Save aktau/11975b15dfb8d5504a58 to your computer and use it in GitHub Desktop.
neovim: strchr() test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <limits.h> | |
#include <time.h> | |
#include <stdbool.h> | |
/* test if Vim was right when it implemented strchr() again as vim_strbyte() */ | |
#define NUL '\0' | |
#define NL '\n' | |
/* define some benchmark timing code */ | |
#ifdef __MACH__ | |
#include <mach/mach_time.h> | |
double ns_conversion_factor; | |
double us_conversion_factor; | |
double ms_conversion_factor; | |
void timeinit() { | |
mach_timebase_info_data_t timebase; | |
mach_timebase_info(&timebase); | |
ns_conversion_factor = (double)timebase.numer / (double)timebase.denom; | |
us_conversion_factor = (double)timebase.numer / (double)timebase.denom / 1000; | |
ms_conversion_factor = (double)timebase.numer / (double)timebase.denom / 1000000; | |
} | |
double nsticks() { | |
return mach_absolute_time() * ns_conversion_factor; | |
} | |
double msticks() { | |
return mach_absolute_time() * ms_conversion_factor; | |
} | |
#else | |
void timeinit() { | |
/* do nothing */ | |
} | |
double nsticks() { | |
timespec ts; | |
clock_gettime(CLOCK_MONOTONIC, &ts); | |
return ((double)ts.tv_sec) / 1000000000 + ((double)ts.tv_nsec); | |
} | |
double msticks() { | |
timespec ts; | |
clock_gettime(CLOCK_MONOTONIC, &ts); | |
return ((double)ts.tv_sec) / 1000 + ((double)ts.tv_nsec) * 1000000; | |
} | |
#endif | |
typedef char *(*strchrfn_t)(const char *, int); | |
/* | |
* Version of strchr() that only works for bytes and handles unsigned char | |
* strings with characters above 128 correctly. It also doesn't return a | |
* pointer to the NUL at the end of the string. | |
*/ | |
char *vim_strbyte(const char *string, int c) | |
{ | |
unsigned char *p = (unsigned char *) string; | |
while (*p != NUL) { | |
if (*p == c) | |
return (char *) p; | |
++p; | |
} | |
return NULL; | |
} | |
/* a wrapper around strchr that brings it more in line with vim_strbyte() */ | |
char *akt_strbyte(const char *str, int c) { | |
if (c == 0) { | |
return NULL; /* Vim prefers returning NULL instead of the end of string */ | |
} | |
if (c > (unsigned char) -1) { | |
/* vim_strbyte() never casts c to int, which means that if c >= 255, it | |
* simply never matches and will always return NULL, but not before | |
* running through the entire string first. We emulate this behaviour by | |
* just returning NULL immediately. */ | |
return NULL; | |
} | |
return strchr(str, c); | |
} | |
/* short version of akt_strbyte() above */ | |
char *akt_strbyte_short(const char *str, int c) { | |
return (c > 0 && c <= (unsigned char) -1) ? strchr(str, c) : NULL; | |
} | |
static void compar(strchrfn_t fn1, strchrfn_t fn2, char *str, int c) { | |
char *found1 = fn1(str, c); | |
char *found2 = fn2(str, c); | |
if (found1 != found2) { | |
printf("looking for %d (char: %d): %p = str[%zu]/%p = str[%zu], strlen(str) = %zu\n", | |
c, (int) (char) c, found1, found1 - str, found2, found2 - str, strlen(str)); | |
assert(found1 == found2); | |
} | |
} | |
/* my_strchr() and check_strchr_correctness() below are part of a bugzilla | |
* bugreport. */ | |
void my_strchr(strchrfn_t fn, const char *s, int c) { | |
char *r = fn(s, c); | |
printf("strchr(%d [%d]) = [%d]\n", | |
(int) (unsigned char) c, c, | |
(int) (unsigned char) (r ? *r : 0)); | |
} | |
/* check if strchr is faulty, see | |
* https://sourceware.org/bugzilla/show_bug.cgi?id=12159 */ | |
static void check_strchr_correctness(strchrfn_t fn) { | |
static const char s[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0}; | |
char ch = 228; | |
int d = 228; | |
my_strchr(fn, s, ch); | |
my_strchr(fn, s, (int) ch); | |
my_strchr(fn, s, (unsigned int) ch); | |
my_strchr(fn, s, (unsigned char) ch); | |
my_strchr(fn, s, d); | |
} | |
#define benchmark(fn, str, rounds, use_char) \ | |
benchmarkfn(fn, #fn, str, rounds, use_char) | |
void benchmarkfn(strchrfn_t fn, const char *fnname, const char *str, size_t rounds, bool use_char) { | |
size_t nulls = 0; | |
uint64_t t1 = nsticks(); | |
for (int i = 1; i < rounds; ++i) { | |
char *res = fn(str, use_char ? (unsigned char) i : i); | |
if (!res) { | |
nulls++; | |
} | |
} | |
uint64_t t2 = nsticks(); | |
printf("%s: %8.2f ns per call, %s, nulls counted = %zu\n", | |
use_char ? "chr" : "int", (double) (t2 - t1) / (double) rounds, fnname, nulls); | |
} | |
#define CHECK_STRCHR(fn) \ | |
do { \ | |
printf("checking " #fn ":\n"); \ | |
check_strchr_correctness(fn); \ | |
} while (0) | |
int main() { | |
/* check for basic correctness */ | |
CHECK_STRCHR(strchr); | |
CHECK_STRCHR(akt_strbyte); | |
CHECK_STRCHR(vim_strbyte); | |
char str[256]; | |
/* fill up `str` with all possible bytes that fit in a char */ | |
for (int i = 0; i < sizeof(str); ++i) { | |
str[i] = (sizeof(str) - 1) - i; | |
} | |
/* try to search all sorts of bytes inside of `str`, compare the output | |
* of two different strchr()-like functions */ | |
for (int i = 1; i < 0x10000; ++i) { | |
compar(akt_strbyte_short, vim_strbyte, str, i); | |
} | |
/* test how an implementation responds to NUL */ | |
compar(akt_strbyte, vim_strbyte, str, NUL); | |
/* benchmark */ | |
timeinit(); | |
#define ROUNDS 0x10000 | |
/* with a short string */ | |
printf("benchmarking finding a needle in a short string: %zu chars, %d rounds\n", strlen(str), ROUNDS); | |
benchmark(akt_strbyte_short, str, ROUNDS, true); | |
benchmark(akt_strbyte, str, ROUNDS, true); | |
benchmark(strchr, str, ROUNDS, true); | |
benchmark(vim_strbyte, str, ROUNDS, true); | |
benchmark(akt_strbyte_short, str, ROUNDS, false); | |
benchmark(akt_strbyte, str, ROUNDS, false); | |
benchmark(strchr, str, ROUNDS, false); | |
benchmark(vim_strbyte, str, ROUNDS, false); | |
/* with a longer string filled with random bytes */ | |
srand(time(NULL)); | |
size_t longstrlen = 1 * 256 * 1024; | |
char *longstr = malloc(longstrlen); /* several MBs of string */ | |
for (int i = 0; i < longstrlen; ++i) { | |
char c = rand() % 255 + 1; | |
longstr[i] = c; | |
} | |
longstr[longstrlen - 1] = NUL; | |
printf("benchmarking finding a needle in a longer string: %zu chars\n", strlen(longstr)); | |
benchmark(akt_strbyte_short, longstr, ROUNDS, true); | |
benchmark(akt_strbyte, longstr, ROUNDS, true); | |
benchmark(strchr, longstr, ROUNDS, true); | |
benchmark(vim_strbyte, longstr, ROUNDS, true); | |
benchmark(akt_strbyte_short, longstr, ROUNDS, false); | |
benchmark(akt_strbyte, longstr, ROUNDS, false); | |
benchmark(strchr, longstr, ROUNDS, false); | |
benchmark(vim_strbyte, longstr, ROUNDS, false); | |
memset(longstr, 12, longstrlen); | |
longstr[longstrlen - 1] = NUL; | |
printf("benchmarking finding a needle that's usually not there: %zu chars\n", strlen(longstr)); | |
benchmark(akt_strbyte_short, longstr, ROUNDS, false); | |
benchmark(akt_strbyte, longstr, ROUNDS, false); | |
benchmark(strchr, longstr, ROUNDS, false); | |
benchmark(vim_strbyte, longstr, ROUNDS, false); | |
free(longstr); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment