Created
July 28, 2010 19:42
-
-
Save karthick18/495979 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * DON'T USE strncpy
 * Compile with -m32 if on x86_64
 * Moral of the story: if you have large buffers to be strncpy'ed,
 * don't use strncpy; zero the first byte of the destination and use strncat instead, which is
 * at least 2x faster, depending on how many bytes strncpy would have to zero-fill in the destination.
 * In short, don't use strncpy :-)
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <assert.h>
/*
 * Poor man's profiler: store the CPU time-stamp counter in x, which must be
 * an unsigned 64-bit lvalue.  TSC readings can be skewed by cpufreq scaling,
 * scheduling, etc., but are still good enough for approximations.
 *
 * RDTSC returns the counter split across EDX:EAX.  The two halves are
 * captured separately instead of through the "A" constraint, because "A"
 * only names the EDX:EAX pair on 32-bit x86; on x86-64 it means "either
 * rax or rdx" and would yield a wrong value.
 */
#define rdtsc(x) do {                                                   \
        unsigned int rdtsc_lo_, rdtsc_hi_;                              \
        __asm__ __volatile__("rdtsc"                                    \
                             : "=a"(rdtsc_lo_), "=d"(rdtsc_hi_));       \
        (x) = ((unsigned long long)rdtsc_hi_ << 32) | rdtsc_lo_;        \
    } while (0)
/*
 * Fence placed around the timed region.  CPUID is a serializing instruction
 * (see the Intel SDM), so it keeps earlier instructions from drifting past
 * the following rdtsc; the "memory" clobber stops the compiler from moving
 * memory accesses across it as well.
 *
 * CPUID overwrites EAX, EBX, ECX and EDX, so all four are declared as
 * operands; otherwise the compiler may keep live values in them.  Leaf 0
 * (EAX = 0, ECX = 0) is requested explicitly so every invocation executes
 * the same query instead of whatever happened to be left in EAX.
 */
#define barrier() do {                                                  \
        unsigned int cpuid_a_ = 0, cpuid_b_, cpuid_c_ = 0, cpuid_d_;    \
        __asm__ __volatile__("cpuid"                                    \
                             : "+a"(cpuid_a_), "=b"(cpuid_b_),          \
                               "+c"(cpuid_c_), "=d"(cpuid_d_)           \
                             :                                          \
                             : "memory");                               \
        (void)cpuid_b_;                                                 \
        (void)cpuid_d_;                                                 \
    } while (0)
/*
 * Select the routine under test.  The benchmark always calls "strncpy";
 * building with -DFAKE_STRNCPY redirects that name to xstrncat, otherwise
 * it goes to xstrncpy.  `fake` records the choice for the report line.
 *
 * <string.h> may already provide strncpy as a macro, so drop any such
 * definition first instead of redefining it with a different body.
 */
#undef strncpy
#ifdef FAKE_STRNCPY
#define strncpy xstrncat
static int fake = 1;
#else
#define strncpy xstrncpy
static int fake = 0;
#endif
/*
 * From the Linux kernel: arch/x86/lib/string_32.c
 */
#ifndef FAKE_STRNCPY | |
/*
 * strncpy() written with x86 string instructions.
 *
 * Copies bytes from src to dest until `count` bytes have been written or a
 * NUL has been copied; once the NUL is copied, the rest of the `count`
 * bytes in dest are zero-filled with "rep stosb".  If src holds `count` or
 * more bytes before its NUL, dest is left unterminated.
 *
 * The counter is tested with "js" after the decrement, so a count with the
 * top bit set copies nothing.
 *
 * The temporaries carry pointer/size_t types and "dec" takes its width from
 * the register operand, so the same instruction sequence builds for both
 * -m32 and x86-64.
 *
 * Returns dest.
 */
char *xstrncpy(char *dest, const char *src, size_t count)
{
    const char *src_end;    /* ESI/RSI after the copy (unused) */
    char *dest_end;         /* EDI/RDI after the copy (unused) */
    size_t left;            /* ECX/RCX after the copy (unused) */
    int last;               /* EAX; AL holds the byte just copied (unused) */

    __asm__ __volatile__(
        "1:\tdec %2\n\t"            /* one fewer byte allowed */
        "js 2f\n\t"                 /* budget exhausted: done */
        "lodsb\n\t"                 /* AL = *src++ */
        "stosb\n\t"                 /* *dest++ = AL */
        "testb %%al,%%al\n\t"
        "jne 1b\n\t"                /* keep going until the NUL is copied */
        "rep\n\t"
        "stosb\n"                   /* zero-fill the remaining bytes (AL == 0) */
        "2:"
        : "=&S" (src_end), "=&D" (dest_end), "=&c" (left), "=&a" (last)
        : "0" (src), "1" (dest), "2" (count)
        : "memory");
    return dest;
}
#else | |
/*
 * strncat() written with x86 string instructions.
 *
 * Scans dest for its terminating NUL with "repne scasb", then appends bytes
 * from src until `count` bytes have been copied or src's NUL has been
 * copied, and finally stores one more NUL.  Consequently, when src is
 * shorter than `count`, two NUL bytes end up after the appended text.
 *
 * The temporaries carry pointer/size_t types and the instructions take
 * their width from the register operands, so the same sequence builds for
 * both -m32 and x86-64.
 *
 * Returns dest.
 */
char *xstrncat(char *dest, const char *src, size_t count)
{
    const char *src_end;    /* ESI/RSI after the copy (unused) */
    char *dest_end;         /* EDI/RDI after the copy (unused) */
    size_t acc;             /* EAX/RAX: zero for the scan, then the copied byte */
    size_t left;            /* ECX/RCX: scan limit, then the copy budget */

    __asm__ __volatile__(
        "repne\n\t"
        "scasb\n\t"                 /* advance dest past its NUL (AL == 0) */
        "dec %1\n\t"                /* step back onto the NUL */
        "mov %8,%3\n"               /* copy budget = count */
        "1:\tdec %3\n\t"
        "js 2f\n\t"                 /* budget exhausted: terminate */
        "lodsb\n\t"                 /* AL = *src++ */
        "stosb\n\t"                 /* *dest++ = AL */
        "testb %%al,%%al\n\t"
        "jne 1b\n"                  /* keep going until the NUL is copied */
        "2:\txor %2,%2\n\t"
        "stosb"                     /* store the terminating NUL */
        : "=&S" (src_end), "=&D" (dest_end), "=&a" (acc), "=&c" (left)
        : "0" (src), "1" (dest), "2" ((size_t)0), "3" ((size_t)-1), "g" (count)
        : "memory");
    return dest;
}
#endif | |
int main(int argc, char **argv) | |
{ | |
unsigned long long A=0,B=0,t,avg = 0,min=~0LL, max=0; | |
cpu_set_t set; | |
int samples=10,byte_range=256, pagesize = getpagesize(); | |
char *d, *s; | |
register int i; | |
CPU_ZERO(&set); | |
CPU_SET(0, &set); /*lock proc. to cpu zero to avoid large skews in rdtsc output*/ | |
assert(sched_setaffinity(0, sizeof set, &set) == 0); | |
if(argc > 1) | |
samples = atoi(argv[1]); | |
if(argc > 2) | |
{ | |
byte_range = atoi(argv[2]); | |
if(!byte_range) byte_range = 8; | |
} | |
if(samples < 10) | |
samples = 10; | |
byte_range += 7; | |
byte_range &= ~7; | |
if(byte_range > pagesize) | |
byte_range = pagesize; | |
/* | |
* Lock the zero pages so we don't let page-faults get in | |
*/ | |
d = mmap(0, byte_range, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); | |
s = mmap(0, byte_range, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); | |
assert(d != MAP_FAILED && s != MAP_FAILED); | |
mlock(d, byte_range); | |
mlock(s, byte_range); | |
memset(s, 0xa5, byte_range >> 2); /* copy 1/4th of the byte range into src.*/ | |
for(i = 0; i < samples; ++i) | |
{ | |
barrier(); | |
*d = 0; | |
rdtsc(A); | |
strncpy(d, s, byte_range); | |
rdtsc(B); | |
barrier(); | |
avg+=(t = B-A); | |
if(t < min) min = t; | |
if(t > max) max = t; | |
} | |
printf("samples taken [%d], byte range [%d], Min [%s] time [%lld], max [%lld], avg [%.3f]\n", | |
samples, byte_range, fake ? "strncat":"strncpy", | |
min, max, avg*1.0/samples); | |
return 0; | |
} | |
/*
 * Local variables:
 * c-file-style: "linux"
 * c-basic-offset: 4
 * tab-width: 4
 * compile-command: "gcc -m32 -Wall -g -o strncpy strncpy.c"
 * End:
 */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment