Created
April 19, 2011 20:05
-
-
Save joeyadams/929479 to your computer and use it in GitHub Desktop.
Benchmarking data for line sorting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Generate 1,000,000 lines of pseudorandom text. | |
* Each line matches ([\x20-\x7E]|[\xA0-\xFF]){0,127}\n | |
* | |
* sha256sum of output: | |
* 64a9d302d9fc1d0dc9d351c706153dfdd7821182ac8c3d7ed5581850c13ae240 | |
* | |
* sha256sum of output, sorted with LC_ALL=C sort : | |
* 6e1e5b2cdb848c02c304d08c565dbd34d394b5ee322019098e10a5b7210f314b | |
*/ | |
#define LINE_COUNT 1000000 | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <string.h> | |
typedef struct isaac_ctx isaac_ctx; | |
#define ISAAC_SZ_LOG (8) | |
#define ISAAC_SZ (1<<ISAAC_SZ_LOG) | |
#define ISAAC_SZ_BYTES (ISAAC_SZ*4) | |
#define ISAAC_SEED_SZ_MAX (ISAAC_SZ<<2) | |
struct isaac_ctx{ | |
uint32_t r[ISAAC_SZ]; | |
uint32_t m[ISAAC_SZ]; | |
uint32_t a; | |
uint32_t b; | |
uint32_t c; | |
uint8_t r_bytes[ISAAC_SZ_BYTES]; | |
}; | |
static void isaac_init(isaac_ctx *_ctx,const unsigned char *_seed,int _nseed); | |
static void isaac_reseed(isaac_ctx *_ctx,const unsigned char *_seed,int _nseed); | |
static void isaac_mix(uint32_t _x[8]); | |
static void isaac_update(isaac_ctx *_ctx); | |
int main(void) | |
{ | |
isaac_ctx isaac; | |
uint8_t *rb, *rbe; | |
uint32_t i; | |
isaac_init(&isaac, NULL, 0); | |
rb = isaac.r_bytes; | |
rbe = isaac.r_bytes + ISAAC_SZ_BYTES; | |
#define next_byte() (rb < rbe ? *rb++ : \ | |
(isaac_update(&isaac), rb = isaac.r_bytes, *rb++)) | |
for (i = 0; i < LINE_COUNT; i++) { | |
unsigned int i, len; | |
uint8_t line[128]; | |
len = next_byte(); | |
do | |
len = next_byte(); | |
while (len > 127); | |
for (i = 0; i < len; i++) { | |
for (;;) { | |
uint8_t c = next_byte(); | |
if (c < 0x7E - 0x20 + 1) { | |
line[i] = c + 0x20; | |
break; | |
} else if (c < (0x7E - 0x20 + 1) + (0xFF - 0xA0 + 1)) { | |
line[i] = c - (0x7E - 0x20 + 1) + 0xA0; | |
break; | |
} | |
} | |
} | |
line[len] = '\n'; | |
fwrite(line, 1, len + 1, stdout); | |
} | |
return 0; | |
} | |
/* | |
* ISAAC pseudorandom generator extracted/adapted from code written by: | |
* Timothy B. Terriberry ([email protected]) 1999-2009 public domain. | |
* Based on the public domain implementation by Robert J. Jenkins Jr. | |
*/ | |
static void isaac_init(isaac_ctx *_ctx,const unsigned char *_seed,int _nseed){ | |
_ctx->a=_ctx->b=_ctx->c=0; | |
memset(_ctx->r,0,sizeof(_ctx->r)); | |
isaac_reseed(_ctx,_seed,_nseed); | |
} | |
static void isaac_reseed(isaac_ctx *_ctx,const unsigned char *_seed,int _nseed){ | |
uint32_t *m; | |
uint32_t *r; | |
uint32_t x[8]; | |
int i; | |
int j; | |
m=_ctx->m; | |
r=_ctx->r; | |
if(_nseed>ISAAC_SEED_SZ_MAX)_nseed=ISAAC_SEED_SZ_MAX; | |
for(i=0;i<_nseed>>2;i++){ | |
r[i]^=(uint32_t)_seed[i<<2|3]<<24|(uint32_t)_seed[i<<2|2]<<16| | |
(uint32_t)_seed[i<<2|1]<<8|_seed[i<<2]; | |
} | |
_nseed-=i<<2; | |
if(_nseed>0){ | |
uint32_t ri; | |
ri=_seed[i<<2]; | |
for(j=1;j<_nseed;j++)ri|=(uint32_t)_seed[i<<2|j]<<(j<<3); | |
r[i++]^=ri; | |
} | |
x[0]=x[1]=x[2]=x[3]=x[4]=x[5]=x[6]=x[7]=0x9E3779B9U; | |
for(i=0;i<4;i++)isaac_mix(x); | |
for(i=0;i<ISAAC_SZ;i+=8){ | |
for(j=0;j<8;j++)x[j]+=r[i+j]; | |
isaac_mix(x); | |
memcpy(m+i,x,sizeof(x)); | |
} | |
for(i=0;i<ISAAC_SZ;i+=8){ | |
for(j=0;j<8;j++)x[j]+=m[i+j]; | |
isaac_mix(x); | |
memcpy(m+i,x,sizeof(x)); | |
} | |
isaac_update(_ctx); | |
} | |
static void isaac_mix(uint32_t _x[8]){ | |
static const unsigned char SHIFT[8]={11,2,8,16,10,4,8,9}; | |
int i; | |
for(i=0;i<8;i++){ | |
_x[i]^=_x[(i+1)&7]<<SHIFT[i]; | |
_x[(i+3)&7]+=_x[i]; | |
_x[(i+1)&7]+=_x[(i+2)&7]; | |
i++; | |
_x[i]^=_x[(i+1)&7]>>SHIFT[i]; | |
_x[(i+3)&7]+=_x[i]; | |
_x[(i+1)&7]+=_x[(i+2)&7]; | |
} | |
} | |
/* Extract ISAAC_SZ_LOG bits (starting at bit 2). */ | |
static inline uint32_t lower_bits(uint32_t x) | |
{ | |
return (x & ((ISAAC_SZ-1) << 2)) >> 2; | |
} | |
/* Extract next ISAAC_SZ_LOG bits (starting at bit ISAAC_SZ_LOG+2). */ | |
static inline uint32_t upper_bits(uint32_t y) | |
{ | |
return (y >> (ISAAC_SZ_LOG+2)) & (ISAAC_SZ-1); | |
} | |
static void isaac_update(isaac_ctx *_ctx){ | |
uint32_t *m; | |
uint32_t *r; | |
uint32_t a; | |
uint32_t b; | |
uint32_t x; | |
uint32_t y; | |
int i; | |
m=_ctx->m; | |
r=_ctx->r; | |
a=_ctx->a; | |
b=_ctx->b+(++_ctx->c); | |
for(i=0;i<ISAAC_SZ/2;i++){ | |
x=m[i]; | |
a=(a^a<<13)+m[i+ISAAC_SZ/2]; | |
m[i]=y=m[lower_bits(x)]+a+b; | |
r[i]=b=m[upper_bits(y)]+x; | |
x=m[++i]; | |
a=(a^a>>6)+m[i+ISAAC_SZ/2]; | |
m[i]=y=m[lower_bits(x)]+a+b; | |
r[i]=b=m[upper_bits(y)]+x; | |
x=m[++i]; | |
a=(a^a<<2)+m[i+ISAAC_SZ/2]; | |
m[i]=y=m[lower_bits(x)]+a+b; | |
r[i]=b=m[upper_bits(y)]+x; | |
x=m[++i]; | |
a=(a^a>>16)+m[i+ISAAC_SZ/2]; | |
m[i]=y=m[lower_bits(x)]+a+b; | |
r[i]=b=m[upper_bits(y)]+x; | |
} | |
for(i=ISAAC_SZ/2;i<ISAAC_SZ;i++){ | |
x=m[i]; | |
a=(a^a<<13)+m[i-ISAAC_SZ/2]; | |
m[i]=y=m[lower_bits(x)]+a+b; | |
r[i]=b=m[upper_bits(y)]+x; | |
x=m[++i]; | |
a=(a^a>>6)+m[i-ISAAC_SZ/2]; | |
m[i]=y=m[lower_bits(x)]+a+b; | |
r[i]=b=m[upper_bits(y)]+x; | |
x=m[++i]; | |
a=(a^a<<2)+m[i-ISAAC_SZ/2]; | |
m[i]=y=m[lower_bits(x)]+a+b; | |
r[i]=b=m[upper_bits(y)]+x; | |
x=m[++i]; | |
a=(a^a>>16)+m[i-ISAAC_SZ/2]; | |
m[i]=y=m[lower_bits(x)]+a+b; | |
r[i]=b=m[upper_bits(y)]+x; | |
} | |
_ctx->b=b; | |
_ctx->a=a; | |
/* Now unpack the words into bytes. */ | |
{ | |
uint8_t *o = _ctx->r_bytes; | |
unsigned int i; | |
for (i = 0; i < ISAAC_SZ; i++) { | |
uint32_t n = _ctx->r[i]; | |
*o++ = n >> 24; | |
*o++ = n >> 16; | |
*o++ = n >> 8; | |
*o++ = n; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment