Skip to content

Instantly share code, notes, and snippets.

@joeyadams
Created April 19, 2011 20:05
Show Gist options
  • Save joeyadams/929479 to your computer and use it in GitHub Desktop.
Save joeyadams/929479 to your computer and use it in GitHub Desktop.
Benchmarking data for line sorting
/*
* Generate 1,000,000 lines of pseudorandom text.
* Each line matches ([\x20-\x7E]|[\xA0-\xFF]){0,127}\n
*
* sha256sum of output:
* 64a9d302d9fc1d0dc9d351c706153dfdd7821182ac8c3d7ed5581850c13ae240
*
* sha256sum of output, sorted with LC_ALL=C sort :
* 6e1e5b2cdb848c02c304d08c565dbd34d394b5ee322019098e10a5b7210f314b
*/
#define LINE_COUNT 1000000
#include <stdint.h>
#include <stdio.h>
#include <string.h>
typedef struct isaac_ctx isaac_ctx;
#define ISAAC_SZ_LOG (8)
#define ISAAC_SZ (1<<ISAAC_SZ_LOG)
#define ISAAC_SZ_BYTES (ISAAC_SZ*4)
#define ISAAC_SEED_SZ_MAX (ISAAC_SZ<<2)
struct isaac_ctx{
uint32_t r[ISAAC_SZ];
uint32_t m[ISAAC_SZ];
uint32_t a;
uint32_t b;
uint32_t c;
uint8_t r_bytes[ISAAC_SZ_BYTES];
};
static void isaac_init(isaac_ctx *_ctx,const unsigned char *_seed,int _nseed);
static void isaac_reseed(isaac_ctx *_ctx,const unsigned char *_seed,int _nseed);
static void isaac_mix(uint32_t _x[8]);
static void isaac_update(isaac_ctx *_ctx);
int main(void)
{
isaac_ctx isaac;
uint8_t *rb, *rbe;
uint32_t i;
isaac_init(&isaac, NULL, 0);
rb = isaac.r_bytes;
rbe = isaac.r_bytes + ISAAC_SZ_BYTES;
#define next_byte() (rb < rbe ? *rb++ : \
(isaac_update(&isaac), rb = isaac.r_bytes, *rb++))
for (i = 0; i < LINE_COUNT; i++) {
unsigned int i, len;
uint8_t line[128];
len = next_byte();
do
len = next_byte();
while (len > 127);
for (i = 0; i < len; i++) {
for (;;) {
uint8_t c = next_byte();
if (c < 0x7E - 0x20 + 1) {
line[i] = c + 0x20;
break;
} else if (c < (0x7E - 0x20 + 1) + (0xFF - 0xA0 + 1)) {
line[i] = c - (0x7E - 0x20 + 1) + 0xA0;
break;
}
}
}
line[len] = '\n';
fwrite(line, 1, len + 1, stdout);
}
return 0;
}
/*
* ISAAC pseudorandom generator extracted/adapted from code written by:
* Timothy B. Terriberry ([email protected]) 1999-2009 public domain.
* Based on the public domain implementation by Robert J. Jenkins Jr.
*/
static void isaac_init(isaac_ctx *_ctx,const unsigned char *_seed,int _nseed){
_ctx->a=_ctx->b=_ctx->c=0;
memset(_ctx->r,0,sizeof(_ctx->r));
isaac_reseed(_ctx,_seed,_nseed);
}
static void isaac_reseed(isaac_ctx *_ctx,const unsigned char *_seed,int _nseed){
uint32_t *m;
uint32_t *r;
uint32_t x[8];
int i;
int j;
m=_ctx->m;
r=_ctx->r;
if(_nseed>ISAAC_SEED_SZ_MAX)_nseed=ISAAC_SEED_SZ_MAX;
for(i=0;i<_nseed>>2;i++){
r[i]^=(uint32_t)_seed[i<<2|3]<<24|(uint32_t)_seed[i<<2|2]<<16|
(uint32_t)_seed[i<<2|1]<<8|_seed[i<<2];
}
_nseed-=i<<2;
if(_nseed>0){
uint32_t ri;
ri=_seed[i<<2];
for(j=1;j<_nseed;j++)ri|=(uint32_t)_seed[i<<2|j]<<(j<<3);
r[i++]^=ri;
}
x[0]=x[1]=x[2]=x[3]=x[4]=x[5]=x[6]=x[7]=0x9E3779B9U;
for(i=0;i<4;i++)isaac_mix(x);
for(i=0;i<ISAAC_SZ;i+=8){
for(j=0;j<8;j++)x[j]+=r[i+j];
isaac_mix(x);
memcpy(m+i,x,sizeof(x));
}
for(i=0;i<ISAAC_SZ;i+=8){
for(j=0;j<8;j++)x[j]+=m[i+j];
isaac_mix(x);
memcpy(m+i,x,sizeof(x));
}
isaac_update(_ctx);
}
static void isaac_mix(uint32_t _x[8]){
static const unsigned char SHIFT[8]={11,2,8,16,10,4,8,9};
int i;
for(i=0;i<8;i++){
_x[i]^=_x[(i+1)&7]<<SHIFT[i];
_x[(i+3)&7]+=_x[i];
_x[(i+1)&7]+=_x[(i+2)&7];
i++;
_x[i]^=_x[(i+1)&7]>>SHIFT[i];
_x[(i+3)&7]+=_x[i];
_x[(i+1)&7]+=_x[(i+2)&7];
}
}
/* Extract ISAAC_SZ_LOG bits (starting at bit 2). */
static inline uint32_t lower_bits(uint32_t x)
{
return (x & ((ISAAC_SZ-1) << 2)) >> 2;
}
/* Extract next ISAAC_SZ_LOG bits (starting at bit ISAAC_SZ_LOG+2). */
static inline uint32_t upper_bits(uint32_t y)
{
return (y >> (ISAAC_SZ_LOG+2)) & (ISAAC_SZ-1);
}
static void isaac_update(isaac_ctx *_ctx){
uint32_t *m;
uint32_t *r;
uint32_t a;
uint32_t b;
uint32_t x;
uint32_t y;
int i;
m=_ctx->m;
r=_ctx->r;
a=_ctx->a;
b=_ctx->b+(++_ctx->c);
for(i=0;i<ISAAC_SZ/2;i++){
x=m[i];
a=(a^a<<13)+m[i+ISAAC_SZ/2];
m[i]=y=m[lower_bits(x)]+a+b;
r[i]=b=m[upper_bits(y)]+x;
x=m[++i];
a=(a^a>>6)+m[i+ISAAC_SZ/2];
m[i]=y=m[lower_bits(x)]+a+b;
r[i]=b=m[upper_bits(y)]+x;
x=m[++i];
a=(a^a<<2)+m[i+ISAAC_SZ/2];
m[i]=y=m[lower_bits(x)]+a+b;
r[i]=b=m[upper_bits(y)]+x;
x=m[++i];
a=(a^a>>16)+m[i+ISAAC_SZ/2];
m[i]=y=m[lower_bits(x)]+a+b;
r[i]=b=m[upper_bits(y)]+x;
}
for(i=ISAAC_SZ/2;i<ISAAC_SZ;i++){
x=m[i];
a=(a^a<<13)+m[i-ISAAC_SZ/2];
m[i]=y=m[lower_bits(x)]+a+b;
r[i]=b=m[upper_bits(y)]+x;
x=m[++i];
a=(a^a>>6)+m[i-ISAAC_SZ/2];
m[i]=y=m[lower_bits(x)]+a+b;
r[i]=b=m[upper_bits(y)]+x;
x=m[++i];
a=(a^a<<2)+m[i-ISAAC_SZ/2];
m[i]=y=m[lower_bits(x)]+a+b;
r[i]=b=m[upper_bits(y)]+x;
x=m[++i];
a=(a^a>>16)+m[i-ISAAC_SZ/2];
m[i]=y=m[lower_bits(x)]+a+b;
r[i]=b=m[upper_bits(y)]+x;
}
_ctx->b=b;
_ctx->a=a;
/* Now unpack the words into bytes. */
{
uint8_t *o = _ctx->r_bytes;
unsigned int i;
for (i = 0; i < ISAAC_SZ; i++) {
uint32_t n = _ctx->r[i];
*o++ = n >> 24;
*o++ = n >> 16;
*o++ = n >> 8;
*o++ = n;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment