Last active
October 24, 2017 06:17
-
-
Save voidlizard/4dd7941d98b557a39e251109340335bf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// One token slot.
//
// Layout: a header `h` followed by 8 bytes of inline storage. `h.u.data` is a
// zero-length array (GNU extension) sitting at the very end of the header, so
// indexing it addresses the bytes that follow the header: `reserved` and `nul`
// for an inline chunk, or the remainder of the malloc'ed block for a heap chunk.
//
// `h.ext` selects the representation:
//   0    - the token is stored inline (up to 7 characters plus the terminator)
//   1    - the token is stored in the heap chunk `h.u.p`
//   >= 2 - this chunk is itself a heap block of (1 << ext) chunks' worth of bytes
struct chunk {
    struct {
        uint16_t ext;            // representation selector / size exponent (see above)
        uint16_t len;            // token length in bytes, terminator not counted
        struct {
            struct chunk *p;     // heap chunk holding the text when ext == 1
            char data[0];        // start of the character storage
        } u;
    } h;
    char reserved[7];            // inline storage reached through h.u.data
    char nul[1];                 // room for the terminator of a 7-character inline token
} __attribute__ ((aligned (8)));
static inline char *chunk_cstr( struct chunk *c ) { | |
switch( c->h.ext ) { | |
case 1: | |
return c->h.u.p->h.u.data; | |
default: | |
return c->h.u.data; | |
} | |
} | |
static inline size_t chunk_size( uint16_t ext ) { | |
switch( ext ) { | |
case 0: | |
case 1: | |
return sizeof(struct chunk); | |
default: | |
return (1 << ext)*sizeof(struct chunk); | |
} | |
} | |
static inline size_t chunk_data_size( struct chunk *c ) { | |
return chunk_size(c->h.ext) - sizeof(c->h) - 1; | |
} | |
// True when `chr` separates tokens: space, tab, carriage return or newline.
static inline bool isdelim(char chr) {
    return chr == ' ' || chr == '\t' || chr == '\r' || chr == '\n';
}
struct chunk *read_chunk( FILE *from, struct chunk *c, size_t *cnt ) { | |
tailrec: | |
if( !c || feof(from) ) { | |
return 0; | |
} | |
c->h.len = 0; | |
c->h.u.p = 0; | |
for( ;; ) { | |
int chr = fgetc(from); | |
if( isdelim(chr) || chr < 0 ) { | |
break; | |
} | |
// APPEND CHAR TO TOKEN | |
if( c->h.len >= chunk_data_size(c) ) { | |
if( c->h.ext == 0 ) { | |
struct chunk *tmp = malloc( chunk_size(2) ); | |
memcpy(tmp, c, sizeof(struct chunk)); | |
c->h.ext = 1; | |
c->h.u.p = tmp; | |
tmp->h.ext = 2; | |
c = tmp; | |
} else { | |
c = realloc(c, chunk_size(c->h.ext+1) ); | |
c->h.ext++; | |
} | |
} | |
c->h.u.data[c->h.len++] = chr; | |
} | |
if( !c->h.len ) goto tailrec; | |
// EMIT CURRENT CHUNK | |
c->h.u.data[c->h.len] = 0; | |
(*cnt)++; | |
return c; | |
} | |
static inline void *advance(size_t *gen, struct chunk **p, size_t *n, size_t i) { | |
if( i >= *n ) { | |
*n = *n + *gen; | |
*p = realloc(*p, *n * sizeof(struct chunk)); | |
(*gen)+=*n; | |
} | |
return *p; | |
} | |
// Output staging buffer. The trailing 8 bytes are slack: dump_avail() never
// reports them as available, which lets dump_chunk() use a fixed 8-byte copy.
static char output[4096*512+8];
static char *out_e = output + sizeof(output);   // one past the end of the buffer
static char *out_p = output;                    // next free byte
static inline void dump_fflush() { | |
write(1, output, out_p - output); | |
out_p = output; | |
return; | |
} | |
static inline size_t dump_avail() { | |
return out_e - out_p - 8; | |
} | |
static inline void dump_char(char c) { | |
if( !dump_avail() ) { | |
dump_fflush(); | |
} | |
*out_p++ = c; | |
} | |
static inline void dump_chunk(struct chunk *c) { | |
const size_t len = c->h.len; | |
char *src = chunk_cstr(c); | |
if( dump_avail() < len ) { | |
dump_fflush(); | |
} | |
if( __builtin_expect(!!(len>8), 0) ) { | |
memcpy(out_p, src, len); | |
} else { | |
memcpy(out_p, src, 8); | |
} | |
src += len; | |
out_p += len; | |
} | |
int main(int argc, char **argv) { | |
size_t i = 0; | |
struct chunk *p = calloc(sizeof(struct chunk), 1000); | |
struct chunk *s = calloc(sizeof(struct chunk), 1000); | |
size_t pi = 0, pe = 10, pg = 1; | |
size_t si = 0, se = 10, sg = 1; | |
struct chunk *r = 0, *to = &p[pi++]; | |
size_t pn = 0, sn = 0; | |
size_t *k = &pn; | |
while( (r = read_chunk(stdin, to, k) ) ) { | |
if( (++i % 2) == 0 ) { | |
advance(&pg, &p, &pe, pi); | |
to = &p[pi++]; | |
k = &pn; | |
} else { | |
advance(&sg, &s, &se, si); | |
to = &s[si++]; | |
k = &sn; | |
} | |
} | |
for(i = 0; i < pn; i++ ) { | |
size_t j = 0; | |
const struct chunk *pref = &p[i]; | |
for(j = 0; j < sn; j++ ) { | |
dump_chunk(pref); | |
dump_chunk(&s[j]); | |
dump_char('\n'); | |
} | |
} | |
dump_fflush(); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment