Last active
March 25, 2024 06:51
-
-
Save skeeto/c0665c0a995fe801e1d88b3d0fd9acdb to your computer and use it in GitHub Desktop.
Playing around with a little database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// $ cc -o persona persona.c | |
// $ ./persona <test.txt | |
// Ref: https://old.reddit.com/r/C_Programming/comments/1bmfb7p | |
#include <stddef.h> | |
#include <stdint.h> | |
#include <string.h> | |
#define assert(c) while (!(c)) *(volatile int *)0 = 0 | |
#define countof(a) (ptrdiff_t)(sizeof(a) / sizeof(*(a))) | |
#define new(a, t, n) (t *)alloc(a, sizeof(t), _Alignof(t), n) | |
#define s(s) (Str){(unsigned char *)s, countof(s)-1} | |
typedef struct { | |
char *beg; | |
char *end; | |
} Arena; | |
static void *alloc(Arena *a, ptrdiff_t size, ptrdiff_t align, ptrdiff_t count) | |
{ | |
ptrdiff_t pad = (uintptr_t)a->end & (align - 1); | |
assert(count <= (a->end - a->beg - pad)/size); // out of memory | |
return memset(a->end -= size*count + pad, 0, size*count); | |
} | |
static _Bool digit(unsigned char c) | |
{ | |
return c>='0' && c<='9'; | |
} | |
static _Bool whitespace(unsigned char c) | |
{ | |
return c=='\t' || c=='\n' || c=='\r' || c==' '; | |
} | |
typedef struct { | |
unsigned char *data; | |
ptrdiff_t len; | |
} Str; | |
static Str span(unsigned char *beg, unsigned char *end) | |
{ | |
assert(beg); | |
assert(end); | |
assert(beg <= end); | |
Str r = {0}; | |
r.data = beg; | |
r.len = end - beg; | |
return r; | |
} | |
static _Bool equals(Str a, Str b) | |
{ | |
return a.len==b.len && !memcmp(a.data, b.data, a.len); | |
} | |
static uint32_t hash(Str s) | |
{ | |
uint32_t h = 0x811c9dc5; | |
for (ptrdiff_t i = 0; i < s.len; i++) { | |
h ^= s.data[i]; | |
h *= 0x1000193u; | |
} | |
return h; | |
} | |
// Remove leading and trailing whitespace. | |
static Str trim(Str s) | |
{ | |
unsigned char *beg = s.data; | |
unsigned char *end = beg + s.len; | |
for (; beg<end && whitespace(*beg); beg++) {} | |
for (; end>beg && whitespace(end[-1]); end--) {} | |
return span(beg, end); | |
} | |
typedef struct { | |
Str head; | |
Str tail; | |
_Bool ok; // matched? | |
} Cut; | |
// Split string on the first instance of the given byte value. | |
static Cut cut(Str s, unsigned char c) | |
{ | |
unsigned char *beg = s.data; | |
unsigned char *cut = beg; | |
unsigned char *end = s.data + s.len; | |
for (; cut<end && *cut!=c; cut++) {} | |
Cut r = {0}; | |
r.ok = cut < end; | |
r.head = span(beg, cut); | |
r.tail = span(cut<end ? cut+1 : cut, end); | |
return r; | |
} | |
typedef struct { | |
int value; | |
_Bool ok; | |
} ParsedInt; | |
// Parse a non-negative integer. | |
static ParsedInt parseint(Str s, int min, int max) | |
{ | |
ParsedInt r = {0}; | |
if (!s.len) { | |
return r; | |
} | |
for (ptrdiff_t i = 0; i < s.len; i++) { | |
if (!digit(s.data[i])) { | |
return r; | |
} | |
int v = s.data[i] - '0'; | |
if (v>max || r.value>(max - v)/10) { | |
return r; | |
} | |
r.value = r.value*10 + v; | |
} | |
r.ok = r.value >= min; | |
return r; | |
} | |
typedef struct { | |
Str nome; | |
Str cognome; | |
int giorno; | |
int mese; | |
int anno; | |
Str cf; | |
int retta; | |
} Dati; | |
typedef struct Persona Persona; | |
struct Persona { | |
Persona *next; | |
Persona *child[2]; // intrusive hashmap on cognome | |
Dati info; | |
}; | |
static void insert(Persona **m, Persona *p) | |
{ | |
for (uint32_t h = hash(p->info.cognome); *m; h <<= 1) { | |
m = &(*m)->child[h>>31]; | |
} | |
*m = p; | |
} | |
static Persona *lookup(Persona *m, Str cognome) | |
{ | |
for (uint32_t h = hash(cognome); m; h <<= 1) { | |
if (equals(m->info.cognome, cognome)) { | |
return m; | |
} | |
m = m->child[h>>31]; | |
} | |
return 0; // not found | |
} | |
typedef struct { | |
Persona *personas; // linked list | |
Persona *bycognome; // hash map index | |
ptrdiff_t line; | |
int field; | |
_Bool ok; | |
} Database; | |
static Database parsecsv(Str s, Arena *perm) | |
{ | |
Database r = {0}; | |
Cut line = {0}; | |
line.tail = s; | |
Persona **tail = &r.personas; | |
for (;;) { | |
r.field = 1; | |
line = cut(line.tail, '\n'); | |
if (!line.head.len) { | |
r.ok = !line.tail.len; | |
return r; | |
} | |
r.line++; | |
_Bool err = 0; | |
Cut field = {0}; | |
ParsedInt pi = {0}; | |
field.tail = line.head; | |
Persona *p = new(perm, Persona, 1); | |
field = cut(field.tail, ','); | |
err |= !field.ok; | |
p->info.nome = trim(field.head); | |
r.field += !err; | |
field = cut(field.tail, ','); | |
err |= !field.ok; | |
p->info.cognome = trim(field.head); | |
r.field += !err; | |
field = cut(field.tail, ','); | |
pi = parseint(trim(field.head), 1, 31); | |
err |= !pi.ok; | |
p->info.giorno = pi.value; | |
r.field += !err; | |
field = cut(field.tail, ','); | |
pi = parseint(trim(field.head), 1, 12); | |
err |= !pi.ok; | |
p->info.mese = pi.value; | |
r.field += !err; | |
field = cut(field.tail, ','); | |
pi = parseint(trim(field.head), 1900, 9999); | |
err |= !pi.ok; | |
p->info.anno = pi.value; | |
r.field += !err; | |
field = cut(field.tail, ','); | |
err |= !field.ok; | |
p->info.cf = trim(field.head); | |
r.field += !err; | |
field = cut(field.tail, ','); | |
pi = parseint(trim(field.head), 0, 0x7fffffff); | |
err |= !pi.ok; | |
p->info.retta = pi.value; | |
r.field += !err; | |
if (err) { | |
return r; // bad input | |
} | |
// success: insert into the linked list and hash map | |
*tail = p; | |
tail = &p->next; | |
insert(&r.bycognome, p); | |
} | |
} | |
typedef struct { | |
Arena *perm; | |
Str buf; | |
_Bool err; | |
} StrBuf; | |
// String builder that grows into the given arena. | |
static StrBuf newstr(Arena *perm) | |
{ | |
StrBuf r = {0}; | |
r.perm = perm; | |
r.buf.data = (unsigned char *)perm->beg; | |
return r; | |
} | |
static void printstr(StrBuf *b, Str s) | |
{ | |
unsigned char *end = b->buf.data + b->buf.len; | |
assert(end == (unsigned char *)b->perm->beg); | |
ptrdiff_t available = b->perm->end - b->perm->beg; | |
if (s.len > available) { | |
b->err = 1; // truncated | |
s.len = available; | |
} | |
memcpy(end, s.data, s.len); | |
b->buf.len += s.len; | |
b->perm->beg += s.len; | |
} | |
static void printint(StrBuf *b, int x) | |
{ | |
unsigned char buf[32]; | |
unsigned char *end = buf + countof(buf); | |
unsigned char *beg = end; | |
int t = x<0 ? x : -x; | |
do { | |
*--beg = '0' - (unsigned char)(t%10); | |
} while (t /= 10); | |
if (x < 0) { | |
*--beg = '-'; | |
} | |
printstr(b, span(beg, end)); | |
} | |
static void print(StrBuf *b, Persona *p) | |
{ | |
printstr(b, s("Nome: ")); | |
printstr(b, p->info.nome); | |
printstr(b, s("\nCognome: ")); | |
printstr(b, p->info.cognome); | |
printstr(b, s("\nData di nascita: ")); | |
printint(b, p->info.giorno); | |
printstr(b, s("/")); | |
printint(b, p->info.mese); | |
printstr(b, s("/")); | |
printint(b, p->info.anno); | |
printstr(b, s("\nCF: ")); | |
printstr(b, p->info.cf); | |
printstr(b, s("\nRetta: ")); | |
printint(b, p->info.retta); | |
printstr(b, s(" EURO\n")); | |
} | |
// Demo | |
#include <stdio.h> | |
#include <stdlib.h> | |
static Str loadfile(FILE *f, Arena *perm) | |
{ | |
Str r = {0}; | |
r.data = (unsigned char *)perm->beg; | |
r.len = fread(r.data, 1, perm->end-perm->beg, f); | |
perm->beg += r.len; | |
return r; | |
} | |
int main(void) | |
{ | |
ptrdiff_t cap = (ptrdiff_t)1<<24; | |
Arena scratch = {0}; | |
scratch.beg = malloc(cap); | |
scratch.end = scratch.beg + cap; | |
// Parse standard input into data structure | |
Str input = loadfile(stdin, &scratch); | |
Database r = parsecsv(input, &scratch); | |
if (!r.ok) { | |
fprintf(stderr, "<stdin>:%td:%d: invalid input\n", r.line, r.field); | |
} | |
// Display the results | |
StrBuf b = newstr(&scratch); | |
for (Persona *p = r.personas; p; p = p->next) { | |
if (p != r.personas) { | |
printstr(&b, s("===\n")); | |
} | |
print(&b, p); | |
} | |
fwrite(b.buf.data, 1, b.buf.len, stdout); | |
fflush(stdout); | |
return b.err || ferror(stdout); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Jon,Doe,28,8,1952,NGRC52M68H118R,1995 | |
Foo,Bar,24,3,2024,ABCDEF01234567,54321 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment