eatonphil · November 12, 2023 19:58
diff --git a/main.c b/main.c
 #include <assert.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>

 #define NUM_RECORDS 1000000 // 1M
 #define NUM_QUERIES 1000 // number of queries to run in a row

 #define LOCATION_ID 42

 struct User {
  int id;
  char name[255];
  int birth_location_id;
  int time_of_birth;
  int company;
  int state;
  int children;
  bool married;
 };

 struct ColDB {
  int *id;
  char *name; // store the names contiguously to mimic row db
  int *birth_location_id;
  int *time_of_birth;
  int *company;
  int *state;
  int *children;
  bool *married;
 } col_db;

 // row oriented DB is just an array of users.
 struct User *row_db;
 struct ColDB col_db;

 static void setup(void) {
  row_db = malloc(sizeof(struct User) * NUM_RECORDS);
  assert(row_db != NULL);

  col_db.id = malloc(sizeof(int) * NUM_RECORDS);
  assert(col_db.id != NULL);
  col_db.name = malloc(255 * NUM_RECORDS);
  assert(col_db.name != NULL);
  col_db.birth_location_id = malloc(sizeof(int) * NUM_RECORDS);
  assert(col_db.birth_location_id != NULL);
  col_db.time_of_birth = malloc(sizeof(int) * NUM_RECORDS);
  assert(col_db.time_of_birth != NULL);
  col_db.company = malloc(sizeof(int) * NUM_RECORDS);
  assert(col_db.company != NULL);
  col_db.state = malloc(sizeof(int) * NUM_RECORDS);
  assert(col_db.state != NULL);
  col_db.children = malloc(sizeof(int) * NUM_RECORDS);
  assert(col_db.children != NULL);
  col_db.married = malloc(sizeof(int) * NUM_RECORDS);
  assert(col_db.married != NULL);

  srand((unsigned int)time(NULL));

  for (size_t i = 0; i < NUM_RECORDS; i++) {
    int id = abs(rand());
    int l = abs(rand()) % 100;   // make some duplicate birth locations...
    int t = abs(rand()) % 86400; // seconds in a day

    row_db[i].id = id;
    row_db[i].name[32] = '\0'; // just so we can print the names
    row_db[i].birth_location_id = l;
    row_db[i].time_of_birth = t;
    row_db[i].company = l;
    row_db[i].state = l;
    row_db[i].children = l;
    row_db[i].married = t % 10;

    col_db.id[i] = id;
    col_db.name[i * 255 + 32] = '\0';
    col_db.birth_location_id[i] = l;
    col_db.time_of_birth[i] = t;
    col_db.company[i] = l;
    col_db.state[i] = l;
    col_db.children[i] = l;
    col_db.married[i] = t % 10;
  }
 }

 // SELECT COUNT(1) FROM users WHERE location = 42
 static size_t rcount_users_by_location() {
  size_t count = 0;

  for (size_t i = 0; i < NUM_RECORDS; i++) {
    struct User u = row_db[i];
    if (u.birth_location_id == LOCATION_ID) {
      count++;
    }
  }

  return count;
 }

 // SELECT COUNT(1) FROM users WHERE location = 42
 static size_t ccount_users_by_location() {
  size_t count = 0;

  for (size_t i = 0; i < NUM_RECORDS; i++) {
    if (col_db.birth_location_id[i] == LOCATION_ID) {
      count++;
    }
  }

  return count;
 }

 struct Users {
  size_t size;
  size_t offset;
  struct User* users;
 };

 static struct Users* users_init() {
  struct Users* users = (struct Users*)malloc(sizeof(struct User));
  assert(users != NULL);
  users->size = NUM_RECORDS;
  users->offset = 0;
  users->users = (struct User*)malloc(sizeof(struct User) * users->size);
  assert(users->users != NULL);
  return users;
 }

 static void users_reset(struct Users* users) {
  users->offset = 0;
 }

 static void users_append(struct Users* users, struct User user) {
  assert(users->offset < users->size);
  users->users[users->offset] = user;
  users->offset++;
 }

 static void users_free(struct Users* users) {
  free(users->users);
  free(users);
 }

 // SELECT * FROM users WHERE location > 1
 static struct Users* rusers_by_location(struct Users* users) {
  for (size_t i = 0; i < NUM_RECORDS; i++) {
    struct User u = row_db[i];
    if (u.birth_location_id > 1) {
      users_append(users, u);
    }
  }

  return users;
 }

 // SELECT * FROM users WHERE location > 1
 static struct Users* cusers_by_location(struct Users *users) {
  for (size_t i = 0; i < NUM_RECORDS; i++) {
    if (col_db.birth_location_id[i] > 1) {
      struct User u = {
 	.id = col_db.id[i],
 	.birth_location_id = col_db.birth_location_id[i],
 	.time_of_birth = col_db.time_of_birth[i],
 	.company = col_db.company[i],
 	.state = col_db.state[i],
 	.children = col_db.children[i],
 	.married = col_db.married[i]
      };
      memcpy(u.name, col_db.name + (i * 255), 255);
      users_append(users, u);
    }
  }

  return users;
 }


 // SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T
 static struct Users* rusers_restricted(struct Users* users) {
  for (size_t i = 0; i < NUM_RECORDS; i++) {
    struct User u = row_db[i];
    if (u.birth_location_id == 42 || u.company == 1 || u.state == 12 || u.married == true || u.children > 2) {
      users_append(users, u);
    }
  }

  return users;
 }

 // SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T
 static struct Users* cusers_restricted(struct Users *users) {
  for (size_t i = 0; i < NUM_RECORDS; i++) {
    if (col_db.birth_location_id[i] == 42 || col_db.company[i] == 1 || col_db.state[i] == 12 || col_db.married[i] == true || col_db.children[i] > 2) {
      struct User u = {
 	.id = col_db.id[i],
 	.birth_location_id = col_db.birth_location_id[i],
 	.time_of_birth = col_db.time_of_birth[i],
 	.company = col_db.company[i],
 	.state = col_db.state[i],
 	.children = col_db.children[i],
 	.married = col_db.married[i]
      };
      memcpy(u.name, col_db.name + (i * 255), 255);
      users_append(users, u);
    }
  }

  return users;
 }

 // SELECT MAX(time_of_birth) FROM users;
 static int cmax_time_of_birth(void) {
  int result = 0;

  for (size_t i = 0; i < NUM_RECORDS; i++) {
    int t = col_db.time_of_birth[i];
    if (t > result)
      result = t;
  }

  return result;
 }

 // SELECT MAX(time_of_birth) FROM users;
 static int rmax_time_of_birth(void) {
  int result = 0;

  for (size_t i = 0; i < NUM_RECORDS; i++) {
    int t = row_db[i].time_of_birth;
    if (t > result)
      result = t;
  }

  return result;
 }

 int main(int argc, char **argv) {
  setup();

  int c, r = 0;
  size_t result_count = 0;

  struct User *results = malloc(sizeof(struct User) * NUM_RECORDS);
  assert(results != NULL);
  
  start = time(NULL);
  for (size_t i = 0; i < NUM_QUERIES; i++) {
    r = rmax_time_of_birth();
  }
  printf("[ROW] elapsed: %lds\trmax_time_of_birth: %d\t[SELECT MAX(time_of_birth) FROM users;]\n", time(NULL) - start, r);

  time_t start = time(NULL);
  for (size_t i = 0; i < NUM_QUERIES; i++) {
    c = cmax_time_of_birth();
  }
  printf("[COL] elapsed: %lds\tcmax_time_of_birth: %d\t[SELECT MAX(time_of_birth) FROM users;]\n", time(NULL) - start, c);

  start = time(NULL);
  for (size_t i = 0; i < NUM_QUERIES; i++) {
    result_count = rcount_users_by_location();
  }
  printf("[ROW] elapsed: %lds\trecords for rcount_users_by_location count: %ld\t[SELECT COUNT(1) FROM users WHERE location = 42]\n",
      time(NULL) - start, result_count);

  start = time(NULL);
  for (size_t i = 0; i < NUM_QUERIES; i++) {
    result_count = ccount_users_by_location();
  }
  printf("[COL] elapsed: %lds\trecords for ccount_users_by_location count: %ld\t[SELECT COUNT(1) FROM users WHERE location = 42]\n",
      time(NULL) - start, result_count);

  struct Users* users = users_init();

  start = time(NULL);
  for (size_t i = 0; i < NUM_QUERIES; i++) {
    users = rusers_by_location(users);
    result_count = users->offset;
    users_reset(users);
  }
  printf("[ROW] elapsed: %lds\trecords for r_users_by_location: %ld [SELECT * FROM users WHERE location > 1]\n",
      time(NULL) - start, result_count);

  start = time(NULL);
  for (size_t i = 0; i < NUM_QUERIES; i++) {
    users = cusers_by_location(users);
    result_count = users->offset;
    users_reset(users);
  }
  printf("[COL] elapsed: %lds\trecords for c_users_by_location: %ld [SELECT * FROM users WHERE location > 1]\n",
 	 time(NULL) - start, result_count);

  start = time(NULL);
  for (size_t i = 0; i < NUM_QUERIES; i++) {
    users = rusers_restricted(users);
    result_count = users->offset;
    users_reset(users);
  }
  printf("[ROW] elapsed: %lds\trecords for r_users_restricted: %ld\t[SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]\n",
      time(NULL) - start, result_count);

  start = time(NULL);
  for (size_t i = 0; i < NUM_QUERIES; i++) {
    users = cusers_restricted(users);
    result_count = users->offset;
    users_reset(users);
  }
  printf("[COL] elapsed: %lds\trecords for c_users_restricted: %ld\t[SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]\n",
 	 time(NULL) - start, result_count);
  
  users_free(users);

  return EXIT_SUCCESS;
 }
diff --git a/Makefile b/Makefile
 default:
 	gcc -O0 -g3 -Wall -Wextra -Wconversion -Wdouble-promotion \
 		-Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion \
 		main.c

 format:
 	clang-format -i main.c
diff --git a/out.txt b/out.txt
 $ make
 $ ./a.out
 [ROW] elapsed: 9s  rmax_time_of_birth: 86399                        [SELECT MAX(time_of_birth) FROM users;]
 [COL] elapsed: 2s  cmax_time_of_birth: 86399                        [SELECT MAX(time_of_birth) FROM users;]
 [ROW] elapsed: 6s  records for rcount_users_by_location count: 9984 [SELECT COUNT(1) FROM users WHERE location = 42]
 [COL] elapsed: 1s  records for ccount_users_by_location count: 9984 [SELECT COUNT(1) FROM users WHERE location = 42]
 [ROW] elapsed: 19s records for r_users_by_location: 980011          [SELECT * FROM users WHERE location > 1]
 [COL] elapsed: 24s records for c_users_by_location: 980011          [SELECT * FROM users WHERE location > 1]
 [ROW] elapsed: 21s records for r_users_restricted: 998008           [SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]
 [COL] elapsed: 27s records for c_users_restricted: 998008           [SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]
	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <time.h>

	#define NUM_RECORDS 1000000 // 1M
	#define NUM_QUERIES 1000 // number of queries to run in a row

	#define LOCATION_ID 42

	struct User {
	int id;
	char name[255];
	int birth_location_id;
	int time_of_birth;
	int company;
	int state;
	int children;
	bool married;
	};

	struct ColDB {
	int *id;
	char *name; // store the names contiguously to mimic row db
	int *birth_location_id;
	int *time_of_birth;
	int *company;
	int *state;
	int *children;
	bool *married;
	} col_db;

	// row oriented DB is just an array of users.
	struct User *row_db;
	struct ColDB col_db;

	static void setup(void) {
	row_db = malloc(sizeof(struct User) * NUM_RECORDS);
	assert(row_db != NULL);

	col_db.id = malloc(sizeof(int) * NUM_RECORDS);
	assert(col_db.id != NULL);
	col_db.name = malloc(255 * NUM_RECORDS);
	assert(col_db.name != NULL);
	col_db.birth_location_id = malloc(sizeof(int) * NUM_RECORDS);
	assert(col_db.birth_location_id != NULL);
	col_db.time_of_birth = malloc(sizeof(int) * NUM_RECORDS);
	assert(col_db.time_of_birth != NULL);
	col_db.company = malloc(sizeof(int) * NUM_RECORDS);
	assert(col_db.company != NULL);
	col_db.state = malloc(sizeof(int) * NUM_RECORDS);
	assert(col_db.state != NULL);
	col_db.children = malloc(sizeof(int) * NUM_RECORDS);
	assert(col_db.children != NULL);
	col_db.married = malloc(sizeof(int) * NUM_RECORDS);
	assert(col_db.married != NULL);

	srand((unsigned int)time(NULL));

	for (size_t i = 0; i < NUM_RECORDS; i++) {
	int id = abs(rand());
	int l = abs(rand()) % 100; // make some duplicate birth locations...
	int t = abs(rand()) % 86400; // seconds in a day

	row_db[i].id = id;
	row_db[i].name[32] = '\0'; // just so we can print the names
	row_db[i].birth_location_id = l;
	row_db[i].time_of_birth = t;
	row_db[i].company = l;
	row_db[i].state = l;
	row_db[i].children = l;
	row_db[i].married = t % 10;

	col_db.id[i] = id;
	col_db.name[i * 255 + 32] = '\0';
	col_db.birth_location_id[i] = l;
	col_db.time_of_birth[i] = t;
	col_db.company[i] = l;
	col_db.state[i] = l;
	col_db.children[i] = l;
	col_db.married[i] = t % 10;
	}
	}

	// SELECT COUNT(1) FROM users WHERE location = 42
	static size_t rcount_users_by_location() {
	size_t count = 0;

	for (size_t i = 0; i < NUM_RECORDS; i++) {
	struct User u = row_db[i];
	if (u.birth_location_id == LOCATION_ID) {
	count++;
	}
	}

	return count;
	}

	// SELECT COUNT(1) FROM users WHERE location = 42
	static size_t ccount_users_by_location() {
	size_t count = 0;

	for (size_t i = 0; i < NUM_RECORDS; i++) {
	if (col_db.birth_location_id[i] == LOCATION_ID) {
	count++;
	}
	}

	return count;
	}

	struct Users {
	size_t size;
	size_t offset;
	struct User* users;
	};

	static struct Users* users_init() {
	struct Users* users = (struct Users*)malloc(sizeof(struct User));
	assert(users != NULL);
	users->size = NUM_RECORDS;
	users->offset = 0;
	users->users = (struct User)malloc(sizeof(struct User) users->size);
	assert(users->users != NULL);
	return users;
	}

	static void users_reset(struct Users* users) {
	users->offset = 0;
	}

	static void users_append(struct Users* users, struct User user) {
	assert(users->offset < users->size);
	users->users[users->offset] = user;
	users->offset++;
	}

	static void users_free(struct Users* users) {
	free(users->users);
	free(users);
	}

	// SELECT * FROM users WHERE location > 1
	static struct Users* rusers_by_location(struct Users* users) {
	for (size_t i = 0; i < NUM_RECORDS; i++) {
	struct User u = row_db[i];
	if (u.birth_location_id > 1) {
	users_append(users, u);
	}
	}

	return users;
	}

	// SELECT * FROM users WHERE location > 1
	static struct Users* cusers_by_location(struct Users *users) {
	for (size_t i = 0; i < NUM_RECORDS; i++) {
	if (col_db.birth_location_id[i] > 1) {
	struct User u = {
	.id = col_db.id[i],
	.birth_location_id = col_db.birth_location_id[i],
	.time_of_birth = col_db.time_of_birth[i],
	.company = col_db.company[i],
	.state = col_db.state[i],
	.children = col_db.children[i],
	.married = col_db.married[i]
	};
	memcpy(u.name, col_db.name + (i * 255), 255);
	users_append(users, u);
	}
	}

	return users;
	}


	// SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T
	static struct Users* rusers_restricted(struct Users* users) {
	for (size_t i = 0; i < NUM_RECORDS; i++) {
	struct User u = row_db[i];
	if (u.birth_location_id == 42 \|\| u.company == 1 \|\| u.state == 12 \|\| u.married == true \|\| u.children > 2) {
	users_append(users, u);
	}
	}

	return users;
	}

	// SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T
	static struct Users* cusers_restricted(struct Users *users) {
	for (size_t i = 0; i < NUM_RECORDS; i++) {
	if (col_db.birth_location_id[i] == 42 \|\| col_db.company[i] == 1 \|\| col_db.state[i] == 12 \|\| col_db.married[i] == true \|\| col_db.children[i] > 2) {
	struct User u = {
	.id = col_db.id[i],
	.birth_location_id = col_db.birth_location_id[i],
	.time_of_birth = col_db.time_of_birth[i],
	.company = col_db.company[i],
	.state = col_db.state[i],
	.children = col_db.children[i],
	.married = col_db.married[i]
	};
	memcpy(u.name, col_db.name + (i * 255), 255);
	users_append(users, u);
	}
	}

	return users;
	}

	// SELECT MAX(time_of_birth) FROM users;
	static int cmax_time_of_birth(void) {
	int result = 0;

	for (size_t i = 0; i < NUM_RECORDS; i++) {
	int t = col_db.time_of_birth[i];
	if (t > result)
	result = t;
	}

	return result;
	}

	// SELECT MAX(time_of_birth) FROM users;
	static int rmax_time_of_birth(void) {
	int result = 0;

	for (size_t i = 0; i < NUM_RECORDS; i++) {
	int t = row_db[i].time_of_birth;
	if (t > result)
	result = t;
	}

	return result;
	}

	int main(int argc, char **argv) {
	setup();

	int c, r = 0;
	size_t result_count = 0;

	struct User results = malloc(sizeof(struct User) NUM_RECORDS);
	assert(results != NULL);

	start = time(NULL);
	for (size_t i = 0; i < NUM_QUERIES; i++) {
	r = rmax_time_of_birth();
	}
	printf("[ROW] elapsed: %lds\trmax_time_of_birth: %d\t[SELECT MAX(time_of_birth) FROM users;]\n", time(NULL) - start, r);

	time_t start = time(NULL);
	for (size_t i = 0; i < NUM_QUERIES; i++) {
	c = cmax_time_of_birth();
	}
	printf("[COL] elapsed: %lds\tcmax_time_of_birth: %d\t[SELECT MAX(time_of_birth) FROM users;]\n", time(NULL) - start, c);

	start = time(NULL);
	for (size_t i = 0; i < NUM_QUERIES; i++) {
	result_count = rcount_users_by_location();
	}
	printf("[ROW] elapsed: %lds\trecords for rcount_users_by_location count: %ld\t[SELECT COUNT(1) FROM users WHERE location = 42]\n",
	time(NULL) - start, result_count);

	start = time(NULL);
	for (size_t i = 0; i < NUM_QUERIES; i++) {
	result_count = ccount_users_by_location();
	}
	printf("[COL] elapsed: %lds\trecords for ccount_users_by_location count: %ld\t[SELECT COUNT(1) FROM users WHERE location = 42]\n",
	time(NULL) - start, result_count);

	struct Users* users = users_init();

	start = time(NULL);
	for (size_t i = 0; i < NUM_QUERIES; i++) {
	users = rusers_by_location(users);
	result_count = users->offset;
	users_reset(users);
	}
	printf("[ROW] elapsed: %lds\trecords for r_users_by_location: %ld [SELECT * FROM users WHERE location > 1]\n",
	time(NULL) - start, result_count);

	start = time(NULL);
	for (size_t i = 0; i < NUM_QUERIES; i++) {
	users = cusers_by_location(users);
	result_count = users->offset;
	users_reset(users);
	}
	printf("[COL] elapsed: %lds\trecords for c_users_by_location: %ld [SELECT * FROM users WHERE location > 1]\n",
	time(NULL) - start, result_count);

	start = time(NULL);
	for (size_t i = 0; i < NUM_QUERIES; i++) {
	users = rusers_restricted(users);
	result_count = users->offset;
	users_reset(users);
	}
	printf("[ROW] elapsed: %lds\trecords for r_users_restricted: %ld\t[SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]\n",
	time(NULL) - start, result_count);

	start = time(NULL);
	for (size_t i = 0; i < NUM_QUERIES; i++) {
	users = cusers_restricted(users);
	result_count = users->offset;
	users_reset(users);
	}
	printf("[COL] elapsed: %lds\trecords for c_users_restricted: %ld\t[SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]\n",
	time(NULL) - start, result_count);

	users_free(users);

	return EXIT_SUCCESS;
	}
	default:
	gcc -O0 -g3 -Wall -Wextra -Wconversion -Wdouble-promotion \
	-Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion \
	main.c

	format:
	clang-format -i main.c
	$ make
	$ ./a.out
	[ROW] elapsed: 9s rmax_time_of_birth: 86399 [SELECT MAX(time_of_birth) FROM users;]
	[COL] elapsed: 2s cmax_time_of_birth: 86399 [SELECT MAX(time_of_birth) FROM users;]
	[ROW] elapsed: 6s records for rcount_users_by_location count: 9984 [SELECT COUNT(1) FROM users WHERE location = 42]
	[COL] elapsed: 1s records for ccount_users_by_location count: 9984 [SELECT COUNT(1) FROM users WHERE location = 42]
	[ROW] elapsed: 19s records for r_users_by_location: 980011 [SELECT * FROM users WHERE location > 1]
	[COL] elapsed: 24s records for c_users_by_location: 980011 [SELECT * FROM users WHERE location > 1]
	[ROW] elapsed: 21s records for r_users_restricted: 998008 [SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]
	[COL] elapsed: 27s records for c_users_restricted: 998008 [SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]