Sammons · January 3, 2016 08:42 · ghost · Jan 4, 2016
diff --git a/index-text-file.c b/index-text-file.c
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 struct offset_node_s {
 	struct offset_node_s * next;
 	unsigned long offset;
 };
 typedef struct offset_node_s offset_node;

 struct index_s {
 	unsigned int size;
 	offset_node ** offsets_table; 
 };

 typedef struct index_s hash_index;

 unsigned int hash(char* str, int strlength, unsigned int max_size) {
 	unsigned int value = 0;
 	unsigned int tmp = 0;
 	for (int i = 0; i < strlength; ++i) {
 		tmp |= (unsigned int)str[i];
 		tmp = tmp << sizeof(char);
 		value ^= tmp;
 		tmp = 0;
 	}
 	return value % max_size;
 }

 void free_index(hash_index* ind) {
 	for (int i = 0; i < ind->size; ++i) {
 		offset_node* cur_ptr = NULL;
 		offset_node* next_ptr = NULL;
 		cur_ptr = ind->offsets_table[i];
 		while (cur_ptr != NULL) {
 			next_ptr = cur_ptr->next;
 			free(cur_ptr);
 			cur_ptr = next_ptr;
 		}
 		ind->offsets_table[i] = NULL;
 	}
 	free(ind->offsets_table);
 	ind->offsets_table = NULL;
 }

 void append_to_list_with_key(hash_index* ind, unsigned int key, unsigned long offset) {
 	offset_node * new_node = (offset_node*) malloc(sizeof(offset_node));
 	new_node->next = NULL;
 	new_node->offset = offset;
 	if (ind->offsets_table[key] != NULL) {
 		offset_node * existing_node = ind->offsets_table[key];
 		while (existing_node->next != NULL) existing_node = existing_node->next;
 		existing_node->next = new_node;
 	} else {
 		ind->offsets_table[key] = new_node;
 	}
 }

 void read_file_into_index(FILE* f, hash_index* ind) {
 	int chars_read = 0;
 	char buf[1024] = {0};
 	do {
 		memset(buf, 0, sizeof(buf));
 		unsigned long value = ftell(f);
 		chars_read = fscanf(f, "%s", buf);
 		unsigned int key = hash(buf, strlen(buf), ind->size);
 		append_to_list_with_key(ind, key, value);
 	} while (chars_read > 0);
 }

 /*
 format:
 <table_size>
 <key unsigned int>,<unsigned long offset 1>,<unsigned long offset 2>,<unsigned long offset 3>...\n
 <key unsigned int>,<unsigned long offset 1>,<unsigned long offset 2>,<unsigned long offset 3>...\n
 ...
 */
 void write_index(FILE* f, hash_index* ind) {
 	fprintf(f, "%d\n", ind->size);
 	for (unsigned int i = 0; i < ind->size; ++i) {
 		if (ind->offsets_table[i] != NULL) {
 			offset_node* cur_ptr = ind->offsets_table[i];
 			fprintf(f, "%d,", i);
 			while (cur_ptr != NULL) {
 				fprintf(f, "%lu,", cur_ptr->offset);
 				cur_ptr = cur_ptr->next;
 			}
 			fprintf(f, "|\n");
 		}
 	}
 }

 void read_index(FILE* f, hash_index* ind) {
 	fscanf(f, "%d", &ind->size);
 	ind->offsets_table = (offset_node**)malloc(sizeof(offset_node*) * ind->size);
 	memset(ind->offsets_table, 0, sizeof(offset_node*) * ind->size);
 	unsigned int key = 0;
 	while (fscanf(f, "%d,", &key) > 0) {
 		int finished_with_this_line = 0;
 		unsigned long offset = 0;
 		while (fscanf(f, "%lu,", &offset)) {
 			append_to_list_with_key(ind, key, offset);
 		}
 		while (fgetc(f) != '\n');
 	}
 }

 int compare_hash_indices(hash_index* i1, hash_index* i2) {
 	if (i1->size != i2->size) return 0;
 	for (int i = 0; i < i1->size; ++i) {
 		if ((i1->offsets_table[i] == NULL || i2->offsets_table[i] == NULL) 
 			&& i1->offsets_table[i] != i2->offsets_table[i]) return 0;
 		else if (i1->offsets_table[i] != NULL) {
 			offset_node* p1 = i1->offsets_table[i];
 			offset_node* p2 = i2->offsets_table[i];
 			while (p1 != NULL) {
 				if (p1->offset != p2->offset) return 0;
 				if (p1->next == NULL && p2->next != NULL) return 0;
 				if (p1->next != NULL && p2->next == NULL) return 0;
 				p1 = p1->next;
 				p2 = p2->next;
 			}
 		}
 	}
 	return 1;
 }

 /* toy implementation of indexing text in lines of a file */
 int main(int argc, char const * argv[]) {

 	/* crude file read - please be secure in real life */
 	FILE* f = fopen(argv[1], "rb");

 	unsigned int max_size = 1000;

 	hash_index fileIndex = { 0 };
 	fileIndex.size = max_size;
 	fileIndex.offsets_table = (offset_node**) malloc ( sizeof(offset_node*) * max_size );
 	memset(fileIndex.offsets_table, 0, max_size);

 	read_file_into_index(f, &fileIndex);
 	fclose(f);

 	printf("%s\n", "writing");
 	FILE* indexfile = fopen(argv[2], "wb");
 	write_index(indexfile, &fileIndex);
 	fclose(indexfile);

 	printf("%s\n", "reading");
 	FILE* readfile = fopen(argv[2], "rb");
 	hash_index checkIndex = {0};
 	read_index(readfile, &checkIndex);
 	fclose(readfile);

 	printf("%s\n", "comparing");
 	if (compare_hash_indices(&fileIndex, &checkIndex)) {
 		printf("%s\n", "equal, write and read were the same");
 	} else {
 		printf("%s\n", "there was an error");
 	}

 	// printf("looking up the word \"hello\"\n");
 	// char* to_find = "hello";
 	// unsigned int hello_hash = hash(to_find, strlen(to_find), checkIndex.size);
 	// printf("hashes to %d\n", hello_hash);
 	// readfile = fopen(argv[1], "rb");
 	// char tmp[1024] = {0};

 	// if (checkIndex.offsets_table[hello_hash] != NULL) {
 	// 	printf("found at %lu\n", checkIndex.offsets_table[hello_hash]->offset);
 	// 	fseek(f, checkIndex.offsets_table[hello_hash]->offset, SEEK_SET);
 	// 	fscanf(f, "%s", tmp);
 	// 	printf("file read gives: %s\n", tmp);
 	// }
 	// fclose(readfile)

 	free_index(&checkIndex);
 	free_index(&fileIndex);
 }
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct offset_node_s {
	struct offset_node_s * next;
	unsigned long offset;
	};
	typedef struct offset_node_s offset_node;

	struct index_s {
	unsigned int size;
	offset_node ** offsets_table;
	};

	typedef struct index_s hash_index;

	unsigned int hash(char* str, int strlength, unsigned int max_size) {
	unsigned int value = 0;
	unsigned int tmp = 0;
	for (int i = 0; i < strlength; ++i) {
	tmp \|= (unsigned int)str[i];
	tmp = tmp << sizeof(char);
	value ^= tmp;
	tmp = 0;
	}
	return value % max_size;
	}

	void free_index(hash_index* ind) {
	for (int i = 0; i < ind->size; ++i) {
	offset_node* cur_ptr = NULL;
	offset_node* next_ptr = NULL;
	cur_ptr = ind->offsets_table[i];
	while (cur_ptr != NULL) {
	next_ptr = cur_ptr->next;
	free(cur_ptr);
	cur_ptr = next_ptr;
	}
	ind->offsets_table[i] = NULL;
	}
	free(ind->offsets_table);
	ind->offsets_table = NULL;
	}

	void append_to_list_with_key(hash_index* ind, unsigned int key, unsigned long offset) {
	offset_node * new_node = (offset_node*) malloc(sizeof(offset_node));
	new_node->next = NULL;
	new_node->offset = offset;
	if (ind->offsets_table[key] != NULL) {
	offset_node * existing_node = ind->offsets_table[key];
	while (existing_node->next != NULL) existing_node = existing_node->next;
	existing_node->next = new_node;
	} else {
	ind->offsets_table[key] = new_node;
	}
	}

	void read_file_into_index(FILE* f, hash_index* ind) {
	int chars_read = 0;
	char buf[1024] = {0};
	do {
	memset(buf, 0, sizeof(buf));
	unsigned long value = ftell(f);
	chars_read = fscanf(f, "%s", buf);
	unsigned int key = hash(buf, strlen(buf), ind->size);
	append_to_list_with_key(ind, key, value);
	} while (chars_read > 0);
	}

	/*
	format:
	<table_size>
	<key unsigned int>,<unsigned long offset 1>,<unsigned long offset 2>,<unsigned long offset 3>...\n
	<key unsigned int>,<unsigned long offset 1>,<unsigned long offset 2>,<unsigned long offset 3>...\n
	...
	*/
	void write_index(FILE* f, hash_index* ind) {
	fprintf(f, "%d\n", ind->size);
	for (unsigned int i = 0; i < ind->size; ++i) {
	if (ind->offsets_table[i] != NULL) {
	offset_node* cur_ptr = ind->offsets_table[i];
	fprintf(f, "%d,", i);
	while (cur_ptr != NULL) {
	fprintf(f, "%lu,", cur_ptr->offset);
	cur_ptr = cur_ptr->next;
	}
	fprintf(f, "\|\n");
	}
	}
	}

	void read_index(FILE* f, hash_index* ind) {
	fscanf(f, "%d", &ind->size);
	ind->offsets_table = (offset_node*)malloc(sizeof(offset_node) * ind->size);
	memset(ind->offsets_table, 0, sizeof(offset_node) ind->size);
	unsigned int key = 0;
	while (fscanf(f, "%d,", &key) > 0) {
	int finished_with_this_line = 0;
	unsigned long offset = 0;
	while (fscanf(f, "%lu,", &offset)) {
	append_to_list_with_key(ind, key, offset);
	}
	while (fgetc(f) != '\n');
	}
	}

	int compare_hash_indices(hash_index* i1, hash_index* i2) {
	if (i1->size != i2->size) return 0;
	for (int i = 0; i < i1->size; ++i) {
	if ((i1->offsets_table[i] == NULL \|\| i2->offsets_table[i] == NULL)
	&& i1->offsets_table[i] != i2->offsets_table[i]) return 0;
	else if (i1->offsets_table[i] != NULL) {
	offset_node* p1 = i1->offsets_table[i];
	offset_node* p2 = i2->offsets_table[i];
	while (p1 != NULL) {
	if (p1->offset != p2->offset) return 0;
	if (p1->next == NULL && p2->next != NULL) return 0;
	if (p1->next != NULL && p2->next == NULL) return 0;
	p1 = p1->next;
	p2 = p2->next;
	}
	}
	}
	return 1;
	}

	/* toy implementation of indexing text in lines of a file */
	int main(int argc, char const * argv[]) {

	/* crude file read - please be secure in real life */
	FILE* f = fopen(argv[1], "rb");

	unsigned int max_size = 1000;

	hash_index fileIndex = { 0 };
	fileIndex.size = max_size;
	fileIndex.offsets_table = (offset_node*) malloc ( sizeof(offset_node) * max_size );
	memset(fileIndex.offsets_table, 0, max_size);

	read_file_into_index(f, &fileIndex);
	fclose(f);

	printf("%s\n", "writing");
	FILE* indexfile = fopen(argv[2], "wb");
	write_index(indexfile, &fileIndex);
	fclose(indexfile);

	printf("%s\n", "reading");
	FILE* readfile = fopen(argv[2], "rb");
	hash_index checkIndex = {0};
	read_index(readfile, &checkIndex);
	fclose(readfile);

	printf("%s\n", "comparing");
	if (compare_hash_indices(&fileIndex, &checkIndex)) {
	printf("%s\n", "equal, write and read were the same");
	} else {
	printf("%s\n", "there was an error");
	}

	// printf("looking up the word \"hello\"\n");
	// char* to_find = "hello";
	// unsigned int hello_hash = hash(to_find, strlen(to_find), checkIndex.size);
	// printf("hashes to %d\n", hello_hash);
	// readfile = fopen(argv[1], "rb");
	// char tmp[1024] = {0};

	// if (checkIndex.offsets_table[hello_hash] != NULL) {
	// printf("found at %lu\n", checkIndex.offsets_table[hello_hash]->offset);
	// fseek(f, checkIndex.offsets_table[hello_hash]->offset, SEEK_SET);
	// fscanf(f, "%s", tmp);
	// printf("file read gives: %s\n", tmp);
	// }
	// fclose(readfile)

	free_index(&checkIndex);
	free_index(&fileIndex);
	}