Skip to content

Instantly share code, notes, and snippets.

@Sammons
Created January 3, 2016 08:42
Show Gist options
  • Save Sammons/d4db049d0206e23230e1 to your computer and use it in GitHub Desktop.
Save Sammons/d4db049d0206e23230e1 to your computer and use it in GitHub Desktop.
toy hash table of lines of text in a file
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct offset_node_s {
struct offset_node_s * next;
unsigned long offset;
};
typedef struct offset_node_s offset_node;
struct index_s {
unsigned int size;
offset_node ** offsets_table;
};
typedef struct index_s hash_index;
unsigned int hash(char* str, int strlength, unsigned int max_size) {
unsigned int value = 0;
unsigned int tmp = 0;
for (int i = 0; i < strlength; ++i) {
tmp |= (unsigned int)str[i];
tmp = tmp << sizeof(char);
value ^= tmp;
tmp = 0;
}
return value % max_size;
}
void free_index(hash_index* ind) {
for (int i = 0; i < ind->size; ++i) {
offset_node* cur_ptr = NULL;
offset_node* next_ptr = NULL;
cur_ptr = ind->offsets_table[i];
while (cur_ptr != NULL) {
next_ptr = cur_ptr->next;
free(cur_ptr);
cur_ptr = next_ptr;
}
ind->offsets_table[i] = NULL;
}
free(ind->offsets_table);
ind->offsets_table = NULL;
}
void append_to_list_with_key(hash_index* ind, unsigned int key, unsigned long offset) {
offset_node * new_node = (offset_node*) malloc(sizeof(offset_node));
new_node->next = NULL;
new_node->offset = offset;
if (ind->offsets_table[key] != NULL) {
offset_node * existing_node = ind->offsets_table[key];
while (existing_node->next != NULL) existing_node = existing_node->next;
existing_node->next = new_node;
} else {
ind->offsets_table[key] = new_node;
}
}
void read_file_into_index(FILE* f, hash_index* ind) {
int chars_read = 0;
char buf[1024] = {0};
do {
memset(buf, 0, sizeof(buf));
unsigned long value = ftell(f);
chars_read = fscanf(f, "%s", buf);
unsigned int key = hash(buf, strlen(buf), ind->size);
append_to_list_with_key(ind, key, value);
} while (chars_read > 0);
}
/*
format:
<table_size>
<key unsigned int>,<unsigned long offset 1>,<unsigned long offset 2>,<unsigned long offset 3>...\n
<key unsigned int>,<unsigned long offset 1>,<unsigned long offset 2>,<unsigned long offset 3>...\n
...
*/
void write_index(FILE* f, hash_index* ind) {
fprintf(f, "%d\n", ind->size);
for (unsigned int i = 0; i < ind->size; ++i) {
if (ind->offsets_table[i] != NULL) {
offset_node* cur_ptr = ind->offsets_table[i];
fprintf(f, "%d,", i);
while (cur_ptr != NULL) {
fprintf(f, "%lu,", cur_ptr->offset);
cur_ptr = cur_ptr->next;
}
fprintf(f, "|\n");
}
}
}
void read_index(FILE* f, hash_index* ind) {
fscanf(f, "%d", &ind->size);
ind->offsets_table = (offset_node**)malloc(sizeof(offset_node*) * ind->size);
memset(ind->offsets_table, 0, sizeof(offset_node*) * ind->size);
unsigned int key = 0;
while (fscanf(f, "%d,", &key) > 0) {
int finished_with_this_line = 0;
unsigned long offset = 0;
while (fscanf(f, "%lu,", &offset)) {
append_to_list_with_key(ind, key, offset);
}
while (fgetc(f) != '\n');
}
}
int compare_hash_indices(hash_index* i1, hash_index* i2) {
if (i1->size != i2->size) return 0;
for (int i = 0; i < i1->size; ++i) {
if ((i1->offsets_table[i] == NULL || i2->offsets_table[i] == NULL)
&& i1->offsets_table[i] != i2->offsets_table[i]) return 0;
else if (i1->offsets_table[i] != NULL) {
offset_node* p1 = i1->offsets_table[i];
offset_node* p2 = i2->offsets_table[i];
while (p1 != NULL) {
if (p1->offset != p2->offset) return 0;
if (p1->next == NULL && p2->next != NULL) return 0;
if (p1->next != NULL && p2->next == NULL) return 0;
p1 = p1->next;
p2 = p2->next;
}
}
}
return 1;
}
/* toy implementation of indexing text in lines of a file */
int main(int argc, char const * argv[]) {
/* crude file read - please be secure in real life */
FILE* f = fopen(argv[1], "rb");
unsigned int max_size = 1000;
hash_index fileIndex = { 0 };
fileIndex.size = max_size;
fileIndex.offsets_table = (offset_node**) malloc ( sizeof(offset_node*) * max_size );
memset(fileIndex.offsets_table, 0, max_size);
read_file_into_index(f, &fileIndex);
fclose(f);
printf("%s\n", "writing");
FILE* indexfile = fopen(argv[2], "wb");
write_index(indexfile, &fileIndex);
fclose(indexfile);
printf("%s\n", "reading");
FILE* readfile = fopen(argv[2], "rb");
hash_index checkIndex = {0};
read_index(readfile, &checkIndex);
fclose(readfile);
printf("%s\n", "comparing");
if (compare_hash_indices(&fileIndex, &checkIndex)) {
printf("%s\n", "equal, write and read were the same");
} else {
printf("%s\n", "there was an error");
}
// printf("looking up the word \"hello\"\n");
// char* to_find = "hello";
// unsigned int hello_hash = hash(to_find, strlen(to_find), checkIndex.size);
// printf("hashes to %d\n", hello_hash);
// readfile = fopen(argv[1], "rb");
// char tmp[1024] = {0};
// if (checkIndex.offsets_table[hello_hash] != NULL) {
// printf("found at %lu\n", checkIndex.offsets_table[hello_hash]->offset);
// fseek(f, checkIndex.offsets_table[hello_hash]->offset, SEEK_SET);
// fscanf(f, "%s", tmp);
// printf("file read gives: %s\n", tmp);
// }
// fclose(readfile)
free_index(&checkIndex);
free_index(&fileIndex);
}
Copy link

ghost commented Jan 4, 2016

Thanks for the help , how could I compile the code ?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment