nvanderw · September 2, 2012 21:13
diff --git a/dict.c b/dict.c
 #include "dict.h"

 /* Initializes the hash table at table.
 * Arguments:
 *  
 *  init_size: the initial number of chains in the hash table. If 0, defaults
 *             to a hardcoded value.
 *  key_hash:  a hashing function that operates on keys
 *  key_eq:    function which returns nonzero iff two keys are equal.
 * 
 * Returns 1 on success, 0 on memory allocation failure
 */
 int hash_table_init(hash_table_t *table, size_t init_size,
        size_t (*key_hash)(const void*),
        int (*key_eq)(const void *, const void *)) {

    size_t bytes;
    table->mem_size = init_size ? init_size : DEFAULT_SIZE;
    bytes = table->mem_size * sizeof(hash_entry_t*);
    table->mem = (hash_entry_t**)
                 malloc(bytes);
    if(!table->mem)
        return 0;
    table->load = 0;
    table->key_hash = key_hash;
    table->key_eq = key_eq;
    memset(table->mem, 0, bytes); // Invalidate every chain
    return 1;
 }

 /* Frees the memory associated with this table. To prevent memory leaks, make
 * sure to obtain the entries out of this table first using
 * hash_table_to_array().
 */
 void hash_table_release(hash_table_t *table) {
    size_t mem_size = table->mem_size;
    for(size_t i=0;i<mem_size;i++) {
        if(!table->mem[i]) continue;
        hash_entry_t *head, *next;
        for(head = table->mem[i];head;head = next) {
            next = head->next;
            free(head);
        }
    }
    free(table->mem);
 }

 /* Inserts an entry in the given hash table.
 * Arguments:
 *      table:   Location of the hash table struct
 *      key:     Key, hashable by the hashing function with which the table was
 *               initialized.
 *      value:   Value to insert.
 *      evicted: If non-NULL, and if an entry is found with a matching key to
 *               this function's key parameter, the evicted key/value pair will
 *               be stored at this pointer.
 * Returns: Negative on failure, zero on successful insert with no key/value
 *          pairs evicted from the table, positive on successful insert with
 *          key/value pairs evicted from table.
 */
 int hash_table_insert(hash_table_t *table, void *key, void *value,
                      dict_keyval_t *evicted) {

    int ret = hash_table_loose_insert(table, key, value, evicted);
    // If the original insertion failed, also fail.
    if(ret < 0)
        return -1;

    size_t load = table->load;
    dict_keyval_t *pairs;
    hash_table_t new_table;
    if(MAX_LOAD_FACTOR_DEN*load > MAX_LOAD_FACTOR_NUM*table->mem_size) {
        // Table's load factor is too high. Build a new, larger table with the
        // same elements.
        
        new_table = *table;
        new_table.load = 0;
        new_table.mem_size *= 3; // Triple the size of the array
        new_table.mem = (hash_entry_t**)
                        malloc(new_table.mem_size * sizeof(hash_entry_t*));

        memset(new_table.mem, 0, new_table.mem_size * sizeof(hash_entry_t*));
        if(!new_table.mem)
            goto cleanup3;
        pairs = (dict_keyval_t*)
                malloc(table->load * sizeof(dict_keyval_t));
        if(!pairs) {
            goto cleanup2;
        }

        hash_table_to_array(table, pairs);
        for(size_t i=0;i<load;i++) {
            if(hash_table_loose_insert(&new_table, pairs[i].key,
                        pairs[i].value, NULL) < 0) {
                goto cleanup1;
            }
        }
        free(pairs);
        hash_table_release(table);
        table->mem = new_table.mem;
        table->mem_size = new_table.mem_size;
    }
    return ret;

    /* If we fail at any point, be sure to free up used resources and remove
     * the inserted element from the hash table. We don't need to worry about 
     * evicted entries because we know that this insert increased the load
     * factor, so it cannot have simply replaced an existing entry. */
 cleanup1:
    free(new_table.mem);
 cleanup2:
    free(pairs);
 cleanup3:
    hash_table_remove(table, key, NULL);
    return -1;
 }

 /* Finds and removes the entry with the given key from the hash table.
 * Arguments:
 *      table:   Location of the hash table in memory.
 *      key:     Key whose associated entry is to be removed from the table.
 *      evicted: If non-NULL, location where evicted key/value pair will be
 *               placed.
 * Returns: Nonzero on successful removal, zero if key was not found.
 */
 int hash_table_remove(hash_table_t *table, void *key,
                      dict_keyval_t *evicted) {
    size_t hash;
    if(!table->key_hash)
        hash = (size_t) key;
    else
        hash = table->key_hash(key);
    hash %= table->mem_size;

    if(!table->mem[hash])
        return 0;

    hash_entry_t *head, *prev;
    for(head = table->mem[hash];head;head = head->next) {
        if((table->key_eq && table->key_eq(head->key, key)) ||
            !table->key_eq && head->key == key) {
            if(evicted) {
                evicted->key = head->key;
                evicted->value = head->value;
            }

            /* Now we must remove the entry from the list. There are two cases
             * to consider: either this is the first entry in the list, and
             * we need to adjust table->mem[hash], or this is not, and we need
             * to adjust the previous node. */
            if(head == table->mem[hash]) // This is the first entry in list
                table->mem[hash] = head->next;
            else
                prev->next = head->next;
            table->load--;
            return 1;
        }
        prev = head;
    }
    return 0;
 }

 /* Searches the hash table for the given key an
 * Arguments:
 *      table: Location of table in memory.
 *      key:   Key for which we are to search.
 *      pair:  If non-NULL, set to key/value pair at the entry.
 * Returns: Nonzero if the hash table contains the key, zero otherwise.
 */
 int hash_table_get(hash_table_t *table, void *key, dict_keyval_t *pair) {
    size_t hash;

    if(!table->key_hash)
        hash = (size_t) key;
    else
        hash = table->key_hash(key);
    hash %= table->mem_size;

    /* If the hash chain is empty, then fail. */
    if(!table->mem[hash])
        return 0;
    
    hash_entry_t *head;
    for(head = table->mem[hash];head;head = head->next) {
        // If the keys are equal, return true.
        if((table->key_eq && table->key_eq(head->key, key)) ||
            !table->key_eq && head->key == key) {

            if(pair) {
                pair->key = head->key;
                pair->value = head->value;
            }
            return 1;
        }
    }
    /* If we searched the whole chain and didn't find the entry, fail. */
    return 0;
 }

 /* Writes out a series of key-value pairs to the array at "array", which *must*
 * be large enough to hold at least table->load pairs. */
 void hash_table_to_array(hash_table_t *table, dict_keyval_t *array) {
    size_t j = 0;
    for(size_t i=0;i<table->mem_size;i++) {
        if(!table->mem[i]) continue;
        hash_entry_t *head;
        for(head = table->mem[i];head;head = head->next) {
            array[j++] = (dict_keyval_t)
                          {.key = head->key, .value = head->value};
        }
    }
 }

 hash_iter_t hash_table_get_iter(hash_table_t *table) {
    // Put cursor at first entry in the hash table
    hash_iter_t iter;
    iter.table = table;
    iter.chain = 0;
    if(table->mem_size)
        iter.cursor = *table->mem; // Place cursor at first slot in hash table
    return iter;
 }

 int hash_iter_get_next(hash_iter_t *iter, dict_keyval_t *pair) {
    // While our cursor is NULL, advance to the next item
    while(!iter->cursor && iter->chain < (iter->table->mem_size - 1)) {
        iter->cursor = iter->table->mem[++iter->chain];
    }
    if(!iter->cursor && iter->chain >= (iter->table->mem_size - 1))
        return 0;

    // Now iter->cursor points to a valid item in a chain.
    if(pair) {
        pair->key = iter->cursor->key;
        pair->value = iter->cursor->value;
    }
    iter->cursor = iter->cursor->next;
    return 1;
 }

 /* Called by hash_table_insert to actually do the insertion of the element
 * into the table. hash_table_insert then checks if the load factor is in
 * excess of the specified maximum load factor, and then recreates the table
 * if necesary. */
 static int hash_table_loose_insert(hash_table_t *table, void *key, void *value,
                                   dict_keyval_t *evicted) {
    size_t hash;

    if(!table->key_hash)
        hash = (size_t) key;
    else
        hash = table->key_hash(key);
    hash %= table->mem_size; // Get an index into our table

    // If no chain exists yet at this spot, create one
    if(!table->mem[hash]) {
        hash_entry_t *bucket = (hash_entry_t*) malloc(sizeof(hash_entry_t));
        if(!bucket)
            return -1;

        bucket->key = key;
        bucket->value = value;
        bucket->next = NULL;

        table->mem[hash] = bucket;
        table->load++;
        return 0;
    }

    /* Iterate over each entry in the chain, checking for key equality. */
    for(hash_entry_t *head = table->mem[hash];head;head = head->next) {
        if((table->key_eq && table->key_eq(head->key, key)) ||
            !table->key_eq && head->key == key) {
            if(evicted) {
                evicted->key = head->key;
                evicted->value = head->value;
            }
            head->key = key;
            head->value = value;
            return 1;
        }
        if(!head->next) {
            /* We've reached the end of the chain and didn't find the key, so
             * create a new bucket and append it. */
            hash_entry_t *bucket = (hash_entry_t*) malloc(sizeof(hash_entry_t));
            if(!bucket)
                return -1;

            bucket->key = key;
            bucket->value = value;
            bucket->next = NULL;

            head->next = bucket;
            table->load++;

            return 0;
        }
    }

 }
diff --git a/dict.h b/dict.h
 #ifndef _DICT_H_INCLUDED_
 #define _DICT_H_INCLUDED_

 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #define DEFAULT_SIZE 257 // Initial number of chains in the hash table
 #define MAX_LOAD_FACTOR_NUM 1
 #define MAX_LOAD_FACTOR_DEN 1

 /* Structure for a hash table bucket. */
 typedef struct hash_entry_t {
    void *key;
    void *value;
    struct hash_entry_t *next;
 } hash_entry_t;

 /* Generic hash table structure */
 typedef struct hash_table_t {
    hash_entry_t **mem; // Region of memory containing the chains
    size_t mem_size; // Number of hash_entry_t pointers in mem
    size_t load; // Number of elements in the hash table
    size_t (*key_hash)(const void *); // Hashing function for keys
    int (*key_eq)(const void *, const void *); // Equality function for keys
 } hash_table_t;

 typedef struct dict_keyval_t {
    void *key;
    void *value;
 } dict_keyval_t;

 // Iterator state for getting key-value pairs from the hash table
 typedef struct hash_iter_t {
    hash_table_t *table;
    size_t chain;
    hash_entry_t *cursor;
 } hash_iter_t;

 int hash_table_init(hash_table_t *table, size_t init_size,
        size_t (*key_hash)(const void*),
        int (*key_eq)(const void *, const void *));

 void hash_table_release(hash_table_t *table);
 int hash_table_insert(hash_table_t *table, void *key, void *value,
                      dict_keyval_t *evicted);
 int hash_table_remove(hash_table_t *table, void *key,
                      dict_keyval_t *evicted);
 int hash_table_get(hash_table_t *table, void *key, dict_keyval_t *pair);
 void hash_table_to_array(hash_table_t *table, dict_keyval_t *array);

 // Iterator functions
 hash_iter_t hash_table_get_iter(hash_table_t *table);
 int hash_iter_get_next(hash_iter_t *iter, dict_keyval_t *pair);

 // Internal insertion helper function
 static int hash_table_loose_insert(hash_table_t *table, void *key, void *value,
                                   dict_keyval_t *evicted);
 #endif
	#include "dict.h"

	/* Initializes the hash table at table.
	* Arguments:
	*
	* init_size: the initial number of chains in the hash table. If 0, defaults
	* to a hardcoded value.
	* key_hash: a hashing function that operates on keys
	* key_eq: function which returns nonzero iff two keys are equal.
	*
	* Returns 1 on success, 0 on memory allocation failure
	*/
	int hash_table_init(hash_table_t *table, size_t init_size,
	size_t (key_hash)(const void),
	int (key_eq)(const void , const void *)) {

	size_t bytes;
	table->mem_size = init_size ? init_size : DEFAULT_SIZE;
	bytes = table->mem_size * sizeof(hash_entry_t*);
	table->mem = (hash_entry_t**)
	malloc(bytes);
	if(!table->mem)
	return 0;
	table->load = 0;
	table->key_hash = key_hash;
	table->key_eq = key_eq;
	memset(table->mem, 0, bytes); // Invalidate every chain
	return 1;
	}

	/* Frees the memory associated with this table. To prevent memory leaks, make
	* sure to obtain the entries out of this table first using
	* hash_table_to_array().
	*/
	void hash_table_release(hash_table_t *table) {
	size_t mem_size = table->mem_size;
	for(size_t i=0;i<mem_size;i++) {
	if(!table->mem[i]) continue;
	hash_entry_t head, next;
	for(head = table->mem[i];head;head = next) {
	next = head->next;
	free(head);
	}
	}
	free(table->mem);
	}

	/* Inserts an entry in the given hash table.
	* Arguments:
	* table: Location of the hash table struct
	* key: Key, hashable by the hashing function with which the table was
	* initialized.
	* value: Value to insert.
	* evicted: If non-NULL, and if an entry is found with a matching key to
	* this function's key parameter, the evicted key/value pair will
	* be stored at this pointer.
	* Returns: Negative on failure, zero on successful insert with no key/value
	* pairs evicted from the table, positive on successful insert with
	* key/value pairs evicted from table.
	*/
	int hash_table_insert(hash_table_t table, void key, void *value,
	dict_keyval_t *evicted) {

	int ret = hash_table_loose_insert(table, key, value, evicted);
	// If the original insertion failed, also fail.
	if(ret < 0)
	return -1;

	size_t load = table->load;
	dict_keyval_t *pairs;
	hash_table_t new_table;
	if(MAX_LOAD_FACTOR_DENload > MAX_LOAD_FACTOR_NUMtable->mem_size) {
	// Table's load factor is too high. Build a new, larger table with the
	// same elements.

	new_table = *table;
	new_table.load = 0;
	new_table.mem_size *= 3; // Triple the size of the array
	new_table.mem = (hash_entry_t**)
	malloc(new_table.mem_size * sizeof(hash_entry_t*));

	memset(new_table.mem, 0, new_table.mem_size * sizeof(hash_entry_t*));
	if(!new_table.mem)
	goto cleanup3;
	pairs = (dict_keyval_t*)
	malloc(table->load * sizeof(dict_keyval_t));
	if(!pairs) {
	goto cleanup2;
	}

	hash_table_to_array(table, pairs);
	for(size_t i=0;i<load;i++) {
	if(hash_table_loose_insert(&new_table, pairs[i].key,
	pairs[i].value, NULL) < 0) {
	goto cleanup1;
	}
	}
	free(pairs);
	hash_table_release(table);
	table->mem = new_table.mem;
	table->mem_size = new_table.mem_size;
	}
	return ret;

	/* If we fail at any point, be sure to free up used resources and remove
	* the inserted element from the hash table. We don't need to worry about
	* evicted entries because we know that this insert increased the load
	* factor, so it cannot have simply replaced an existing entry. */
	cleanup1:
	free(new_table.mem);
	cleanup2:
	free(pairs);
	cleanup3:
	hash_table_remove(table, key, NULL);
	return -1;
	}

	/* Finds and removes the entry with the given key from the hash table.
	* Arguments:
	* table: Location of the hash table in memory.
	* key: Key whose associated entry is to be removed from the table.
	* evicted: If non-NULL, location where evicted key/value pair will be
	* placed.
	* Returns: Nonzero on successful removal, zero if key was not found.
	*/
	int hash_table_remove(hash_table_t table, void key,
	dict_keyval_t *evicted) {
	size_t hash;
	if(!table->key_hash)
	hash = (size_t) key;
	else
	hash = table->key_hash(key);
	hash %= table->mem_size;

	if(!table->mem[hash])
	return 0;

	hash_entry_t head, prev;
	for(head = table->mem[hash];head;head = head->next) {
	if((table->key_eq && table->key_eq(head->key, key)) \|\|
	!table->key_eq && head->key == key) {
	if(evicted) {
	evicted->key = head->key;
	evicted->value = head->value;
	}

	/* Now we must remove the entry from the list. There are two cases
	* to consider: either this is the first entry in the list, and
	* we need to adjust table->mem[hash], or this is not, and we need
	* to adjust the previous node. */
	if(head == table->mem[hash]) // This is the first entry in list
	table->mem[hash] = head->next;
	else
	prev->next = head->next;
	table->load--;
	return 1;
	}
	prev = head;
	}
	return 0;
	}

	/* Searches the hash table for the given key an
	* Arguments:
	* table: Location of table in memory.
	* key: Key for which we are to search.
	* pair: If non-NULL, set to key/value pair at the entry.
	* Returns: Nonzero if the hash table contains the key, zero otherwise.
	*/
	int hash_table_get(hash_table_t table, void key, dict_keyval_t *pair) {
	size_t hash;

	if(!table->key_hash)
	hash = (size_t) key;
	else
	hash = table->key_hash(key);
	hash %= table->mem_size;

	/* If the hash chain is empty, then fail. */
	if(!table->mem[hash])
	return 0;

	hash_entry_t *head;
	for(head = table->mem[hash];head;head = head->next) {
	// If the keys are equal, return true.
	if((table->key_eq && table->key_eq(head->key, key)) \|\|
	!table->key_eq && head->key == key) {

	if(pair) {
	pair->key = head->key;
	pair->value = head->value;
	}
	return 1;
	}
	}
	/* If we searched the whole chain and didn't find the entry, fail. */
	return 0;
	}

	/* Writes out a series of key-value pairs to the array at "array", which must
	* be large enough to hold at least table->load pairs. */
	void hash_table_to_array(hash_table_t table, dict_keyval_t array) {
	size_t j = 0;
	for(size_t i=0;i<table->mem_size;i++) {
	if(!table->mem[i]) continue;
	hash_entry_t *head;
	for(head = table->mem[i];head;head = head->next) {
	array[j++] = (dict_keyval_t)
	{.key = head->key, .value = head->value};
	}
	}
	}

	hash_iter_t hash_table_get_iter(hash_table_t *table) {
	// Put cursor at first entry in the hash table
	hash_iter_t iter;
	iter.table = table;
	iter.chain = 0;
	if(table->mem_size)
	iter.cursor = *table->mem; // Place cursor at first slot in hash table
	return iter;
	}

	int hash_iter_get_next(hash_iter_t iter, dict_keyval_t pair) {
	// While our cursor is NULL, advance to the next item
	while(!iter->cursor && iter->chain < (iter->table->mem_size - 1)) {
	iter->cursor = iter->table->mem[++iter->chain];
	}
	if(!iter->cursor && iter->chain >= (iter->table->mem_size - 1))
	return 0;

	// Now iter->cursor points to a valid item in a chain.
	if(pair) {
	pair->key = iter->cursor->key;
	pair->value = iter->cursor->value;
	}
	iter->cursor = iter->cursor->next;
	return 1;
	}

	/* Called by hash_table_insert to actually do the insertion of the element
	* into the table. hash_table_insert then checks if the load factor is in
	* excess of the specified maximum load factor, and then recreates the table
	* if necesary. */
	static int hash_table_loose_insert(hash_table_t table, void key, void *value,
	dict_keyval_t *evicted) {
	size_t hash;

	if(!table->key_hash)
	hash = (size_t) key;
	else
	hash = table->key_hash(key);
	hash %= table->mem_size; // Get an index into our table

	// If no chain exists yet at this spot, create one
	if(!table->mem[hash]) {
	hash_entry_t bucket = (hash_entry_t) malloc(sizeof(hash_entry_t));
	if(!bucket)
	return -1;

	bucket->key = key;
	bucket->value = value;
	bucket->next = NULL;

	table->mem[hash] = bucket;
	table->load++;
	return 0;
	}

	/* Iterate over each entry in the chain, checking for key equality. */
	for(hash_entry_t *head = table->mem[hash];head;head = head->next) {
	if((table->key_eq && table->key_eq(head->key, key)) \|\|
	!table->key_eq && head->key == key) {
	if(evicted) {
	evicted->key = head->key;
	evicted->value = head->value;
	}
	head->key = key;
	head->value = value;
	return 1;
	}
	if(!head->next) {
	/* We've reached the end of the chain and didn't find the key, so
	* create a new bucket and append it. */
	hash_entry_t bucket = (hash_entry_t) malloc(sizeof(hash_entry_t));
	if(!bucket)
	return -1;

	bucket->key = key;
	bucket->value = value;
	bucket->next = NULL;

	head->next = bucket;
	table->load++;

	return 0;
	}
	}

	}
	#ifndef _DICT_H_INCLUDED_
	#define _DICT_H_INCLUDED_

	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#define DEFAULT_SIZE 257 // Initial number of chains in the hash table
	#define MAX_LOAD_FACTOR_NUM 1
	#define MAX_LOAD_FACTOR_DEN 1

	/* Structure for a hash table bucket. */
	typedef struct hash_entry_t {
	void *key;
	void *value;
	struct hash_entry_t *next;
	} hash_entry_t;

	/* Generic hash table structure */
	typedef struct hash_table_t {
	hash_entry_t **mem; // Region of memory containing the chains
	size_t mem_size; // Number of hash_entry_t pointers in mem
	size_t load; // Number of elements in the hash table
	size_t (key_hash)(const void ); // Hashing function for keys
	int (key_eq)(const void , const void *); // Equality function for keys
	} hash_table_t;

	typedef struct dict_keyval_t {
	void *key;
	void *value;
	} dict_keyval_t;

	// Iterator state for getting key-value pairs from the hash table
	typedef struct hash_iter_t {
	hash_table_t *table;
	size_t chain;
	hash_entry_t *cursor;
	} hash_iter_t;

	int hash_table_init(hash_table_t *table, size_t init_size,
	size_t (key_hash)(const void),
	int (key_eq)(const void , const void *));

	void hash_table_release(hash_table_t *table);
	int hash_table_insert(hash_table_t table, void key, void *value,
	dict_keyval_t *evicted);
	int hash_table_remove(hash_table_t table, void key,
	dict_keyval_t *evicted);
	int hash_table_get(hash_table_t table, void key, dict_keyval_t *pair);
	void hash_table_to_array(hash_table_t table, dict_keyval_t array);

	// Iterator functions
	hash_iter_t hash_table_get_iter(hash_table_t *table);
	int hash_iter_get_next(hash_iter_t iter, dict_keyval_t pair);

	// Internal insertion helper function
	static int hash_table_loose_insert(hash_table_t table, void key, void *value,
	dict_keyval_t *evicted);
	#endif