Created
May 5, 2016 20:36
-
-
Save MylesBorins/b7c5520e3f0e9e76ced2239116d798b4 to your computer and use it in GitHub Desktop.
Not quite fast map
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* File: cmap.c | |
* Author: Myles Borins | |
* ---------------------- | |
* | |
*/ | |
#include "cmap.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <search.h> | |
/* Type: struct CMapImplementation | |
* ------------------------------- | |
* This definition completes the CMap type that was declared in | |
* cmap.h. | |
*/ | |
struct CMapImplementation { | |
int valueSize; | |
int count; | |
int bucketCount; | |
CMapCleanupValueFn cleanupFn; | |
char **buckets; | |
}; | |
// Defines a default value to be used if the user does not provide a capacityHint | |
#define INIT_ALLOCATION_SIZE 8 | |
/* Function: Hash | |
* -------------- | |
* This function adapted from Eric Roberts' _The Art and Science of C_ | |
* It takes a string and uses it to derive a "hash code," which | |
* is an integer in the range [0..NumBuckets-1]. The hash code is computed | |
* using a method called "linear congruence." A similar function using this | |
* method is described on page 144 of Kernighan and Ritchie. The choice of | |
* the value for the Multiplier can have a significant effort on the | |
* performance of the algorithm, but not on its correctness. | |
* This hash function is case-sensitive, "ZELENSKI" and "Zelenski" are | |
* not guaranteed to hash to same code. | |
*/ | |
static int Hash(const char *s, int numBuckets) | |
{ | |
const unsigned long MULTIPLIER = 2630849305L; // magic prime number | |
unsigned long hashcode = 0; | |
for (int i = 0; s[i] != '\0'; i++) | |
hashcode = hashcode * MULTIPLIER + s[i]; | |
return hashcode % numBuckets; | |
} | |
/** | |
* Function:LoadFactor | |
* ------------------------------ | |
* This helper function is used to calculat ethe Load Factor | |
* which is the ratio of currently stored items to total buckets | |
*/ | |
int LoadFactor(CMap *cm) | |
{ | |
return cm->count / cm->bucketCount; | |
} | |
/** | |
* Function: DisectBlob | |
* ------------------------------ | |
* This helper function takes a bob and pointers to a key pointer and elemAddr pointer | |
* The function assigns the appropriate pointers using pointer arithmatic | |
*/ | |
void DisectBlob(char *blob, char **key, char **elemAddr) | |
{ | |
*key = blob + sizeof(char *); | |
*elemAddr = *key + strlen(*key) + 1; | |
} | |
/** | |
* Function: MakeBlob | |
* ------------------------------ | |
* At the lowest level the map is a collection of blobs. This helper function is used | |
* to create the binary blobs used to store all the data | |
*/ | |
char *MakeBlob(const char *key, const void *elemAddr) | |
{ | |
char *blob = malloc(sizeof(char *) + strlen(key) + 1 + sizeof(elemAddr)); | |
char *insertKey = blob + sizeof(char *); | |
char *insertElem = blob + sizeof(char *) + strlen(key) + 1; | |
*(char **)blob = NULL; | |
strcpy(insertKey, key); | |
memcpy(insertElem, elemAddr, sizeof(insertElem)); | |
return blob; | |
} | |
/** | |
* Function: FindBlob | |
* ------------------------------ | |
* Sometimes we need to find a blob within a linked list | |
* This function recusively goes through the linked list returning a pointer | |
* to a blob containing a key. If no blob is found -1 is returned | |
*/ | |
char *FindBlob(char *blob, const char *key) | |
{ | |
if(blob == NULL) | |
{ | |
return NULL; | |
} | |
char *blobKey = blob + sizeof(char *); | |
char *nextBlob = *(char **)blob; | |
if(strcmp(blobKey, key) == 0) | |
{ | |
return blob; | |
} | |
else if (nextBlob != NULL) | |
{ | |
return FindBlob(nextBlob, key); | |
} | |
return NULL; | |
} | |
/** | |
* Function: CleanBlob | |
* ------------------------------ | |
* When blobs are destroyed they need to be properly cleaned using the | |
* provided cleanupFn | |
*/ | |
void CleanBlob(CMap *cm, char *blob) | |
{ | |
char *key, *valueAddr; | |
DisectBlob(blob, &key, &valueAddr); | |
if(cm->cleanupFn != NULL) | |
cm->cleanupFn(valueAddr); | |
} | |
/** | |
* Function: DeleteBlob | |
* ------------------------------ | |
* Want to get rid of a blob? This function takes car of all the nasty pointer logic | |
* of removing a blob, cleaning the memory, and properly setting the pointers in the linked | |
* list. The one bit of crafty logic here is returning the returing of a pointer. This logic | |
* tkes care of resetting all the pointers in the link list. | |
* | |
* Some inspiration taken from http://www.cs.bu.edu/teaching/c/linked-list/delete/ | |
*/ | |
char *DeleteBlob(CMap *cm, char *blob, const char *key) | |
{ | |
char *keyAddr, *valueAddr; | |
DisectBlob(blob, &keyAddr, &valueAddr); | |
char *nextBlob = *(char **)blob; | |
if(strcmp(key, keyAddr) == 0) | |
{ | |
if(cm->cleanupFn != NULL) | |
cm->cleanupFn(valueAddr); | |
free(blob); | |
cm->count--; | |
return nextBlob; | |
} | |
else if (nextBlob == NULL) | |
{ | |
return blob; | |
} | |
*(char **)blob = DeleteBlob(cm, nextBlob, key); | |
return blob; | |
} | |
/** | |
* Function: CMapCreate | |
* Usage: CMap *mymap = CMapCreate(sizeof(int), 10, NULL); | |
* ------------------------------------------------------ | |
* Creates a new empty CMap and returns a pointer to it. The pointer | |
* points to storage allocated in the heap. When done with this CMap, the | |
* client must call CMapDispose to dispose of it. If allocation fails, an | |
* assert is raised. | |
* | |
* The valueSize parameter specifies the size, in bytes, of the | |
* values that will be stored in the CMap. For example, to store values of | |
* type double, pass sizeof(double) for the valueSize. Note that all | |
* values stored in a given CMap must be of the same type. An assert is raised | |
* if valueSize is not greater than zero. | |
* | |
* The capacityHint parameter allows the client to tune the resizing behavior for | |
* their needs. The CMap's internal storage will initially be optimized to hold | |
* this number of entries. This capacityHint is not a binding limit, just a hint. | |
* If the initial allocation is outgrown, the storage capacity will double in size. | |
* If planning to store many elements, specifying a large capacityHint will | |
* result in an appropriately large initial allocation and fewer resizing operations | |
* later. For a small map, a small capacityHint will result in several smaller | |
* allocations and potentially less waste. If capacityHint is 0, an internal | |
* default value is used. An assert is raised if capacityHint is negative. | |
* | |
* The cleanupFn is a client callback that will be called on a value being | |
* removed/replaced (via CMapRemove/CMapPut, respectively) and on every value | |
* in the CMap when it is destroyed (via CMapDispose). The client can use this | |
* function to do any deallocation/cleanup required for the value, | |
* such as freeing memory to which the value points (if the value is a | |
* pointer). The client can pass NULL for cleanupFn if values don't | |
* require any special cleanup. | |
*/ | |
CMap *CMapCreate(int valueSize, int capacityHint, CMapCleanupValueFn fn) | |
{ | |
assert(valueSize > 0); | |
CMap *map = malloc(sizeof(struct CMapImplementation)); | |
assert(map != NULL); | |
assert(capacityHint >= 0); | |
if(!capacityHint) | |
capacityHint = INIT_ALLOCATION_SIZE; | |
map->bucketCount = capacityHint; | |
map->count = 0; | |
map->cleanupFn = fn; | |
map->valueSize = valueSize; | |
map->buckets = calloc(map->bucketCount, sizeof(char *)); | |
assert(map->buckets != NULL); | |
return map; | |
} | |
/** | |
* Function: CleanBucket | |
* --------------------------- | |
* Helper function fo CMapDispose. When you want to clean up a bucket it is | |
* way simpler to do so recursively. Dispose calls this function to take care | |
* of the elements in the buckets | |
*/ | |
void CleanBucket(CMap *cm, char *bucket) | |
{ | |
if(bucket == NULL) | |
return; | |
char *keyAddr, *valueAddr; | |
DisectBlob(bucket, &keyAddr, &valueAddr); | |
char *nextBlob = *(char **)bucket; | |
if(nextBlob != NULL) | |
{ | |
CleanBucket(cm, nextBlob); | |
} | |
if(cm->cleanupFn) | |
{ | |
cm->cleanupFn(valueAddr); | |
} | |
free(bucket); | |
} | |
/** | |
* Function: CMapDispose | |
* Usage: CMapDispose(mymap); | |
* --------------------------- | |
* Disposes of this CMap and deallocates its memory, including the string keys that | |
* CMap copied. It does *not* automatically free memory owned by pointers | |
* embedded within the values. This would require knowledge of the structure | |
* of the values, which the CMap does not have. It *does* loop over the | |
* values calling the cleanupFn that was supplied to CMapCreate when the given | |
* CMap was created. | |
*/ | |
void CMapDispose(CMap *cm) | |
{ | |
int bucketCount = cm->bucketCount; | |
for (int i = 0; i < bucketCount; i++) | |
{ | |
char *list = cm->buckets[i]; | |
if(list != NULL); | |
CleanBucket(cm, list); | |
} | |
free(cm->buckets); | |
free(cm); | |
} | |
/** | |
* Function: CMapCount | |
* Usage: int count = CMapCount(mymap); | |
* ------------------------------------- | |
* Returns the number of entries currently stored in this CMap. Operates in | |
* constant-time. | |
*/ | |
int CMapCount(const CMap *cm) | |
{ | |
return cm->count; | |
} | |
/** | |
* Function: CMapGet | |
* Usage: int val = *(int *)CMapGet(mymap, "CS107"); | |
* -------------------------------------------------- | |
* Searches this CMap for an entry with the given key and if found, returns | |
* a pointer to its associated value. If key is not found, then NULL is | |
* returned as a sentinel. Note this function returns a pointer into the CMap's | |
* storage, so the pointer should be used with care. In particular, the pointer | |
* returned by CMapGet can become invalid after a call that adds or removes | |
* entries within the CMap. Note that keys are compared case-sensitively, | |
* e.g. "binky" is not the same key as "BinKy". Operates in constant-time. | |
*/ | |
void *CMapGet(const CMap *cm, const char *key) | |
{ | |
int hash = Hash(key, cm->bucketCount); | |
char *bucket = cm->buckets[hash]; | |
char *blob = FindBlob(bucket, key); | |
if (blob == NULL) | |
return blob; | |
char *keyPtr, *elemAddrPtr; | |
DisectBlob(blob, &keyPtr, &elemAddrPtr); | |
return elemAddrPtr; | |
} | |
/** | |
* Function: InsertBlob | |
* ------------------------------ | |
* Sometimes you want to put a blob in a bucket and not really think about it | |
* this function lets you you specify a bucket / blob and then inserts the blob | |
* while maintaining the order of the linked list. | |
*/ | |
void InsertBlob(char **bucket, char *blob) | |
{ | |
if (*bucket == NULL) | |
{ | |
*bucket = blob; | |
} | |
else | |
{ | |
char **nextLink = (char **)*bucket; | |
InsertBlob(nextLink, blob); | |
} | |
} | |
/** | |
* Function: ReHash | |
* ------------------------------ | |
* I can has buckets? | |
* If you LoadFactor is >= 2 then you need to double your buckets | |
* This Rehash function helps move everything around | |
*/ | |
void Rehash(CMap *cm) { | |
int newCapacity = cm->bucketCount * 2; | |
char **newBuckets = calloc(newCapacity, sizeof(cm->valueSize)); | |
for (int i = 0; i < cm->bucketCount; i++) | |
{ | |
char **bucket = &(cm->buckets[i]); | |
char *blob = *bucket; | |
if (blob != NULL) | |
{ | |
while(blob) | |
{ | |
char *keyAddr, *valueAddr; | |
DisectBlob(blob, &keyAddr, &valueAddr); | |
char *nextBlob = *(char **)blob; | |
*(char **)blob = NULL; | |
int hash = Hash(keyAddr, newCapacity); | |
InsertBlob(&(newBuckets[hash]), blob); | |
blob = nextBlob; | |
} | |
} | |
} | |
free(cm->buckets); | |
cm->buckets = newBuckets; | |
cm->bucketCount = newCapacity; | |
} | |
/** | |
* Function: CMapPut | |
* Usage: CMapPut(mymap, "CS107", &val); | |
* -------------------------------------- | |
* Associates the given key with a new value in this CMap. If there is an existing | |
* value for the key, it is replaced with the new value. Before being overwritten, | |
* the cleanupFn that was supplied to CMapCreate is called on the old value. The | |
* new value is copied from the memory pointed to by valueAddr. | |
* A copy of the key string is made by the CMap to store internally. Note that keys | |
* are compared case-sensitively, e.g. "binky" is not the same key as | |
* "BinKy". The capacity is enlarged if necessary, an assert is raised | |
* on allocation failure. Operates in constant-time (amortized). | |
*/ | |
void CMapPut(CMap *cm, const char *key, const void *elemAddr) | |
{ | |
if(LoadFactor(cm) >= 2) | |
Rehash(cm); | |
int index = Hash(key, cm->bucketCount); | |
if(cm->buckets[index] == NULL) | |
{ | |
cm->buckets[index] = MakeBlob(key, elemAddr); | |
cm->count++; | |
} | |
else | |
{ | |
char *blob = FindBlob(cm->buckets[index], key); | |
char *newBlob = MakeBlob(key, elemAddr); | |
assert(newBlob != NULL); | |
if (blob == NULL) | |
{ | |
InsertBlob(&(cm->buckets[index]), newBlob); | |
cm->count++; | |
} | |
else | |
{ | |
CMapRemove(cm, key); | |
InsertBlob(&(cm->buckets[index]), newBlob); | |
cm->count++; | |
} | |
} | |
} | |
/** | |
* Function: CMapRemove | |
* Usage: CMapRemove(mymap, "CS107"); | |
* ----------------------------------- | |
* Searches this CMap for an entry with the given key and if found, removes that | |
* key and its associated value. If key is not found, no changes are made. | |
* Before a value is removed, the cleanupFn that was supplied to CMapCreate will | |
* be called on the value. The CMap frees the copy of the key string it made. | |
* Note that keys are compared case-sensitively, e.g. "binky" is not the same | |
* key as "BinKy". Operates in constant-time. | |
*/ | |
void CMapRemove(CMap *cm, const char *key) | |
{ | |
int hash = Hash(key, cm->bucketCount); | |
if(cm->buckets[hash]) | |
{ | |
cm->buckets[hash] = DeleteBlob(cm, cm->buckets[hash], key); | |
} | |
} | |
/** | |
* Function: BlobMap | |
* -------------------------------------------- | |
* A helper function for CMapMap to recusively go through linked list. | |
*/ | |
void BlobMap(char *bucket, CMapMapEntryFn mapfn, void *auxData) | |
{ | |
char *nextBucket = *(char **)bucket; | |
if(nextBucket) | |
{ | |
BlobMap(nextBucket, mapfn, auxData); | |
} | |
char *key = bucket + sizeof(char *); | |
void *valueAddr = bucket + sizeof(char *) + strlen(key) + 1; | |
mapfn(key, valueAddr, auxData); | |
} | |
/** | |
* Function: CMapMap | |
* Usage: CMapMap(mymap, PrintEntry, &myData); | |
* -------------------------------------------- | |
* Iterates over all of the entries in this CMap and calls mapfn on | |
* each entry. The callback function is called with a char * key, a pointer | |
* to its associated value, and the auxData pointer. The auxData value allows | |
* the client to pass extra state information if desired. If no client data is | |
* needed, this argument can be NULL. The entries are processed in arbitrary | |
* order. Operates in linear-time. | |
*/ | |
void CMapMap(CMap *cm, CMapMapEntryFn mapfn, void *auxData) | |
{ | |
for (int i = 0; i < cm->bucketCount; i++) | |
{ | |
if(cm->buckets[i] != NULL) | |
BlobMap(cm->buckets[i], mapfn, auxData); | |
} | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* File: cmap.h | |
* ------------ | |
* Defines the interface for the CMap type. | |
* | |
* The CMap manages a collection of key-value pairs or "entries". The keys | |
* are always of type char *, but the values can be of any desired type. The | |
* main operations are to associate a value with a key and to retrieve | |
* the value associated with a key. In order to work for all types of values, | |
* all CMap values must be passed and returned via (void *) pointers. | |
* Given its extensive use of untyped pointers, the CMap is a bit tricky | |
* to use correctly as a client. Be diligent! | |
* | |
* CS107 jzelenski | |
*/ | |
#ifndef _cmap_h | |
#define _cmap_h | |
/** | |
* Type: CMapMapEntryFn | |
* -------------------- | |
* CMapMapEntryFn is the typename for a pointer to a client-supplied mapping | |
* function. Such function pointers are passed to CMapMap to apply to each | |
* (key, value) pair in the CMap. The mapping function takes a char* key, | |
* a void* pointer to its associated value, and another void* for | |
* extra data passed by the client. | |
*/ | |
typedef void (*CMapMapEntryFn)(const char *key, void *valueAddr, void *auxData); | |
/** | |
* Type: CMapCleanupValueFn | |
* ------------------------ | |
* CMapCleanupValueFn is the typename for a pointer to a client-supplied cleanup | |
* function. Such function pointers are passed to CMapCreate to apply | |
* to a value when it is removed from the CMap. The cleanup function takes one | |
* void* pointer that points to the value about to be removed. | |
*/ | |
typedef void (*CMapCleanupValueFn)(void *valueAddr); | |
/** | |
* Type: CMap | |
* ---------- | |
* Defines the CMap type. The type is "incomplete", i.e. deliberately | |
* avoids stating the field names/types for the struct CMapImplementation. | |
* (That struct is completed in the implementation code not visible to | |
* clients). The incomplete type forces the client to respect the privacy | |
* of the representation. Client declare variables only of type CMap * | |
* (pointers only, never of the actual struct) and cannot never dereference | |
* a CMap * nor attempt to read/write its internal fields. A CMap | |
* is manipulated solely through the functions listed in this interface | |
* (this is analogous to how you use a FILE *). | |
*/ | |
typedef struct CMapImplementation CMap; | |
/** | |
* Function: CMapCreate | |
* Usage: CMap *mymap = CMapCreate(sizeof(int), 10, NULL); | |
* ------------------------------------------------------ | |
* Creates a new empty CMap and returns a pointer to it. The pointer | |
* points to storage allocated in the heap. When done with this CMap, the | |
* client must call CMapDispose to dispose of it. If allocation fails, an | |
* assert is raised. | |
* | |
* The valueSize parameter specifies the size, in bytes, of the | |
* values that will be stored in the CMap. For example, to store values of | |
* type double, pass sizeof(double) for the valueSize. Note that all | |
* values stored in a given CMap must be of the same type. An assert is raised | |
* if valueSize is not greater than zero. | |
* | |
* The capacityHint parameter allows the client to tune the resizing behavior for | |
* their needs. The CMap's internal storage will initially be optimized to hold | |
* this number of entries. This capacityHint is not a binding limit, just a hint. | |
* If the initial allocation is outgrown, the storage capacity will double in size. | |
* If planning to store many elements, specifying a large capacityHint will | |
* result in an appropriately large initial allocation and fewer resizing operations | |
* later. For a small map, a small capacityHint will result in several smaller | |
* allocations and potentially less waste. If capacityHint is 0, an internal | |
* default value is used. An assert is raised if capacityHint is negative. | |
* | |
* The cleanupFn is a client callback that will be called on a value being | |
* removed/replaced (via CMapRemove/CMapPut, respectively) and on every value | |
* in the CMap when it is destroyed (via CMapDispose). The client can use this | |
* function to do any deallocation/cleanup required for the value, | |
* such as freeing memory to which the value points (if the value is a | |
* pointer). The client can pass NULL for cleanupFn if values don't | |
* require any special cleanup. | |
*/ | |
CMap *CMapCreate(int valueSize, int capacityHint, CMapCleanupValueFn cleanupFn); | |
/** | |
* Function: CMapDispose | |
* Usage: CMapDispose(mymap); | |
* --------------------------- | |
* Disposes of this CMap and deallocates its memory, including the string keys that | |
* CMap copied. It does *not* automatically free memory owned by pointers | |
* embedded within the values. This would require knowledge of the structure | |
* of the values, which the CMap does not have. It *does* loop over the | |
* values calling the cleanupFn that was supplied to CMapCreate when the given | |
* CMap was created. | |
*/ | |
void CMapDispose(CMap *cm); | |
/** | |
* Function: CMapCount | |
* Usage: int count = CMapCount(mymap); | |
* ------------------------------------- | |
* Returns the number of entries currently stored in this CMap. Operates in | |
* constant-time. | |
*/ | |
int CMapCount(const CMap *cm); | |
/** | |
* Function: CMapPut | |
* Usage: CMapPut(mymap, "CS107", &val); | |
* -------------------------------------- | |
* Associates the given key with a new value in this CMap. If there is an existing | |
* value for the key, it is replaced with the new value. Before being overwritten, | |
* the cleanupFn that was supplied to CMapCreate is called on the old value. The | |
* new value is copied from the memory pointed to by valueAddr. | |
* A copy of the key string is made by the CMap to store internally. Note that keys | |
* are compared case-sensitively, e.g. "binky" is not the same key as | |
* "BinKy". The capacity is enlarged if necessary, an assert is raised | |
* on allocation failure. Operates in constant-time (amortized). | |
*/ | |
void CMapPut(CMap *cm, const char *key, const void *valueAddr); | |
/** | |
* Function: CMapGet | |
* Usage: int val = *(int *)CMapGet(mymap, "CS107"); | |
* -------------------------------------------------- | |
* Searches this CMap for an entry with the given key and if found, returns | |
* a pointer to its associated value. If key is not found, then NULL is | |
* returned as a sentinel. Note this function returns a pointer into the CMap's | |
* storage, so the pointer should be used with care. In particular, the pointer | |
* returned by CMapGet can become invalid after a call that adds or removes | |
* entries within the CMap. Note that keys are compared case-sensitively, | |
* e.g. "binky" is not the same key as "BinKy". Operates in constant-time. | |
*/ | |
void *CMapGet(const CMap *cm, const char * key); | |
/** | |
* Function: CMapRemove | |
* Usage: CMapRemove(mymap, "CS107"); | |
* ----------------------------------- | |
* Searches this CMap for an entry with the given key and if found, removes that | |
* key and its associated value. If key is not found, no changes are made. | |
* Before a value is removed, the cleanupFn that was supplied to CMapCreate will | |
* be called on the value. The CMap frees the copy of the key string it made. | |
* Note that keys are compared case-sensitively, e.g. "binky" is not the same | |
* key as "BinKy". Operates in constant-time. | |
*/ | |
void CMapRemove(CMap *cm, const char * key); | |
/** | |
* Function: CMapMap | |
* Usage: CMapMap(mymap, PrintEntry, &myData); | |
* -------------------------------------------- | |
* Iterates over all of the entries in this CMap and calls mapfn on | |
* each entry. The callback function is called with a char * key, a pointer | |
* to its associated value, and the auxData pointer. The auxData value allows | |
* the client to pass extra state information if desired. If no client data is | |
* needed, this argument can be NULL. The entries are processed in arbitrary | |
* order. Operates in linear-time. | |
*/ | |
void CMapMap(CMap *cm, CMapMapEntryFn mapfn, void *auxData); | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment