Skip to content

Instantly share code, notes, and snippets.

@MylesBorins
Created May 5, 2016 20:36
Show Gist options
  • Save MylesBorins/b7c5520e3f0e9e76ced2239116d798b4 to your computer and use it in GitHub Desktop.
Save MylesBorins/b7c5520e3f0e9e76ced2239116d798b4 to your computer and use it in GitHub Desktop.
Not quite fast map
/*
* File: cmap.c
* Author: Myles Borins
* ----------------------
*
*/
#include "cmap.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <search.h>
/* Type: struct CMapImplementation
* -------------------------------
* This definition completes the CMap type that was declared in
* cmap.h.
*/
struct CMapImplementation {
int valueSize;
int count;
int bucketCount;
CMapCleanupValueFn cleanupFn;
char **buckets;
};
// Defines a default value to be used if the user does not provide a capacityHint
#define INIT_ALLOCATION_SIZE 8
/* Function: Hash
* --------------
* This function adapted from Eric Roberts' _The Art and Science of C_
* It takes a string and uses it to derive a "hash code," which
* is an integer in the range [0..NumBuckets-1]. The hash code is computed
* using a method called "linear congruence." A similar function using this
* method is described on page 144 of Kernighan and Ritchie. The choice of
* the value for the Multiplier can have a significant effort on the
* performance of the algorithm, but not on its correctness.
* This hash function is case-sensitive, "ZELENSKI" and "Zelenski" are
* not guaranteed to hash to same code.
*/
static int Hash(const char *s, int numBuckets)
{
const unsigned long MULTIPLIER = 2630849305L; // magic prime number
unsigned long hashcode = 0;
for (int i = 0; s[i] != '\0'; i++)
hashcode = hashcode * MULTIPLIER + s[i];
return hashcode % numBuckets;
}
/**
* Function:LoadFactor
* ------------------------------
* This helper function is used to calculat ethe Load Factor
* which is the ratio of currently stored items to total buckets
*/
int LoadFactor(CMap *cm)
{
return cm->count / cm->bucketCount;
}
/**
* Function: DisectBlob
* ------------------------------
* This helper function takes a bob and pointers to a key pointer and elemAddr pointer
* The function assigns the appropriate pointers using pointer arithmatic
*/
void DisectBlob(char *blob, char **key, char **elemAddr)
{
*key = blob + sizeof(char *);
*elemAddr = *key + strlen(*key) + 1;
}
/**
* Function: MakeBlob
* ------------------------------
* At the lowest level the map is a collection of blobs. This helper function is used
* to create the binary blobs used to store all the data
*/
char *MakeBlob(const char *key, const void *elemAddr)
{
char *blob = malloc(sizeof(char *) + strlen(key) + 1 + sizeof(elemAddr));
char *insertKey = blob + sizeof(char *);
char *insertElem = blob + sizeof(char *) + strlen(key) + 1;
*(char **)blob = NULL;
strcpy(insertKey, key);
memcpy(insertElem, elemAddr, sizeof(insertElem));
return blob;
}
/**
* Function: FindBlob
* ------------------------------
* Sometimes we need to find a blob within a linked list
* This function recusively goes through the linked list returning a pointer
* to a blob containing a key. If no blob is found -1 is returned
*/
char *FindBlob(char *blob, const char *key)
{
if(blob == NULL)
{
return NULL;
}
char *blobKey = blob + sizeof(char *);
char *nextBlob = *(char **)blob;
if(strcmp(blobKey, key) == 0)
{
return blob;
}
else if (nextBlob != NULL)
{
return FindBlob(nextBlob, key);
}
return NULL;
}
/**
* Function: CleanBlob
* ------------------------------
* When blobs are destroyed they need to be properly cleaned using the
* provided cleanupFn
*/
void CleanBlob(CMap *cm, char *blob)
{
char *key, *valueAddr;
DisectBlob(blob, &key, &valueAddr);
if(cm->cleanupFn != NULL)
cm->cleanupFn(valueAddr);
}
/**
* Function: DeleteBlob
* ------------------------------
* Want to get rid of a blob? This function takes car of all the nasty pointer logic
* of removing a blob, cleaning the memory, and properly setting the pointers in the linked
* list. The one bit of crafty logic here is returning the returing of a pointer. This logic
* tkes care of resetting all the pointers in the link list.
*
* Some inspiration taken from http://www.cs.bu.edu/teaching/c/linked-list/delete/
*/
char *DeleteBlob(CMap *cm, char *blob, const char *key)
{
char *keyAddr, *valueAddr;
DisectBlob(blob, &keyAddr, &valueAddr);
char *nextBlob = *(char **)blob;
if(strcmp(key, keyAddr) == 0)
{
if(cm->cleanupFn != NULL)
cm->cleanupFn(valueAddr);
free(blob);
cm->count--;
return nextBlob;
}
else if (nextBlob == NULL)
{
return blob;
}
*(char **)blob = DeleteBlob(cm, nextBlob, key);
return blob;
}
/**
* Function: CMapCreate
* Usage: CMap *mymap = CMapCreate(sizeof(int), 10, NULL);
* ------------------------------------------------------
* Creates a new empty CMap and returns a pointer to it. The pointer
* points to storage allocated in the heap. When done with this CMap, the
* client must call CMapDispose to dispose of it. If allocation fails, an
* assert is raised.
*
* The valueSize parameter specifies the size, in bytes, of the
* values that will be stored in the CMap. For example, to store values of
* type double, pass sizeof(double) for the valueSize. Note that all
* values stored in a given CMap must be of the same type. An assert is raised
* if valueSize is not greater than zero.
*
* The capacityHint parameter allows the client to tune the resizing behavior for
* their needs. The CMap's internal storage will initially be optimized to hold
* this number of entries. This capacityHint is not a binding limit, just a hint.
* If the initial allocation is outgrown, the storage capacity will double in size.
* If planning to store many elements, specifying a large capacityHint will
* result in an appropriately large initial allocation and fewer resizing operations
* later. For a small map, a small capacityHint will result in several smaller
* allocations and potentially less waste. If capacityHint is 0, an internal
* default value is used. An assert is raised if capacityHint is negative.
*
* The cleanupFn is a client callback that will be called on a value being
* removed/replaced (via CMapRemove/CMapPut, respectively) and on every value
* in the CMap when it is destroyed (via CMapDispose). The client can use this
* function to do any deallocation/cleanup required for the value,
* such as freeing memory to which the value points (if the value is a
* pointer). The client can pass NULL for cleanupFn if values don't
* require any special cleanup.
*/
CMap *CMapCreate(int valueSize, int capacityHint, CMapCleanupValueFn fn)
{
assert(valueSize > 0);
CMap *map = malloc(sizeof(struct CMapImplementation));
assert(map != NULL);
assert(capacityHint >= 0);
if(!capacityHint)
capacityHint = INIT_ALLOCATION_SIZE;
map->bucketCount = capacityHint;
map->count = 0;
map->cleanupFn = fn;
map->valueSize = valueSize;
map->buckets = calloc(map->bucketCount, sizeof(char *));
assert(map->buckets != NULL);
return map;
}
/**
* Function: CleanBucket
* ---------------------------
* Helper function fo CMapDispose. When you want to clean up a bucket it is
* way simpler to do so recursively. Dispose calls this function to take care
* of the elements in the buckets
*/
void CleanBucket(CMap *cm, char *bucket)
{
if(bucket == NULL)
return;
char *keyAddr, *valueAddr;
DisectBlob(bucket, &keyAddr, &valueAddr);
char *nextBlob = *(char **)bucket;
if(nextBlob != NULL)
{
CleanBucket(cm, nextBlob);
}
if(cm->cleanupFn)
{
cm->cleanupFn(valueAddr);
}
free(bucket);
}
/**
* Function: CMapDispose
* Usage: CMapDispose(mymap);
* ---------------------------
* Disposes of this CMap and deallocates its memory, including the string keys that
* CMap copied. It does *not* automatically free memory owned by pointers
* embedded within the values. This would require knowledge of the structure
* of the values, which the CMap does not have. It *does* loop over the
* values calling the cleanupFn that was supplied to CMapCreate when the given
* CMap was created.
*/
void CMapDispose(CMap *cm)
{
int bucketCount = cm->bucketCount;
for (int i = 0; i < bucketCount; i++)
{
char *list = cm->buckets[i];
if(list != NULL);
CleanBucket(cm, list);
}
free(cm->buckets);
free(cm);
}
/**
* Function: CMapCount
* Usage: int count = CMapCount(mymap);
* -------------------------------------
* Returns the number of entries currently stored in this CMap. Operates in
* constant-time.
*/
int CMapCount(const CMap *cm)
{
return cm->count;
}
/**
* Function: CMapGet
* Usage: int val = *(int *)CMapGet(mymap, "CS107");
* --------------------------------------------------
* Searches this CMap for an entry with the given key and if found, returns
* a pointer to its associated value. If key is not found, then NULL is
* returned as a sentinel. Note this function returns a pointer into the CMap's
* storage, so the pointer should be used with care. In particular, the pointer
* returned by CMapGet can become invalid after a call that adds or removes
* entries within the CMap. Note that keys are compared case-sensitively,
* e.g. "binky" is not the same key as "BinKy". Operates in constant-time.
*/
void *CMapGet(const CMap *cm, const char *key)
{
int hash = Hash(key, cm->bucketCount);
char *bucket = cm->buckets[hash];
char *blob = FindBlob(bucket, key);
if (blob == NULL)
return blob;
char *keyPtr, *elemAddrPtr;
DisectBlob(blob, &keyPtr, &elemAddrPtr);
return elemAddrPtr;
}
/**
* Function: InsertBlob
* ------------------------------
* Sometimes you want to put a blob in a bucket and not really think about it
* this function lets you you specify a bucket / blob and then inserts the blob
* while maintaining the order of the linked list.
*/
void InsertBlob(char **bucket, char *blob)
{
if (*bucket == NULL)
{
*bucket = blob;
}
else
{
char **nextLink = (char **)*bucket;
InsertBlob(nextLink, blob);
}
}
/**
* Function: ReHash
* ------------------------------
* I can has buckets?
* If you LoadFactor is >= 2 then you need to double your buckets
* This Rehash function helps move everything around
*/
void Rehash(CMap *cm) {
int newCapacity = cm->bucketCount * 2;
char **newBuckets = calloc(newCapacity, sizeof(cm->valueSize));
for (int i = 0; i < cm->bucketCount; i++)
{
char **bucket = &(cm->buckets[i]);
char *blob = *bucket;
if (blob != NULL)
{
while(blob)
{
char *keyAddr, *valueAddr;
DisectBlob(blob, &keyAddr, &valueAddr);
char *nextBlob = *(char **)blob;
*(char **)blob = NULL;
int hash = Hash(keyAddr, newCapacity);
InsertBlob(&(newBuckets[hash]), blob);
blob = nextBlob;
}
}
}
free(cm->buckets);
cm->buckets = newBuckets;
cm->bucketCount = newCapacity;
}
/**
* Function: CMapPut
* Usage: CMapPut(mymap, "CS107", &val);
* --------------------------------------
* Associates the given key with a new value in this CMap. If there is an existing
* value for the key, it is replaced with the new value. Before being overwritten,
* the cleanupFn that was supplied to CMapCreate is called on the old value. The
* new value is copied from the memory pointed to by valueAddr.
* A copy of the key string is made by the CMap to store internally. Note that keys
* are compared case-sensitively, e.g. "binky" is not the same key as
* "BinKy". The capacity is enlarged if necessary, an assert is raised
* on allocation failure. Operates in constant-time (amortized).
*/
void CMapPut(CMap *cm, const char *key, const void *elemAddr)
{
if(LoadFactor(cm) >= 2)
Rehash(cm);
int index = Hash(key, cm->bucketCount);
if(cm->buckets[index] == NULL)
{
cm->buckets[index] = MakeBlob(key, elemAddr);
cm->count++;
}
else
{
char *blob = FindBlob(cm->buckets[index], key);
char *newBlob = MakeBlob(key, elemAddr);
assert(newBlob != NULL);
if (blob == NULL)
{
InsertBlob(&(cm->buckets[index]), newBlob);
cm->count++;
}
else
{
CMapRemove(cm, key);
InsertBlob(&(cm->buckets[index]), newBlob);
cm->count++;
}
}
}
/**
* Function: CMapRemove
* Usage: CMapRemove(mymap, "CS107");
* -----------------------------------
* Searches this CMap for an entry with the given key and if found, removes that
* key and its associated value. If key is not found, no changes are made.
* Before a value is removed, the cleanupFn that was supplied to CMapCreate will
* be called on the value. The CMap frees the copy of the key string it made.
* Note that keys are compared case-sensitively, e.g. "binky" is not the same
* key as "BinKy". Operates in constant-time.
*/
void CMapRemove(CMap *cm, const char *key)
{
int hash = Hash(key, cm->bucketCount);
if(cm->buckets[hash])
{
cm->buckets[hash] = DeleteBlob(cm, cm->buckets[hash], key);
}
}
/**
* Function: BlobMap
* --------------------------------------------
* A helper function for CMapMap to recusively go through linked list.
*/
void BlobMap(char *bucket, CMapMapEntryFn mapfn, void *auxData)
{
char *nextBucket = *(char **)bucket;
if(nextBucket)
{
BlobMap(nextBucket, mapfn, auxData);
}
char *key = bucket + sizeof(char *);
void *valueAddr = bucket + sizeof(char *) + strlen(key) + 1;
mapfn(key, valueAddr, auxData);
}
/**
* Function: CMapMap
* Usage: CMapMap(mymap, PrintEntry, &myData);
* --------------------------------------------
* Iterates over all of the entries in this CMap and calls mapfn on
* each entry. The callback function is called with a char * key, a pointer
* to its associated value, and the auxData pointer. The auxData value allows
* the client to pass extra state information if desired. If no client data is
* needed, this argument can be NULL. The entries are processed in arbitrary
* order. Operates in linear-time.
*/
void CMapMap(CMap *cm, CMapMapEntryFn mapfn, void *auxData)
{
for (int i = 0; i < cm->bucketCount; i++)
{
if(cm->buckets[i] != NULL)
BlobMap(cm->buckets[i], mapfn, auxData);
}
}
/**
* File: cmap.h
* ------------
* Defines the interface for the CMap type.
*
* The CMap manages a collection of key-value pairs or "entries". The keys
* are always of type char *, but the values can be of any desired type. The
* main operations are to associate a value with a key and to retrieve
* the value associated with a key. In order to work for all types of values,
* all CMap values must be passed and returned via (void *) pointers.
* Given its extensive use of untyped pointers, the CMap is a bit tricky
* to use correctly as a client. Be diligent!
*
* CS107 jzelenski
*/
#ifndef _cmap_h
#define _cmap_h
/**
* Type: CMapMapEntryFn
* --------------------
* CMapMapEntryFn is the typename for a pointer to a client-supplied mapping
* function. Such function pointers are passed to CMapMap to apply to each
* (key, value) pair in the CMap. The mapping function takes a char* key,
* a void* pointer to its associated value, and another void* for
* extra data passed by the client.
*/
typedef void (*CMapMapEntryFn)(const char *key, void *valueAddr, void *auxData);
/**
* Type: CMapCleanupValueFn
* ------------------------
* CMapCleanupValueFn is the typename for a pointer to a client-supplied cleanup
* function. Such function pointers are passed to CMapCreate to apply
* to a value when it is removed from the CMap. The cleanup function takes one
* void* pointer that points to the value about to be removed.
*/
typedef void (*CMapCleanupValueFn)(void *valueAddr);
/**
* Type: CMap
* ----------
* Defines the CMap type. The type is "incomplete", i.e. deliberately
* avoids stating the field names/types for the struct CMapImplementation.
* (That struct is completed in the implementation code not visible to
* clients). The incomplete type forces the client to respect the privacy
* of the representation. Client declare variables only of type CMap *
* (pointers only, never of the actual struct) and cannot never dereference
* a CMap * nor attempt to read/write its internal fields. A CMap
* is manipulated solely through the functions listed in this interface
* (this is analogous to how you use a FILE *).
*/
typedef struct CMapImplementation CMap;
/**
* Function: CMapCreate
* Usage: CMap *mymap = CMapCreate(sizeof(int), 10, NULL);
* ------------------------------------------------------
* Creates a new empty CMap and returns a pointer to it. The pointer
* points to storage allocated in the heap. When done with this CMap, the
* client must call CMapDispose to dispose of it. If allocation fails, an
* assert is raised.
*
* The valueSize parameter specifies the size, in bytes, of the
* values that will be stored in the CMap. For example, to store values of
* type double, pass sizeof(double) for the valueSize. Note that all
* values stored in a given CMap must be of the same type. An assert is raised
* if valueSize is not greater than zero.
*
* The capacityHint parameter allows the client to tune the resizing behavior for
* their needs. The CMap's internal storage will initially be optimized to hold
* this number of entries. This capacityHint is not a binding limit, just a hint.
* If the initial allocation is outgrown, the storage capacity will double in size.
* If planning to store many elements, specifying a large capacityHint will
* result in an appropriately large initial allocation and fewer resizing operations
* later. For a small map, a small capacityHint will result in several smaller
* allocations and potentially less waste. If capacityHint is 0, an internal
* default value is used. An assert is raised if capacityHint is negative.
*
* The cleanupFn is a client callback that will be called on a value being
* removed/replaced (via CMapRemove/CMapPut, respectively) and on every value
* in the CMap when it is destroyed (via CMapDispose). The client can use this
* function to do any deallocation/cleanup required for the value,
* such as freeing memory to which the value points (if the value is a
* pointer). The client can pass NULL for cleanupFn if values don't
* require any special cleanup.
*/
CMap *CMapCreate(int valueSize, int capacityHint, CMapCleanupValueFn cleanupFn);
/**
* Function: CMapDispose
* Usage: CMapDispose(mymap);
* ---------------------------
* Disposes of this CMap and deallocates its memory, including the string keys that
* CMap copied. It does *not* automatically free memory owned by pointers
* embedded within the values. This would require knowledge of the structure
* of the values, which the CMap does not have. It *does* loop over the
* values calling the cleanupFn that was supplied to CMapCreate when the given
* CMap was created.
*/
void CMapDispose(CMap *cm);
/**
* Function: CMapCount
* Usage: int count = CMapCount(mymap);
* -------------------------------------
* Returns the number of entries currently stored in this CMap. Operates in
* constant-time.
*/
int CMapCount(const CMap *cm);
/**
* Function: CMapPut
* Usage: CMapPut(mymap, "CS107", &val);
* --------------------------------------
* Associates the given key with a new value in this CMap. If there is an existing
* value for the key, it is replaced with the new value. Before being overwritten,
* the cleanupFn that was supplied to CMapCreate is called on the old value. The
* new value is copied from the memory pointed to by valueAddr.
* A copy of the key string is made by the CMap to store internally. Note that keys
* are compared case-sensitively, e.g. "binky" is not the same key as
* "BinKy". The capacity is enlarged if necessary, an assert is raised
* on allocation failure. Operates in constant-time (amortized).
*/
void CMapPut(CMap *cm, const char *key, const void *valueAddr);
/**
* Function: CMapGet
* Usage: int val = *(int *)CMapGet(mymap, "CS107");
* --------------------------------------------------
* Searches this CMap for an entry with the given key and if found, returns
* a pointer to its associated value. If key is not found, then NULL is
* returned as a sentinel. Note this function returns a pointer into the CMap's
* storage, so the pointer should be used with care. In particular, the pointer
* returned by CMapGet can become invalid after a call that adds or removes
* entries within the CMap. Note that keys are compared case-sensitively,
* e.g. "binky" is not the same key as "BinKy". Operates in constant-time.
*/
void *CMapGet(const CMap *cm, const char * key);
/**
* Function: CMapRemove
* Usage: CMapRemove(mymap, "CS107");
* -----------------------------------
* Searches this CMap for an entry with the given key and if found, removes that
* key and its associated value. If key is not found, no changes are made.
* Before a value is removed, the cleanupFn that was supplied to CMapCreate will
* be called on the value. The CMap frees the copy of the key string it made.
* Note that keys are compared case-sensitively, e.g. "binky" is not the same
* key as "BinKy". Operates in constant-time.
*/
void CMapRemove(CMap *cm, const char * key);
/**
* Function: CMapMap
* Usage: CMapMap(mymap, PrintEntry, &myData);
* --------------------------------------------
* Iterates over all of the entries in this CMap and calls mapfn on
* each entry. The callback function is called with a char * key, a pointer
* to its associated value, and the auxData pointer. The auxData value allows
* the client to pass extra state information if desired. If no client data is
* needed, this argument can be NULL. The entries are processed in arbitrary
* order. Operates in linear-time.
*/
void CMapMap(CMap *cm, CMapMapEntryFn mapfn, void *auxData);
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment