Created
January 12, 2012 22:00
-
-
Save orlp/1603408 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Copyright (c) 2005-2008, Simon Howard | |
| Permission to use, copy, modify, and/or distribute this software | |
| for any purpose with or without fee is hereby granted, provided | |
| that the above copyright notice and this permission notice appear | |
| in all copies. | |
| THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL | |
| WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED | |
| WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE | |
| AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR | |
| CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | |
| LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, | |
| NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |
| CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
| */ | |
| /* Trie: fast mapping of strings to values */ | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include "trie.h" | |
| /* malloc() / free() testing */ | |
| #ifdef ALLOC_TESTING | |
| #include "alloc-testing.h" | |
| #endif | |
| typedef struct _TrieNode TrieNode; | |
| struct _TrieNode { | |
| TrieValue data; | |
| unsigned int use_count; | |
| TrieNode *next[256]; | |
| }; | |
| struct _Trie { | |
| TrieNode *root_node; | |
| TrieFreeFunc free_func; | |
| }; | |
| Trie *trie_new(void) | |
| { | |
| Trie *new_trie; | |
| new_trie = (Trie *) malloc(sizeof(Trie)); | |
| if (new_trie == NULL) { | |
| return NULL; | |
| } | |
| new_trie->root_node = NULL; | |
| new_trie->free_func = NULL; | |
| return new_trie; | |
| } | |
| static void trie_free_list_push(TrieNode **list, TrieNode *node) | |
| { | |
| node->data = *list; | |
| *list = node; | |
| } | |
| static TrieNode *trie_free_list_pop(TrieNode **list) | |
| { | |
| TrieNode *result; | |
| result = *list; | |
| *list = result->data; | |
| return result; | |
| } | |
| void trie_free(Trie *trie) | |
| { | |
| TrieNode *free_list; | |
| TrieNode *node; | |
| int i; | |
| free_list = NULL; | |
| /* Start with the root node */ | |
| if (trie->root_node != NULL) { | |
| if (trie->root_node->data != NULL) { | |
| trie->free_func(trie->root_node->data); | |
| } | |
| trie_free_list_push(&free_list, trie->root_node); | |
| } | |
| /* Go through the free list, freeing nodes. We add new nodes as | |
| * we encounter them; in this way, all the nodes are freed | |
| * non-recursively. */ | |
| while (free_list != NULL) { | |
| node = trie_free_list_pop(&free_list); | |
| /* Add all children of this node to the free list */ | |
| for (i=0; i<256; ++i) { | |
| if (node->next[i] != NULL) { | |
| if (node->next[i]->data != NULL) { | |
| trie->free_func(node->next[i]->data); | |
| } | |
| trie_free_list_push(&free_list, node->next[i]); | |
| } | |
| } | |
| /* Free the node */ | |
| free(node); | |
| } | |
| /* Free the trie */ | |
| free(trie); | |
| } | |
| void trie_register_free_function(Trie *trie, TrieFreeFunc free_func) { | |
| trie->free_func = free_func; | |
| } | |
| static TrieNode *trie_find_end(Trie *trie, char *key) | |
| { | |
| TrieNode *node; | |
| char *p; | |
| int c; | |
| /* Search down the trie until the end of string is reached */ | |
| node = trie->root_node; | |
| for (p=key; *p != '\0'; ++p) { | |
| if (node == NULL) { | |
| /* Not found in the tree. Return. */ | |
| return NULL; | |
| } | |
| /* Jump to the next node */ | |
| c = *p; | |
| node = node->next[c]; | |
| } | |
| /* This key is present if the value at the last node is not NULL */ | |
| return node; | |
| } | |
| /* Roll back an insert operation after a failed malloc() call. */ | |
| static void trie_insert_rollback(Trie *trie, char *key) | |
| { | |
| TrieNode *node; | |
| TrieNode **prev_ptr; | |
| TrieNode *next_node; | |
| TrieNode **next_prev_ptr; | |
| char *p; | |
| /* Follow the chain along. We know that we will never reach the | |
| * end of the string because trie_insert never got that far. As a | |
| * result, it is not necessary to check for the end of string | |
| * delimiter (NUL) */ | |
| node = trie->root_node; | |
| prev_ptr = &trie->root_node; | |
| p = key; | |
| while (node != NULL) { | |
| /* Find the next node now. We might free this node. */ | |
| next_prev_ptr = &node->next[(int) *p]; | |
| next_node = *next_prev_ptr; | |
| ++p; | |
| /* Decrease the use count and free the node if it | |
| * reaches zero. */ | |
| --node->use_count; | |
| if (node->use_count == 0) { | |
| free(node); | |
| if (prev_ptr != NULL) { | |
| *prev_ptr = NULL; | |
| } | |
| next_prev_ptr = NULL; | |
| } | |
| /* Update pointers */ | |
| node = next_node; | |
| prev_ptr = next_prev_ptr; | |
| } | |
| } | |
| int trie_insert(Trie *trie, char *key, TrieValue value) | |
| { | |
| TrieNode **rover; | |
| TrieNode *node; | |
| char *p; | |
| int c; | |
| /* Cannot insert NULL values */ | |
| if (value == TRIE_NULL) { | |
| return 0; | |
| } | |
| /* Search to see if this is already in the tree */ | |
| node = trie_find_end(trie, key); | |
| /* Already in the tree? If so, replace the existing value and | |
| * return success. */ | |
| if (node != NULL && node->data != TRIE_NULL) { | |
| node->data = value; | |
| return 1; | |
| } | |
| /* Search down the trie until we reach the end of string, | |
| * creating nodes as necessary */ | |
| rover = &trie->root_node; | |
| p = key; | |
| for (;;) { | |
| node = *rover; | |
| if (node == NULL) { | |
| /* Node does not exist, so create it */ | |
| node = (TrieNode *) calloc(1, sizeof(TrieNode)); | |
| if (node == NULL) { | |
| /* Allocation failed. Go back and undo | |
| * what we have done so far. */ | |
| trie_insert_rollback(trie, key); | |
| return 0; | |
| } | |
| node->data = TRIE_NULL; | |
| /* Link in to the trie */ | |
| *rover = node; | |
| } | |
| /* Increase the node use count */ | |
| ++node->use_count; | |
| /* Current character */ | |
| c = *p; | |
| /* Reached the end of string? If so, we're finished. */ | |
| if (c == '\0') { | |
| /* Set the data at the node we have reached */ | |
| node->data = value; | |
| break; | |
| } | |
| /* Advance to the next node in the chain */ | |
| rover = &node->next[c]; | |
| ++p; | |
| } | |
| return 1; | |
| } | |
| int trie_remove(Trie *trie, char *key) | |
| { | |
| TrieNode *node; | |
| TrieNode *next; | |
| TrieNode **last_next_ptr; | |
| char *p; | |
| int c; | |
| /* Find the end node and remove the value */ | |
| node = trie_find_end(trie, key); | |
| if (node != NULL && node->data != TRIE_NULL) { | |
| if (trie->free_func != NULL) { | |
| trie->free_func(node->data); | |
| } | |
| node->data = TRIE_NULL; | |
| } else { | |
| return 0; | |
| } | |
| /* Now traverse the tree again as before, decrementing the use | |
| * count of each node. Free back nodes as necessary. */ | |
| node = trie->root_node; | |
| last_next_ptr = &trie->root_node; | |
| p = key; | |
| for (;;) { | |
| /* Find the next node */ | |
| c = *p; | |
| next = node->next[c]; | |
| /* Free this node if necessary */ | |
| --node->use_count; | |
| if (node->use_count <= 0) { | |
| free(node); | |
| /* Set the "next" pointer on the previous node to NULL, | |
| * to unlink the freed node from the tree. This only | |
| * needs to be done once in a remove. After the first | |
| * unlink, all further nodes are also going to be | |
| * free'd. */ | |
| if (last_next_ptr != NULL) { | |
| *last_next_ptr = NULL; | |
| last_next_ptr = NULL; | |
| } | |
| } | |
| /* Go to the next character or finish */ | |
| if (c == '\0') { | |
| break; | |
| } else { | |
| ++p; | |
| } | |
| /* If necessary, save the location of the "next" pointer | |
| * so that it may be set to NULL on the next iteration if | |
| * the next node visited is freed. */ | |
| if (last_next_ptr != NULL) { | |
| last_next_ptr = &node->next[c]; | |
| } | |
| /* Jump to the next node */ | |
| node = next; | |
| } | |
| /* Removed successfully */ | |
| return 1; | |
| } | |
| TrieValue trie_lookup(Trie *trie, char *key) | |
| { | |
| TrieNode *node; | |
| node = trie_find_end(trie, key); | |
| if (node != NULL) { | |
| return node->data; | |
| } else { | |
| return TRIE_NULL; | |
| } | |
| } | |
| int trie_num_entries(Trie *trie) | |
| { | |
| /* To find the number of entries, simply look at the use count | |
| * of the root node. */ | |
| if (trie->root_node == NULL) { | |
| return 0; | |
| } else { | |
| return trie->root_node->use_count; | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Copyright (c) 2005-2008, Simon Howard | |
| Permission to use, copy, modify, and/or distribute this software | |
| for any purpose with or without fee is hereby granted, provided | |
| that the above copyright notice and this permission notice appear | |
| in all copies. | |
| THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL | |
| WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED | |
| WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE | |
| AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR | |
| CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | |
| LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, | |
| NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |
| CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
| */ | |
| /** | |
| * @file trie.h | |
| * | |
| * @brief Fast string lookups | |
| * | |
| * A trie is a data structure which provides fast mappings from strings | |
| * to values. | |
| * | |
| * To create a new trie, use @ref trie_new. To destroy a trie, | |
| * use @ref trie_free. | |
| * | |
| * To insert a value into a trie, use @ref trie_insert. To remove a value | |
| * from a trie, use @ref trie_remove. | |
| * | |
| * To look up a value from its key, use @ref trie_lookup. | |
| * | |
| * To find the number of entries in a trie, use @ref trie_num_entries. | |
| */ | |
| #ifndef ALGORITHM_TRIE_H | |
| #define ALGORITHM_TRIE_H | |
| #ifdef __cplusplus | |
| extern "C" { | |
| #endif | |
| /** | |
| * A trie structure. | |
| */ | |
| typedef struct _Trie Trie; | |
| /** | |
| * Value stored in a @ref Trie. | |
| */ | |
| typedef void *TrieValue; | |
| /** | |
| * Function used to free values stored in a trie. See | |
| * @ref tree_register_free_function. | |
| */ | |
| typedef void (*TrieFreeFunc)(TrieValue value); | |
| /** | |
| * A null @ref TrieValue. | |
| */ | |
| #define TRIE_NULL ((void *) 0) | |
| /** | |
| * Create a new trie. | |
| * | |
| * @return Pointer to a new trie structure, or NULL if it | |
| * was not possible to allocate memory for the | |
| * new trie. | |
| */ | |
| Trie *trie_new(void); | |
| /** | |
| * Destroy a trie. | |
| * | |
| * @param trie The trie to destroy. | |
| */ | |
| void trie_free(Trie *trie); | |
| /** | |
| * Register a function to be called when values are removed from | |
| * the trie. | |
| * | |
| * @param trie The trie. | |
| * @param free_func Function to call when values are removed from the | |
| * trie. | |
| */ | |
| void trie_register_free_function(Trie *trie, TrieFreeFunc free_func); | |
| /** | |
| * Insert a new key-value pair into a trie. | |
| * | |
| * @param trie The trie. | |
| * @param key The key to access the new value. | |
| * @param value The value. | |
| * @return Non-zero if the value was inserted successfully, | |
| * or zero if it was not possible to allocate | |
| * memory for the new entry. | |
| */ | |
| int trie_insert(Trie *trie, char *key, TrieValue value); | |
| /** | |
| * Look up a value from its key in a trie. | |
| * | |
| * @param trie The trie. | |
| * @param key The key. | |
| * @return The value associated with the key, or | |
| * @ref TRIE_NULL if not found in the trie. | |
| */ | |
| TrieValue trie_lookup(Trie *trie, char *key); | |
| /** | |
| * Remove an entry from a trie. | |
| * | |
| * @param trie The trie. | |
| * @param key The key of the entry to remove. | |
| * @return Non-zero if the key was removed successfully, | |
| * or zero if it is not present in the trie. | |
| */ | |
| int trie_remove(Trie *trie, char *key); | |
| /** | |
| * Find the number of entries in a trie. | |
| * | |
| * @param trie The trie. | |
| * @return Count of the number of entries in the trie. | |
| */ | |
| int trie_num_entries(Trie *trie); | |
| #ifdef __cplusplus | |
| } | |
| #endif | |
| #endif /* #ifndef ALGORITHM_TRIE_H */ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| cdef extern from "trie.h": | |
| ctypedef struct Trie: | |
| pass | |
| ctypedef void* TrieValue | |
| ctypedef void (*TrieFreeFunc)(TrieValue value) | |
| Trie* trie_new() | |
| void trie_free(Trie *tree) | |
| void trie_register_free_function(Trie *trie, TrieFreeFunc free_func) | |
| int trie_insert(Trie *trie, char *key, TrieValue value) | |
| TrieValue trie_lookup(Trie *trie, char *key) | |
| int trie_remove(Trie *trie, char *key) | |
| int trie_num_entries(Trie *trie) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from cpython cimport ref | |
| cimport c.trie as ctrie | |
| cdef void free_func(ctrie.TrieValue value): | |
| ref.Py_DECREF(<object> value) | |
| cdef class Trie: | |
| cdef ctrie.Trie *_c_trie | |
| def __cinit__(self): | |
| self._c_trie = ctrie.trie_new() | |
| if self._c_trie == NULL: | |
| raise MemoryError() | |
| ctrie.trie_register_free_function(self._c_trie, free_func) | |
| def __setitem__(self, bytes key, object value): | |
| ref.Py_INCREF(value) | |
| ctrie.trie_insert(self._c_trie, key, <void*> value) | |
| def __getitem__(self, bytes key): | |
| cdef void *value = ctrie.trie_lookup(self._c_trie, key) | |
| if value == NULL: | |
| raise KeyError(key) | |
| return <object> value | |
| def __delitem__(self, bytes key): | |
| cdef int ret = ctrie.trie_remove(self._c_trie, key) | |
| if not ret: | |
| raise KeyError(key) | |
| def __len__(self): | |
| return ctrie.trie_num_entries(self._c_trie) | |
| # both del and dealloc free, but we are guarded against double free | |
| def __del__(self): | |
| self._free() | |
| def __dealloc__(self): | |
| self._free() | |
| cdef _free(self): | |
| if self._c_trie != NULL: | |
| ctrie.trie_free(self._c_trie) | |
| self._c_trie = NULL |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment