Skip to content

Instantly share code, notes, and snippets.

@xfbs
Created March 5, 2017 20:23
Show Gist options
  • Select an option

  • Save xfbs/b172cd13420435792912bcfaac37ba60 to your computer and use it in GitHub Desktop.

Select an option

Save xfbs/b172cd13420435792912bcfaac37ba60 to your computer and use it in GitHub Desktop.
#include "dictionary.h"
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#define char_to_int(c) (c - 'A')
#define ALPHA_SIZE ('Z' - 'A' + 1)
#define BUFFERSIZE 256
struct Dictionary {
const char *prefix;
bool is_word;
struct Dictionary *children[ALPHA_SIZE];
};
typedef Dictionary dict_t;
enum dict_insert_cases {
// there exists a prefix which is equal to
// the word that can just be marked `is_word`
DICT_INSERT_EXISTS,
// there exists a prefix which is equal to
// the word until some point, which needs to
// be split
DICT_INSERT_SPLIT_PREFIX,
// the word needs to be inserted as a child
// of an existing prefix
DICT_INSERT_LEAF
};
// creates a new string with the same content as the given word
// and the given length
char *word_copy(const char *src, int len);
// insert word into the given dictionary with the
// constraint that the word is equal to the prefix
// of the dictionary leaf until `index`.
void dict_insert(dict_t *dict, const char *word, int index);
// figure out how to insert the word into the dictionary
// returns a dict_insert_cases enum, and optionally a new_index
// which is used for some cases
int dict_insert_case(const dict_t *d, const char *w, int i, int *new_i);
// marks the passed dict as a word
void dict_insert_exists(dict_t *dict, const char *word);
// splits the prefix of the dict into two at the
// provided index. requires index < strlen(dict->prefix)
// then it inserts the word as a leaf of the split
// dict
void dict_insert_prefix_split(dict_t *dict, const char *word, int index);
// inserts the word as a child of the prefix. might
// call dick_insert() recursively. returns the inserted
// leaf.
dict_t *dict_insert_prefix(dict_t *dict, const char *word, int index);
// checks if the dict contains the given words, with
// the constraint that the prefix of the root tree is
// equal to the word up to `i`
int dict_contains(const Dictionary*, const char *w, int i);
// get the leaf node from the dictionary with the given character
dict_t *dict_get_leaf(const dict_t *, const char c);
void dict_set_leaf(dict_t *, const char c, const dict_t *);
// creates a copy of a dictionary
dict_t *dict_copy_leaf(dict_t *dict);
// creates a new leaf with the given contents
dict_t *dict_new_leaf(const char *prefix, bool is_word);
// prints a dictionary recursively
void dict_print(const dict_t *dict, int indent);
// merges src into dest, copying all the words. requires
// all words in src and dest to have a common prefix that
// is index chars long.
void dict_merge(dict_t *dest, const dict_t *src, int index);
char *word_copy(const char *src, int len) {
// allocate enough memory
char *word = malloc(len+1);
// copy word over
memcpy(word, src, len);
// NULL-terminate word
word[len] = '\0';
return word;
}
dict_t *dict_get_leaf(const dict_t *dict, const char c)
{
return dict->children[char_to_int(c)];
}
void dict_set_leaf(dict_t *dict, const char c, const dict_t *leaf)
{
dict->children[char_to_int(c)] = (dict_t *) leaf;
}
dict_t *dict_new_leaf(const char *prefix, bool is_word) {
// allocate memory
dict_t *dict = malloc(sizeof(Dictionary));
// initialize memory
*dict = (Dictionary){
.prefix = prefix,
.is_word = is_word,
.children = {NULL}};
return dict;
}
Dictionary*
Dictionary_create()
{
return dict_new_leaf(NULL, false);
}
dict_t *dict_copy_leaf(dict_t *dict) {
// allocate memory
dict_t *copy = malloc(sizeof(dict_t));
// copy over contents
memcpy(copy, dict, sizeof(dict_t));
return copy;
}
void
Dictionary_delete(Dictionary* dict) {
// empty dict has a NULL prefix
if(dict->prefix) {
// have to cast the prefix to void* to
// avoid a compiler warning
free((void*) dict->prefix);
}
// delete children
for(char c = 'A'; c <= 'Z'; c++){
if(dict_get_leaf(dict, c) != NULL) {
Dictionary_delete(dict_get_leaf(dict, c));
}
}
free(dict);
}
void
Dictionary_insert(Dictionary *dict, const char *word) {
dict_insert(dict, word, 0);
}
int dict_insert_case(
const dict_t *dict,
const char *word,
int index,
int *new_index)
{
const char *prefix = dict->prefix;
// compare prefix and word and find out if they are equal,
// which one is shorter, or where they stop being equal
while(prefix[index] && word[index] && prefix[index] == word[index]) {
index++;
}
// set new_index to where we ended up after comparing
*new_index = index;
// if both prefix and word are NULL now, it means
// they were equal so this leaf needs to be marked
// as is_word
if(!prefix[index] && !word[index])
return DICT_INSERT_EXISTS;
// if the prefix is over (and the word isn't), we
// need to insert a new leaf here
else if(!prefix[index])
return DICT_INSERT_LEAF;
// otherwise we have to split the prefix
else //if(!word[index])
return DICT_INSERT_SPLIT_PREFIX;
}
void dict_insert(dict_t *dict, const char *word, int index)
{
// can't have null words in the dict
if(word == NULL)
return;
// special case: on initialisation the root prefix is
// NULL. in this case we can be lazy and just overwrite
// that with the word
if(dict->prefix == NULL) {
dict->prefix = word;
dict->is_word = true;
return;
}
// figure out how to insert the word and call the right
// method for it
int new_index;
switch(dict_insert_case(dict, word, index, &new_index)) {
case DICT_INSERT_EXISTS:
dict_insert_exists(dict, word);
break;
case DICT_INSERT_SPLIT_PREFIX:
dict_insert_prefix_split(dict, word, new_index);
dict_insert(dict, word, new_index);
break;
case DICT_INSERT_LEAF:
dict_insert(dict_insert_prefix(dict, word, new_index), word, new_index);
break;
}
}
void dict_insert_exists(dict_t *dict, const char *word) {
if(!dict->is_word) {
// any prefixes which are not marked as words are guaranteed
// to come from us, so we can safely free() them and overwrite
// them with the pointer passed by the user.
if(dict->prefix != word) {
free((void*)dict->prefix);
}
dict->prefix = word;
dict->is_word = true;
}
}
void dict_insert_prefix_split(dict_t *dict, const char *word, int index) {
// create new node
dict_t *leaf = dict_copy_leaf(dict);
// delete all children of node
memset(dict->children, 0, sizeof(dict_t *) * ALPHA_SIZE);
// connect new node to old node
dict_set_leaf(dict, dict->prefix[index], leaf);
// copy prefix as far as we need and set it as prefix of
// the old node
dict->prefix = word_copy(leaf->prefix, index);
dict->is_word = false;
}
dict_t *dict_insert_prefix(dict_t *dict, const char *word, int index) {
// two cases to consider: the leaf node could be nonexistent
// in which case we have to create a new new, or it could
// exist and we have to recursively call dict_insert().
if(dict_get_leaf(dict, word[index]) == NULL) {
// create new leaf to hold the word
dict_t *leaf = dict_new_leaf(word, false);;
// link leaf node to parent node
dict_set_leaf(dict, word[index], leaf);
return leaf;
} else {
// a leaf node already exists, so we return it.
return dict_get_leaf(dict, word[index]);
}
}
int
Dictionary_isIn(const Dictionary *dict, const char *word) {
if(word == NULL || dict->prefix == NULL){
return false;
}
return dict_contains(dict, word, 0);
}
int
dict_contains(const dict_t *dict, const char *word, int i){
// we reuse the dict_insert_case() method here because
// it tells us if the word is in it or not
int new_i;
switch(dict_insert_case(dict, word, i, &new_i)) {
case DICT_INSERT_EXISTS:
// word exists are a prefix, we just need to
// check if it's marked as a word.
return dict->is_word;
case DICT_INSERT_LEAF:
// if there is a fitting prefix, check the
// appropriate leaf recursively
if(dict_get_leaf(dict, word[new_i]) != NULL) {
return dict_contains(dict_get_leaf(dict, word[new_i]), word, new_i);
} else {
return false;
}
case DICT_INSERT_SPLIT_PREFIX:
default:
// if the word is part of a prefix, then
// it's not in the dict.
return false;
}
}
void
Dictionary_print(const Dictionary* dict)
{
dict_print(dict, 0);
}
void dict_print(const dict_t *dict, int indent) {
// create indentation
for(int i = 0; i < indent; i++) {
printf(" ");
}
// signify if this is a word of a prefix
if(dict->is_word) {
printf("WORD(");
} else {
printf("PREFIX(");
}
// print the word/prefix or NULL
if(dict->prefix) {
printf("\"%s\")\n", dict->prefix);
} else {
printf("NULL)\n");
}
// print children
for(char c = 'A'; c <= 'Z'; c++) {
Dictionary *d = dict_get_leaf(dict, c);
if(d) {
dict_print(d, indent+1);
}
}
}
void
Dictionary_merge(Dictionary* dest, const Dictionary* src)
{
dict_merge(dest, src, 0);
}
void
dict_merge(dict_t *dest, const dict_t *src, int index)
{
// if src is empty we are done here
if(src->prefix == NULL) {
return;
}
if(dest->prefix == NULL) {
index = strlen(src->prefix);
dest->prefix = word_copy(src->prefix, index);
}
dict_t *leaf;
const char *word;
int new_index;
switch(dict_insert_case(dest, src->prefix, index, &new_index)) {
case DICT_INSERT_EXISTS:
// mark the prefix as word if necessary
if(src->is_word) {
dest->is_word = true;
//dict_insert_exists(dest, );
}
// add all child leafs
for(char c = 'A'; c <= 'Z'; c++) {
if((leaf = dict_get_leaf(src, c)) != NULL) {
dict_merge(dest, leaf, new_index);
}
}
break;
case DICT_INSERT_SPLIT_PREFIX:
// split prefix and continue merging
dict_insert_prefix_split(dest, src->prefix, new_index);
dict_merge(dest, src, new_index);
break;
case DICT_INSERT_LEAF:
// create a copy of the word and add it as leaf
if(!dict_get_leaf(dest, src->prefix[new_index])) {
word = word_copy(src->prefix, new_index+strlen(src->prefix+new_index));
} else {
word = src->prefix;
}
leaf = dict_insert_prefix(dest, word, new_index);
dict_merge(leaf, src, new_index);
break;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment