Created
March 5, 2017 20:23
-
-
Save xfbs/b172cd13420435792912bcfaac37ba60 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "dictionary.h" | |
| #include <stdlib.h> | |
| #include <stdio.h> | |
| #include <stdbool.h> | |
| #include <string.h> | |
| #define char_to_int(c) (c - 'A') | |
| #define ALPHA_SIZE ('Z' - 'A' + 1) | |
| #define BUFFERSIZE 256 | |
| struct Dictionary { | |
| const char *prefix; | |
| bool is_word; | |
| struct Dictionary *children[ALPHA_SIZE]; | |
| }; | |
| typedef Dictionary dict_t; | |
| enum dict_insert_cases { | |
| // there exists a prefix which is equal to | |
| // the word that can just be marked `is_word` | |
| DICT_INSERT_EXISTS, | |
| // there exists a prefix which is equal to | |
| // the word until some point, which needs to | |
| // be split | |
| DICT_INSERT_SPLIT_PREFIX, | |
| // the word needs to be inserted as a child | |
| // of an existing prefix | |
| DICT_INSERT_LEAF | |
| }; | |
| // creates a new string with the same content as the given word | |
| // and the given length | |
| char *word_copy(const char *src, int len); | |
| // insert word into the given dictionary with the | |
| // constraint that the word is equal to the prefix | |
| // of the dictionary leaf until `index`. | |
| void dict_insert(dict_t *dict, const char *word, int index); | |
| // figure out how to insert the word into the dictionary | |
| // returns a dict_insert_cases enum, and optionally a new_index | |
| // which is used for some cases | |
| int dict_insert_case(const dict_t *d, const char *w, int i, int *new_i); | |
| // marks the passed dict as a word | |
| void dict_insert_exists(dict_t *dict, const char *word); | |
| // splits the prefix of the dict into two at the | |
| // provided index. requires index < strlen(dict->prefix) | |
| // then it inserts the word as a leaf of the split | |
| // dict | |
| void dict_insert_prefix_split(dict_t *dict, const char *word, int index); | |
| // inserts the word as a child of the prefix. might | |
| // call dick_insert() recursively. returns the inserted | |
| // leaf. | |
| dict_t *dict_insert_prefix(dict_t *dict, const char *word, int index); | |
| // checks if the dict contains the given words, with | |
| // the constraint that the prefix of the root tree is | |
| // equal to the word up to `i` | |
| int dict_contains(const Dictionary*, const char *w, int i); | |
| // get the leaf node from the dictionary with the given character | |
| dict_t *dict_get_leaf(const dict_t *, const char c); | |
| void dict_set_leaf(dict_t *, const char c, const dict_t *); | |
| // creates a copy of a dictionary | |
| dict_t *dict_copy_leaf(dict_t *dict); | |
| // creates a new leaf with the given contents | |
| dict_t *dict_new_leaf(const char *prefix, bool is_word); | |
| // prints a dictionary recursively | |
| void dict_print(const dict_t *dict, int indent); | |
| // merges src into dest, copying all the words. requires | |
| // all words in src and dest to have a common prefix that | |
| // is index chars long. | |
| void dict_merge(dict_t *dest, const dict_t *src, int index); | |
| char *word_copy(const char *src, int len) { | |
| // allocate enough memory | |
| char *word = malloc(len+1); | |
| // copy word over | |
| memcpy(word, src, len); | |
| // NULL-terminate word | |
| word[len] = '\0'; | |
| return word; | |
| } | |
| dict_t *dict_get_leaf(const dict_t *dict, const char c) | |
| { | |
| return dict->children[char_to_int(c)]; | |
| } | |
| void dict_set_leaf(dict_t *dict, const char c, const dict_t *leaf) | |
| { | |
| dict->children[char_to_int(c)] = (dict_t *) leaf; | |
| } | |
| dict_t *dict_new_leaf(const char *prefix, bool is_word) { | |
| // allocate memory | |
| dict_t *dict = malloc(sizeof(Dictionary)); | |
| // initialize memory | |
| *dict = (Dictionary){ | |
| .prefix = prefix, | |
| .is_word = is_word, | |
| .children = {NULL}}; | |
| return dict; | |
| } | |
| Dictionary* | |
| Dictionary_create() | |
| { | |
| return dict_new_leaf(NULL, false); | |
| } | |
| dict_t *dict_copy_leaf(dict_t *dict) { | |
| // allocate memory | |
| dict_t *copy = malloc(sizeof(dict_t)); | |
| // copy over contents | |
| memcpy(copy, dict, sizeof(dict_t)); | |
| return copy; | |
| } | |
| void | |
| Dictionary_delete(Dictionary* dict) { | |
| // empty dict has a NULL prefix | |
| if(dict->prefix) { | |
| // have to cast the prefix to void* to | |
| // avoid a compiler warning | |
| free((void*) dict->prefix); | |
| } | |
| // delete children | |
| for(char c = 'A'; c <= 'Z'; c++){ | |
| if(dict_get_leaf(dict, c) != NULL) { | |
| Dictionary_delete(dict_get_leaf(dict, c)); | |
| } | |
| } | |
| free(dict); | |
| } | |
| void | |
| Dictionary_insert(Dictionary *dict, const char *word) { | |
| dict_insert(dict, word, 0); | |
| } | |
| int dict_insert_case( | |
| const dict_t *dict, | |
| const char *word, | |
| int index, | |
| int *new_index) | |
| { | |
| const char *prefix = dict->prefix; | |
| // compare prefix and word and find out if they are equal, | |
| // which one is shorter, or where they stop being equal | |
| while(prefix[index] && word[index] && prefix[index] == word[index]) { | |
| index++; | |
| } | |
| // set new_index to where we ended up after comparing | |
| *new_index = index; | |
| // if both prefix and word are NULL now, it means | |
| // they were equal so this leaf needs to be marked | |
| // as is_word | |
| if(!prefix[index] && !word[index]) | |
| return DICT_INSERT_EXISTS; | |
| // if the prefix is over (and the word isn't), we | |
| // need to insert a new leaf here | |
| else if(!prefix[index]) | |
| return DICT_INSERT_LEAF; | |
| // otherwise we have to split the prefix | |
| else //if(!word[index]) | |
| return DICT_INSERT_SPLIT_PREFIX; | |
| } | |
| void dict_insert(dict_t *dict, const char *word, int index) | |
| { | |
| // can't have null words in the dict | |
| if(word == NULL) | |
| return; | |
| // special case: on initialisation the root prefix is | |
| // NULL. in this case we can be lazy and just overwrite | |
| // that with the word | |
| if(dict->prefix == NULL) { | |
| dict->prefix = word; | |
| dict->is_word = true; | |
| return; | |
| } | |
| // figure out how to insert the word and call the right | |
| // method for it | |
| int new_index; | |
| switch(dict_insert_case(dict, word, index, &new_index)) { | |
| case DICT_INSERT_EXISTS: | |
| dict_insert_exists(dict, word); | |
| break; | |
| case DICT_INSERT_SPLIT_PREFIX: | |
| dict_insert_prefix_split(dict, word, new_index); | |
| dict_insert(dict, word, new_index); | |
| break; | |
| case DICT_INSERT_LEAF: | |
| dict_insert(dict_insert_prefix(dict, word, new_index), word, new_index); | |
| break; | |
| } | |
| } | |
| void dict_insert_exists(dict_t *dict, const char *word) { | |
| if(!dict->is_word) { | |
| // any prefixes which are not marked as words are guaranteed | |
| // to come from us, so we can safely free() them and overwrite | |
| // them with the pointer passed by the user. | |
| if(dict->prefix != word) { | |
| free((void*)dict->prefix); | |
| } | |
| dict->prefix = word; | |
| dict->is_word = true; | |
| } | |
| } | |
| void dict_insert_prefix_split(dict_t *dict, const char *word, int index) { | |
| // create new node | |
| dict_t *leaf = dict_copy_leaf(dict); | |
| // delete all children of node | |
| memset(dict->children, 0, sizeof(dict_t *) * ALPHA_SIZE); | |
| // connect new node to old node | |
| dict_set_leaf(dict, dict->prefix[index], leaf); | |
| // copy prefix as far as we need and set it as prefix of | |
| // the old node | |
| dict->prefix = word_copy(leaf->prefix, index); | |
| dict->is_word = false; | |
| } | |
| dict_t *dict_insert_prefix(dict_t *dict, const char *word, int index) { | |
| // two cases to consider: the leaf node could be nonexistent | |
| // in which case we have to create a new new, or it could | |
| // exist and we have to recursively call dict_insert(). | |
| if(dict_get_leaf(dict, word[index]) == NULL) { | |
| // create new leaf to hold the word | |
| dict_t *leaf = dict_new_leaf(word, false);; | |
| // link leaf node to parent node | |
| dict_set_leaf(dict, word[index], leaf); | |
| return leaf; | |
| } else { | |
| // a leaf node already exists, so we return it. | |
| return dict_get_leaf(dict, word[index]); | |
| } | |
| } | |
| int | |
| Dictionary_isIn(const Dictionary *dict, const char *word) { | |
| if(word == NULL || dict->prefix == NULL){ | |
| return false; | |
| } | |
| return dict_contains(dict, word, 0); | |
| } | |
| int | |
| dict_contains(const dict_t *dict, const char *word, int i){ | |
| // we reuse the dict_insert_case() method here because | |
| // it tells us if the word is in it or not | |
| int new_i; | |
| switch(dict_insert_case(dict, word, i, &new_i)) { | |
| case DICT_INSERT_EXISTS: | |
| // word exists are a prefix, we just need to | |
| // check if it's marked as a word. | |
| return dict->is_word; | |
| case DICT_INSERT_LEAF: | |
| // if there is a fitting prefix, check the | |
| // appropriate leaf recursively | |
| if(dict_get_leaf(dict, word[new_i]) != NULL) { | |
| return dict_contains(dict_get_leaf(dict, word[new_i]), word, new_i); | |
| } else { | |
| return false; | |
| } | |
| case DICT_INSERT_SPLIT_PREFIX: | |
| default: | |
| // if the word is part of a prefix, then | |
| // it's not in the dict. | |
| return false; | |
| } | |
| } | |
| void | |
| Dictionary_print(const Dictionary* dict) | |
| { | |
| dict_print(dict, 0); | |
| } | |
| void dict_print(const dict_t *dict, int indent) { | |
| // create indentation | |
| for(int i = 0; i < indent; i++) { | |
| printf(" "); | |
| } | |
| // signify if this is a word of a prefix | |
| if(dict->is_word) { | |
| printf("WORD("); | |
| } else { | |
| printf("PREFIX("); | |
| } | |
| // print the word/prefix or NULL | |
| if(dict->prefix) { | |
| printf("\"%s\")\n", dict->prefix); | |
| } else { | |
| printf("NULL)\n"); | |
| } | |
| // print children | |
| for(char c = 'A'; c <= 'Z'; c++) { | |
| Dictionary *d = dict_get_leaf(dict, c); | |
| if(d) { | |
| dict_print(d, indent+1); | |
| } | |
| } | |
| } | |
| void | |
| Dictionary_merge(Dictionary* dest, const Dictionary* src) | |
| { | |
| dict_merge(dest, src, 0); | |
| } | |
| void | |
| dict_merge(dict_t *dest, const dict_t *src, int index) | |
| { | |
| // if src is empty we are done here | |
| if(src->prefix == NULL) { | |
| return; | |
| } | |
| if(dest->prefix == NULL) { | |
| index = strlen(src->prefix); | |
| dest->prefix = word_copy(src->prefix, index); | |
| } | |
| dict_t *leaf; | |
| const char *word; | |
| int new_index; | |
| switch(dict_insert_case(dest, src->prefix, index, &new_index)) { | |
| case DICT_INSERT_EXISTS: | |
| // mark the prefix as word if necessary | |
| if(src->is_word) { | |
| dest->is_word = true; | |
| //dict_insert_exists(dest, ); | |
| } | |
| // add all child leafs | |
| for(char c = 'A'; c <= 'Z'; c++) { | |
| if((leaf = dict_get_leaf(src, c)) != NULL) { | |
| dict_merge(dest, leaf, new_index); | |
| } | |
| } | |
| break; | |
| case DICT_INSERT_SPLIT_PREFIX: | |
| // split prefix and continue merging | |
| dict_insert_prefix_split(dest, src->prefix, new_index); | |
| dict_merge(dest, src, new_index); | |
| break; | |
| case DICT_INSERT_LEAF: | |
| // create a copy of the word and add it as leaf | |
| if(!dict_get_leaf(dest, src->prefix[new_index])) { | |
| word = word_copy(src->prefix, new_index+strlen(src->prefix+new_index)); | |
| } else { | |
| word = src->prefix; | |
| } | |
| leaf = dict_insert_prefix(dest, word, new_index); | |
| dict_merge(leaf, src, new_index); | |
| break; | |
| } | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment