Created
          September 13, 2020 10:49 
        
      - 
      
- 
        Save wernsey/831cc801bb859bb649e194e4fb386561 to your computer and use it in GitHub Desktop. 
    String interning snippet
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <assert.h> | |
| typedef struct internTreeNode ITNode; | |
| /* ============================================================= | |
| String Interning | |
| `str_intern()` returns a reference counted copy of its parameter. It looks up | |
| the string in the collection of strings it has seen before. If it hasn't seen | |
| the string before, it creates a copy with a reference count initialised to 1 | |
| and adds it to the collection, otherwise it increments the reference count of | |
| the string in the collection and returns it. | |
| The collection the strings are kept in is a Red-Black tree. The implementation | |
| is just a straight forward adaption of the one in the [Wikipedia article][red-black]. | |
| The first parameter to `str_intern()` is a pointer to an `ITNode*` that forms | |
| the root of this tree. | |
| The reference counting works by storing an `unsigned int` in the bytes before | |
| the `char*` returned by `str_intern()`. The `char*` can therefore be used like | |
| a regular null-terminated C string. | |
| Call `str_release()` to decrement the reference count. If the reference count | |
| drops to zero, the memory is freed. | |
| [red-black]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree#Insertion | |
| ============================================================= */ | |
| #define BLACK 0 | |
| #define RED 1 | |
| struct internTreeNode { | |
| char *value; | |
| int color; | |
| struct internTreeNode *parent, *left, *right; | |
| }; | |
| static char *str_make(const char *str) { | |
| size_t len = strlen(str); | |
| unsigned int *rc = malloc((sizeof *rc) + len + 1); | |
| char *data = (char*)rc + sizeof *rc; | |
| *rc = 1; | |
| memcpy(data, str, len); | |
| data[len] = '\0'; | |
| return data; | |
| } | |
| static ITNode *make_intern_node(ITNode *parent, const char *str) { | |
| ITNode *node = malloc(sizeof *node); | |
| if(!node) | |
| return NULL; | |
| node->value = str_make(str); | |
| node->parent = parent; | |
| node->color = RED; | |
| node->left = NULL; | |
| node->right = NULL; | |
| return node; | |
| } | |
| static void str_release(char *str) { | |
| unsigned int *rc = (unsigned int *)(str - sizeof *rc); | |
| if(--(*rc) == 0) | |
| free(rc); | |
| } | |
| static char *str_retain(char *str) { | |
| unsigned int *rc = (unsigned int *)(str - sizeof *rc); | |
| (*rc)++; | |
| return str; | |
| } | |
| static ITNode *do_str_intern(ITNode *root, const char *str) { | |
| int i = strcmp(root->value, str); | |
| if(i == 0) { | |
| str_retain(root->value); | |
| return root; | |
| } else if(i > 0) { | |
| if(root->left) { | |
| return do_str_intern(root->left, str); | |
| } else { | |
| root->left = make_intern_node(root, str); | |
| return root->left; | |
| } | |
| } else { // i < 0 | |
| if(root->right) { | |
| return do_str_intern(root->right, str); | |
| } else { | |
| root->right = make_intern_node(root, str); | |
| return root->right; | |
| } | |
| } | |
| } | |
| static void repair_tree(ITNode *n); | |
| static char *str_intern(ITNode **pool, const char *str) { | |
| ITNode *n; | |
| if(!*pool) { | |
| n = make_intern_node(NULL, str); | |
| } else { | |
| n = do_str_intern(*pool, str); | |
| } | |
| repair_tree(n); | |
| ITNode *root = n; | |
| while(root->parent) | |
| root = root->parent; | |
| *pool = root; | |
| return n->value; | |
| } | |
| static void free_intern_tree(ITNode *node) { | |
| if(!node) | |
| return; | |
| free_intern_tree(node->left); | |
| free_intern_tree(node->right); | |
| // Note that node->value is not freed on purpose... | |
| free(node); | |
| } | |
| /* Red-Black Tree */ | |
| static ITNode *uncle(ITNode *n) { | |
| ITNode *p = n->parent; | |
| if(p && p->parent) | |
| return p == p->parent->left ? p->parent->right : p->parent->left; | |
| return NULL; | |
| } | |
| static void rotate_left(ITNode *n) { | |
| assert(n); | |
| ITNode *nnew = n->right; | |
| ITNode *p = n->parent; | |
| assert(nnew); | |
| n->right = nnew->left; | |
| nnew->left = n; | |
| n->parent = nnew; | |
| if(n->right) | |
| n->right->parent = n; | |
| if(p) { | |
| if(n == p->left) | |
| p->left = nnew; | |
| else | |
| p->right = nnew; | |
| } | |
| nnew->parent = p; | |
| } | |
| static void rotate_right(ITNode *n) { | |
| assert(n); | |
| ITNode *nnew = n->left; | |
| ITNode *p = n->parent; | |
| assert(nnew); | |
| n->left = nnew->right; | |
| nnew->right = n; | |
| n->parent = nnew; | |
| if(n->left) | |
| n->left->parent = n; | |
| if(p) { | |
| if(n == p->left) | |
| p->left = nnew; | |
| else | |
| p->right = nnew; | |
| } | |
| nnew->parent = p; | |
| } | |
| static void repair_tree(ITNode *n) { | |
| ITNode *p = n->parent; | |
| if(!p) { | |
| // case 1 | |
| n->color = BLACK; | |
| } else if(p->color == BLACK) { | |
| // case 2 | |
| return; | |
| } else { | |
| ITNode *u = uncle(n); | |
| ITNode *g = p->parent; | |
| assert(g); | |
| if(u && u->color == RED) { | |
| // case 3 | |
| p->color = BLACK; | |
| u->color = BLACK; | |
| g->color = RED; | |
| repair_tree(g); | |
| } else { | |
| // case 4 | |
| if(n == p->right && p == g->left) { | |
| rotate_left(p); | |
| n = n->left; | |
| p = n->parent; | |
| g = p->parent; | |
| } else if(n == p->left && p == g->right) { | |
| rotate_right(p); | |
| n = n->right; | |
| p = n->parent; | |
| g = p->parent; | |
| } | |
| // case 4 step 2 | |
| assert(p && g); | |
| if(n == p->left) | |
| rotate_right(g); | |
| else | |
| rotate_left(g); | |
| p->color = BLACK; | |
| g->color = RED; | |
| } | |
| } | |
| } | |
| // =========================================================== | |
| static void drawNode(ITNode *node) { | |
| if(!node) | |
| return; | |
| drawNode(node->left); | |
| if(node->color == RED) | |
| printf(" %s [color=red,fontcolor=white,style=filled];\n", node->value); | |
| else | |
| printf(" %s [color=black,fontcolor=white,style=filled];\n", node->value); | |
| drawNode(node->right); | |
| } | |
| static void drawLink(ITNode *node) { | |
| if(!node) | |
| return; | |
| if(node->left) { | |
| printf(" %s -> %s;\n", node->value, node->left->value); | |
| drawLink(node->left); | |
| } | |
| if(node->right) { | |
| printf(" %s -> %s;\n", node->value, node->right->value); | |
| drawLink(node->right); | |
| } | |
| } | |
| void drawGraph(ITNode *root) { | |
| printf("digraph G {\n"); | |
| drawNode(root); | |
| drawLink(root); | |
| printf("}\n"); | |
| } | |
| /* | |
| dot -Tjpeg -o x.jpg g.dot | |
| */ | |
| int main(int argc, char *argv[]) { | |
| ITNode *root = NULL; | |
| str_intern(&root, "e"); | |
| str_intern(&root, "l"); | |
| str_intern(&root, "f"); | |
| str_intern(&root, "w"); | |
| str_intern(&root, "z"); | |
| str_intern(&root, "m"); | |
| str_intern(&root, "o"); | |
| str_intern(&root, "c"); | |
| str_intern(&root, "b"); | |
| str_intern(&root, "j"); | |
| str_intern(&root, "h"); | |
| str_intern(&root, "s"); | |
| str_intern(&root, "x"); | |
| str_intern(&root, "n"); | |
| str_intern(&root, "u"); | |
| str_intern(&root, "v"); | |
| str_intern(&root, "g"); | |
| str_intern(&root, "d"); | |
| str_intern(&root, "y"); | |
| str_intern(&root, "i"); | |
| str_intern(&root, "p"); | |
| str_intern(&root, "a"); | |
| str_intern(&root, "t"); | |
| str_intern(&root, "q"); | |
| str_intern(&root, "r"); | |
| str_intern(&root, "k"); | |
| str_intern(&root, "ga"); | |
| str_intern(&root, "zd"); | |
| str_intern(&root, "gh"); | |
| str_intern(&root, "ge"); | |
| str_intern(&root, "gk"); | |
| str_intern(&root, "ze"); | |
| str_intern(&root, "da"); | |
| str_intern(&root, "za"); | |
| str_intern(&root, "zb"); | |
| str_intern(&root, "gz"); | |
| str_intern(&root, "zc"); | |
| str_intern(&root, "aa"); | |
| str_intern(&root, "az"); | |
| str_intern(&root, "ac"); | |
| str_intern(&root, "ab"); | |
| str_intern(&root, "de"); | |
| str_intern(&root, "dg"); | |
| str_intern(&root, "zf"); | |
| str_intern(&root, "ma"); | |
| str_intern(&root, "mb"); | |
| str_intern(&root, "ea"); | |
| str_intern(&root, "eaa"); | |
| str_intern(&root, "eab"); | |
| str_intern(&root, "eaba"); | |
| str_intern(&root, "eabb"); | |
| drawGraph(root); | |
| free_intern_tree(root); | |
| /* | |
| Note that this program will leak memory, because | |
| `str_release()` needs to be called on all the return | |
| values of `str_intern()` but I didn't bother with | |
| it here | |
| */ | |
| return 0; | |
| } | 
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment
  
            
It outputs a file that can be passed to dot (part of the Graphviz package) to create an image file to visualize the Red-black tree.
Compile and run like so: