Skip to content

Instantly share code, notes, and snippets.

@mtholder
Created June 7, 2019 22:55
Show Gist options
  • Select an option

  • Save mtholder/e7c2faece4b1db334e261384057422de to your computer and use it in GitHub Desktop.

Select an option

Save mtholder/e7c2faece4b1db334e261384057422de to your computer and use it in GitHub Desktop.
start of generating a C-trie in python
#!/usr/bin/env python3
# -*- coding: <encoding name> -*-
from __future__ import print_function
from collections import defaultdict
from math import log
import codecs
from ctypes import c_uint64 as c_trie_node_type
import sys
""" Pseudocode from Maly, 1976 Appendix B
Maly, K. Compressed tries. Commun. ACM 19, 7, 409-415. [1]
"""
def init_c_trie(num_keys, num_letters):
c_field_len = 1 + int(log(num_keys)/log(2))
min_trie_node_size = 2 + c_field_len + num_letters
if min_trie_node_size > 64:
raise RuntimeError("Sorry cannot currently handle min_trie_node_size > 64. Your set of strings needs size={}".format(min_trie_node_size))
return tuple(c_trie_node_type() for i in range(num_keys))
def _prep_input(keys):
kl = list(keys)
kl.sort()
ls = set()
for k in kl:
ls.update(k)
lsl = list(ls)
lsl.sort()
return kl, ''.join(lsl)
def build_c_tree(keys):
sorted_keys, all_letters = _prep_input(keys)
num_letters = len(all_letters)
curr_trie = init_c_trie(len(sorted_keys), num_letters)
for key_index, key in enumerate(sorted_keys):
pass
print(curr_trie)
def main(inp_file_name=None):
if inp_file_name:
with codecs.open(inp_file_name, 'rU', encoding='utf-8') as inp:
keys = [line.strip() for line in inp if line.strip()]
else:
keys = ["A", "FOR", "IN", "THE",
"AND", "FROM", "IS", "THIS",
"ARE", "HAD", "IT", "TO",
"AS", "HAVE", "NOT", "WAS",
"AT", "HE", "OF", "WHICH",
"BE", "HER", "ON", "WITH",
"BUT", "HIS", "OR", "YOU",
"BY", "I", "THAT",]
build_c_tree(keys)
if __name__ == '__main__':
main(sys.argv[-1] if len(sys.argv) > 1 else None)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment