Created
June 7, 2019 22:55
-
-
Save mtholder/e7c2faece4b1db334e261384057422de to your computer and use it in GitHub Desktop.
start of generating a C-trie in python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- coding: <encoding name> -*- | |
| from __future__ import print_function | |
| from collections import defaultdict | |
| from math import log | |
| import codecs | |
| from ctypes import c_uint64 as c_trie_node_type | |
| import sys | |
| """ Pseudocode from Maly, 1976 Appendix B | |
| Maly, K. Compressed tries. Commun. ACM 19, 7, 409-415. [1] | |
| """ | |
| def init_c_trie(num_keys, num_letters): | |
| c_field_len = 1 + int(log(num_keys)/log(2)) | |
| min_trie_node_size = 2 + c_field_len + num_letters | |
| if min_trie_node_size > 64: | |
| raise RuntimeError("Sorry cannot currently handle min_trie_node_size > 64. Your set of strings needs size={}".format(min_trie_node_size)) | |
| return tuple(c_trie_node_type() for i in range(num_keys)) | |
| def _prep_input(keys): | |
| kl = list(keys) | |
| kl.sort() | |
| ls = set() | |
| for k in kl: | |
| ls.update(k) | |
| lsl = list(ls) | |
| lsl.sort() | |
| return kl, ''.join(lsl) | |
| def build_c_tree(keys): | |
| sorted_keys, all_letters = _prep_input(keys) | |
| num_letters = len(all_letters) | |
| curr_trie = init_c_trie(len(sorted_keys), num_letters) | |
| for key_index, key in enumerate(sorted_keys): | |
| pass | |
| print(curr_trie) | |
| def main(inp_file_name=None): | |
| if inp_file_name: | |
| with codecs.open(inp_file_name, 'rU', encoding='utf-8') as inp: | |
| keys = [line.strip() for line in inp if line.strip()] | |
| else: | |
| keys = ["A", "FOR", "IN", "THE", | |
| "AND", "FROM", "IS", "THIS", | |
| "ARE", "HAD", "IT", "TO", | |
| "AS", "HAVE", "NOT", "WAS", | |
| "AT", "HE", "OF", "WHICH", | |
| "BE", "HER", "ON", "WITH", | |
| "BUT", "HIS", "OR", "YOU", | |
| "BY", "I", "THAT",] | |
| build_c_tree(keys) | |
| if __name__ == '__main__': | |
| main(sys.argv[-1] if len(sys.argv) > 1 else None) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment