Created
August 19, 2023 19:32
-
-
Save quasar098/5a4c2f88d4d4665d38e21bf3b08f9bde to your computer and use it in GitHub Desktop.
huffman coding for n-ary tree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Union | |
ALPHABET = "0123" # base 4 example | |
BASE = len(ALPHABET) | |
class TextNode: | |
def __init__(self, char: str, val: str): | |
self.children = None | |
self.value = val | |
self.char = char | |
def __repr__(self): | |
return f"<TextNode({self.char}, {self.value})>" | |
def cleanse_placeholders(self): | |
return | |
def get_tree(self): | |
nl = "\n" | |
nnl = "\\n" | |
return f'"{self.char if self.char != nl else nnl}"' | |
class Node: | |
def __init__(self, children: list): | |
self.children: list[Union[Node, TextNode]] = children | |
@property | |
def value(self): | |
return sum([child.value for child in self.children]) | |
def cleanse_placeholders(self): | |
for node in [child for child in self.children]: | |
if isinstance(node, TextNode) and node.char is None: | |
self.children.remove(node) | |
continue | |
node.cleanse_placeholders() | |
def __repr__(self): | |
return f"<Node({self.children}, {self.value})>" | |
def get_tree(self, indent=0, indent_lines=None): | |
def get_indent(amount: int): | |
indent_total = "" | |
for i in range(amount): | |
if i in indent_lines: | |
indent_total += "│" | |
else: | |
indent_total += " " | |
return indent_total | |
if indent_lines is None: | |
indent_lines = [] | |
total = f"root──" if indent == 0 else "" | |
for index, node in enumerate(self.children): | |
if not isinstance(node, TextNode): | |
connection = "└" | |
if len(self.children)-1 != index: | |
connection = "├" | |
if indent not in indent_lines: | |
indent_lines.append(indent) | |
else: | |
indent_lines = [_ for _ in indent_lines if _ != indent] | |
indent_text = get_indent(indent) | |
if indent == 0: | |
connection = "┬" + connection[1:] | |
indent += 6 | |
indent_lines = [6] | |
total += f"{indent_text}{connection}──{ALPHABET[index]}\n" + node.get_tree(indent+3, indent_lines) | |
else: | |
connection = "└" | |
if len(self.children)-1 != index: | |
connection = "├" | |
indent_text = get_indent(indent) | |
if indent == 0: | |
connection = "┬" + connection[1:] | |
indent += 6 | |
indent_lines = [6] | |
total += f"{indent_text}{connection}──{ALPHABET[index]} ({node.get_tree()})\n" | |
return total | |
def main(): | |
text = """etiam tempor orci eu lobortis elementum nibh tellus molestie nunc non blandit massa enim nec dui nunc mattis enim | |
ut tellus elementum sagittis vitae et leo duis ut diam quam nulla porttitor massa id neque aliquam vestibulum morbi blandit | |
cursus risus at ultrices mi tempus imperdiet nulla malesuada pellentesque elit eget gravida cum sociis natoque penatibus et | |
magnis dis parturient montes nascetur ridiculus mus mauris vitae ultricies leo integer malesuada nunc vel risus commodo viverra | |
maecenas accumsan lacus vel facilisis volutpat est velit egestas dui id ornare arcu odio ut sem nulla pharetra diam sit amet nisl | |
suscipit adipiscing bibendum est ultricies integer quis auctor elit sed vulputate mi sit amet mauris commodo quis imperdiet massa | |
tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada proin libero nunc consequat interdum varius sit amet mattis vulputate | |
enim nulla aliquet porttitor lacus luctus accumsan tortor posuere ac ut consequat semper viverra nam libero justo laoreet sit amet cursus | |
sit amet dictum sit amet justo donec enim diam vulputate ut pharetra sit amet aliquam id diam maecenas ultricies mi eget mauris pharetra | |
et ultrices neque ornare aenean euismod elementum nisi quis eleifend quam adipiscing vitae proin sagittis nisl rhoncus mattis | |
rhoncus urna neque viverra""" | |
counts = {} | |
for char in text: | |
if char not in counts: | |
counts[char] = 0 | |
counts[char] += 1 | |
nodes = [] | |
for char in counts: | |
nodes.insert(0, TextNode(char=char, val=counts[char])) | |
if BASE > 2: | |
while len(nodes) % (BASE-1) != 1: | |
nodes.append(TextNode(None, 0)) | |
while len(nodes) != 1: | |
nodes.sort(key=lambda _: _.value) | |
nodes_to_group = nodes[:BASE] | |
for node in nodes_to_group: | |
nodes.remove(node) | |
nodes.insert(0, Node(nodes_to_group)) | |
nodes[0].cleanse_placeholders() | |
print(nodes[0].get_tree()) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment