Created
July 26, 2012 11:46
-
-
Save shyang/3181616 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf8 | |
''' | |
2012/7/26 | |
encode and decode a normal tree (children can be more than 2) to and from string (format of your own choice) | |
''' | |
try: | |
from functools import reduce | |
except: | |
pass | |
class Node: | |
def __init__(self, value, children=[]): | |
self.value = value | |
self.children = children | |
def __str__(self): | |
return str(self.value) | |
def __repr__(self): | |
return str(self.value) | |
def flatten(A): # [[a], [b], [c]] -> [a, b, c] | |
return reduce(lambda x, y: x + y, A, []) | |
def encode(T): | |
if not T: | |
return '' | |
queue = [[T]] | |
levels = [] | |
while flatten(queue): | |
levels.append('|'.join([','.join(map(str, group)) for group in queue])) | |
queue = [node.children for node in flatten(queue)] | |
return ';'.join(levels) | |
def decode(s): | |
if not s: | |
return None | |
levels = s.split(';') | |
root = None | |
for l in levels: | |
group = l.split('|') # [[2, 3, 4]], [[4,5],[],[]] [[7,8], [], [], []] | |
for i in range(len(group)): | |
current_level = [] | |
children = group[i].split(',') | |
for child in children: | |
if not root: | |
root = Node(child, children=[]) # the root [[1]] | |
current_level = [root] | |
continue | |
if not child: | |
continue | |
node = Node(value=int(child), children=[]) | |
current_level += [node] | |
last_level[i].children.append(node) | |
# [4,5] done, next to [[], []] | |
if len(group) == 0: | |
current_level += [None] | |
# one level done | |
last_level = current_level | |
return root | |
T = Node(1, [Node(2, [Node(5), Node(6)]), Node(3), Node(4, [Node(7), Node(8), Node(9)])]) | |
def p(t): | |
return '(' + str(t) + ''.join(list(map(p, t.children))) + ')' if t else '()' | |
print('original', p(T)) | |
enc = encode(T) | |
print('encoded ', enc) | |
print('decoded ', p(decode(enc))) | |
#### simpler version of decoding #### | |
import re | |
def tokenize(s): | |
return re.findall(r'\(|\)|\d+', s) | |
def match(ts, t): | |
return ts[0] == t | |
def expect(ts, t): | |
assert match(ts, t) | |
ts.pop(0) | |
def decode2(ts): | |
expect(ts, '(') | |
node = None | |
if not match(ts, ')'): | |
# expect int, not implemented | |
node = Node(ts[0], children=[]) | |
ts.pop(0) | |
while not match(ts, ')'): | |
node.children.append(decode2(ts)) | |
expect(ts, ')') | |
return node | |
print() | |
print('encode2 ', p(T)) | |
print('decode2 ', p(decode2(tokenize(p(T))))) | |
''' output | |
original (1(2(5)(6))(3)(4(7)(8)(9))) | |
encoded 1;2,3,4;5,6||7,8,9 | |
decoded (1(2(5)(6))(3)(4(7)(8)(9))) | |
encode2 (1(2(5)(6))(3)(4(7)(8)(9))) | |
decode2 (1(2(5)(6))(3)(4(7)(8)(9))) | |
''' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment