shyang · July 26, 2012 11:46
diff --git a/gistfile1.py b/gistfile1.py
 # coding=utf8
 '''
 2012/7/26

 encode and decode a normal tree (children can be more than 2) to and from string (format of your own choice)
 '''

 try:
    from functools import reduce
 except:
    pass

 class Node:
    def __init__(self, value, children=[]):
        self.value = value
        self.children = children

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return str(self.value)

 def flatten(A): # [[a], [b], [c]] -> [a, b, c]
    return reduce(lambda x, y: x + y, A, [])

 def encode(T):
    if not T:
        return ''
    queue = [[T]]
    levels = []
    while flatten(queue):
        levels.append('|'.join([','.join(map(str, group)) for group in queue]))
        queue = [node.children for node in flatten(queue)]
    return ';'.join(levels)

 def decode(s):
    if not s:
        return None
    levels = s.split(';')
    root = None
    for l in levels:
        group = l.split('|') # [[2, 3, 4]], [[4,5],[],[]] [[7,8], [], [], []]
        for i in range(len(group)):
            current_level = []
            children = group[i].split(',')
            for child in children:
                if not root:
                    root = Node(child, children=[]) # the root [[1]]
                    current_level = [root]
                    continue
                if not child:
                    continue
                node = Node(value=int(child), children=[])
                current_level += [node]
                last_level[i].children.append(node)
            # [4,5] done, next to [[], []]
            if len(group) == 0:
                current_level += [None]
        # one level done
        last_level = current_level
    return root

 T = Node(1, [Node(2, [Node(5), Node(6)]), Node(3), Node(4, [Node(7), Node(8), Node(9)])])

 def p(t):
    return '(' + str(t) + ''.join(list(map(p, t.children))) + ')' if t else '()'

 print('original', p(T))
 enc = encode(T)
 print('encoded ', enc)
 print('decoded ', p(decode(enc)))

 #### simpler version of decoding ####
 import re

 def tokenize(s):
    return re.findall(r'\(|\)|\d+', s)

 def match(ts, t):
    return ts[0] == t

 def expect(ts, t):
    assert match(ts, t)
    ts.pop(0)

 def decode2(ts):
    expect(ts, '(')
    node = None
    if not match(ts, ')'):
        # expect int, not implemented
        node = Node(ts[0], children=[])
        ts.pop(0)
        while not match(ts, ')'):
            node.children.append(decode2(ts))
    expect(ts, ')')
    return node

 print()
 print('encode2 ', p(T))
 print('decode2 ', p(decode2(tokenize(p(T)))))


 ''' output

 original (1(2(5)(6))(3)(4(7)(8)(9)))
 encoded  1;2,3,4;5,6||7,8,9
 decoded  (1(2(5)(6))(3)(4(7)(8)(9)))

 encode2  (1(2(5)(6))(3)(4(7)(8)(9)))
 decode2  (1(2(5)(6))(3)(4(7)(8)(9)))

 '''
	# coding=utf8
	'''
	2012/7/26

	encode and decode a normal tree (children can be more than 2) to and from string (format of your own choice)
	'''

	try:
	from functools import reduce
	except:
	pass

	class Node:
	def __init__(self, value, children=[]):
	self.value = value
	self.children = children

	def __str__(self):
	return str(self.value)

	def __repr__(self):
	return str(self.value)

	def flatten(A): # [[a], [b], [c]] -> [a, b, c]
	return reduce(lambda x, y: x + y, A, [])

	def encode(T):
	if not T:
	return ''
	queue = [[T]]
	levels = []
	while flatten(queue):
	levels.append('\|'.join([','.join(map(str, group)) for group in queue]))
	queue = [node.children for node in flatten(queue)]
	return ';'.join(levels)

	def decode(s):
	if not s:
	return None
	levels = s.split(';')
	root = None
	for l in levels:
	group = l.split('\|') # [[2, 3, 4]], [[4,5],[],[]] [[7,8], [], [], []]
	for i in range(len(group)):
	current_level = []
	children = group[i].split(',')
	for child in children:
	if not root:
	root = Node(child, children=[]) # the root [[1]]
	current_level = [root]
	continue
	if not child:
	continue
	node = Node(value=int(child), children=[])
	current_level += [node]
	last_level[i].children.append(node)
	# [4,5] done, next to [[], []]
	if len(group) == 0:
	current_level += [None]
	# one level done
	last_level = current_level
	return root

	T = Node(1, [Node(2, [Node(5), Node(6)]), Node(3), Node(4, [Node(7), Node(8), Node(9)])])

	def p(t):
	return '(' + str(t) + ''.join(list(map(p, t.children))) + ')' if t else '()'

	print('original', p(T))
	enc = encode(T)
	print('encoded ', enc)
	print('decoded ', p(decode(enc)))

	#### simpler version of decoding ####
	import re

	def tokenize(s):
	return re.findall(r'\(\|\)\|\d+', s)

	def match(ts, t):
	return ts[0] == t

	def expect(ts, t):
	assert match(ts, t)
	ts.pop(0)

	def decode2(ts):
	expect(ts, '(')
	node = None
	if not match(ts, ')'):
	# expect int, not implemented
	node = Node(ts[0], children=[])
	ts.pop(0)
	while not match(ts, ')'):
	node.children.append(decode2(ts))
	expect(ts, ')')
	return node

	print()
	print('encode2 ', p(T))
	print('decode2 ', p(decode2(tokenize(p(T)))))


	''' output

	original (1(2(5)(6))(3)(4(7)(8)(9)))
	encoded 1;2,3,4;5,6\|\|7,8,9
	decoded (1(2(5)(6))(3)(4(7)(8)(9)))

	encode2 (1(2(5)(6))(3)(4(7)(8)(9)))
	decode2 (1(2(5)(6))(3)(4(7)(8)(9)))

	'''