Skip to content

Instantly share code, notes, and snippets.

@categulario
Last active July 29, 2016 16:17
Show Gist options
  • Select an option

  • Save categulario/9422aa048cbdbca942ef82bfd515af91 to your computer and use it in GitHub Desktop.

Select an option

Save categulario/9422aa048cbdbca942ef82bfd515af91 to your computer and use it in GitHub Desktop.
given an array of strings make a tree of common prefixes
import unittest
from itertools import takewhile, dropwhile, zip_longest
import json
from pprint import pprint
def largest_prefix(string1, string2):
return '/'.join(
map(
lambda x:x[0],
takewhile(
lambda x:x[0]==x[1],
zip(string1.split('/'), string2.split('/'))
)
)
)
def discard_prefix(prefix, string):
return '/'.join(
map(
lambda x: x[1],
dropwhile(
lambda x: x[0] == x[1],
zip_longest(prefix.split('/'), string.split('/'))
)
)
)
class Group:
"""A group"""
def __init__(self, string):
self.name = string
self.prefix = string.split('/')[0]
self.children = []
def has_common_prefix(self, string):
return self.prefix == string.split('/')[0]
def process(self, string):
lcp = largest_prefix(self.name, string)
if lcp == self.name:
for group in self.children:
subname = discard_prefix(lcp, string)
if group.has_common_prefix(subname):
group.process(subname)
break
else:
# just append this group to the children
self.children.append(Group(discard_prefix(self.name, string)))
else:
if not self.children:
# fork this group into subgroups
self.children.append(Group(discard_prefix(lcp, self.name)))
self.children.append(Group(discard_prefix(lcp, string)))
self.name = lcp
else:
# move children to a subgroup and fork
g1 = Group(discard_prefix(lcp, self.name))
g1.children = self.children
self.children = []
self.children.append(g1)
self.children.append(Group(discard_prefix(lcp, string)))
self.name = lcp
def __repr__(self):
return repr({
"name": self.name,
"children": self.children
})
def make_tree(string_array):
groups = []
for string in string_array:
for group in groups:
if group.has_common_prefix(string):
group.process(string)
break
else:
groups.append(Group(string))
return groups
class TestMakeTree(unittest.TestCase):
def test_largest_prefix(self):
self.assertEqual('ags/ags', largest_prefix('ags/ags/foo', 'ags/ags/var'))
self.assertEqual('ags', largest_prefix('ags/log/foo', 'ags/ags/var'))
self.assertEqual('', largest_prefix('var/log/foo', 'ags/ags/var'))
self.assertEqual('ags/ags', largest_prefix('ags/ags/foo', 'ags/ags/var/vaz'))
self.assertEqual('ags/ags/var', largest_prefix('ags/ags/var', 'ags/ags/var/vaz'))
def test_discard_prefix(self):
self.assertEqual('var', discard_prefix('foo/var', 'foo/var/var'))
self.assertEqual('var/log', discard_prefix('foo', 'foo/var/log'))
self.assertEqual('var/log/baz', discard_prefix('', 'var/log/baz'))
def test_one(self):
res = make_tree(['ags/ags/uni'])
self.assertEqual(1, len(res))
self.assertEqual('ags/ags/uni', res[0].name)
def test_two(self):
res = make_tree(['ags/ags/uni', 'ags/ags/log'])
self.assertEqual(1, len(res))
self.assertEqual('ags/ags', res[0].name)
self.assertEqual('uni', res[0].children[0].name)
self.assertEqual('log', res[0].children[1].name)
def test_two2(self):
res = make_tree(['ags/ags/uni', 'ags/var/log'])
self.assertEqual(1, len(res))
self.assertEqual('ags', res[0].name)
self.assertEqual('ags/uni', res[0].children[0].name)
self.assertEqual('var/log', res[0].children[1].name)
def test_break_broken(self):
res = make_tree(['ags/ags/foo', 'ags/ags/var', 'ags/log/baz'])
self.assertEqual(1, len(res))
self.assertEqual('ags', res[0].name)
self.assertEqual(2, len(res[0].children))
self.assertEqual('ags', res[0].children[0].name)
self.assertEqual('log/baz', res[0].children[1].name)
self.assertEqual(2, len(res[0].children[0].children))
self.assertEqual('foo', res[0].children[0].children[0].name)
self.assertEqual('var', res[0].children[0].children[1].name)
def test_deep_break(self):
res = make_tree([
'ags/log/baz',
'ags/var/foo',
'ags/log/foo',
])
self.assertEqual(1, len(res))
self.assertEqual('ags', res[0].name)
self.assertEqual(2, len(res[0].children))
self.assertEqual('log', res[0].children[0].name)
self.assertEqual('var/foo', res[0].children[1].name)
self.assertEqual(2, len(res[0].children[0].children))
self.assertEqual('baz', res[0].children[0].children[0].name)
self.assertEqual('foo', res[0].children[0].children[1].name)
def test_cdmx_case(self):
res = make_tree([
'CDMX/JDP/JDP',
'CDMX/JRC/JRC',
'CDMX/MEX/ATI',
'CDMX/MEX/NAU',
])
self.assertEqual(1, len(res))
self.assertEqual('CDMX', res[0].name)
self.assertEqual(3, len(res[0].children))
self.assertEqual('JDP/JDP', res[0].children[0].name)
self.assertEqual('JRC/JRC', res[0].children[1].name)
self.assertEqual('MEX', res[0].children[2].name)
self.assertEqual(2, len(res[0].children[2].children))
self.assertEqual('ATI', res[0].children[2].children[0].name)
self.assertEqual('NAU', res[0].children[2].children[1].name)
if __name__ == '__main__':
res = make_tree(['ags/ags/foo', 'ags/ags/var', 'ags/log/baz'])
pprint(res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment