Skip to content

Instantly share code, notes, and snippets.

@zomux
Created December 19, 2019 06:08
Show Gist options
  • Save zomux/d379623be78d792b6661b6f9b210001b to your computer and use it in GitHub Desktop.
Save zomux/d379623be78d792b6661b6f9b210001b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from nltk.tree import ParentedTree
import sys, os
import re
def get_top_k_levels(cfg_string, k=3):
    """Return the top ``k`` levels of a bracketed parse as a one-line string.

    The parse is read with ``ParentedTree.fromstring``. If the outermost node
    has exactly one subtree child (e.g. a ``ROOT`` wrapper), the traversal
    starts from that child instead. All leaf tokens are dropped so only node
    labels remain, and every node on level ``k`` loses its children. Nodes
    left without children are printed as a bare label rather than ``(LABEL )``.

    Example (k=3)::

        (ROOT (NP (NP (NNP Rachel) (NNP Pike)) (: :) (NP (NP ...) (PP ...))))
        -> (NP (NP NNP NNP) : (NP NP PP))

    Args:
        cfg_string: Bracketed tree string, e.g. ``"(ROOT (S (NP ...)))"``.
        k: Number of levels to keep, counting the starting node as level 1.

    Returns:
        The pruned tree as a single-line string with single-space separators.
    """
    tree = ParentedTree.fromstring(cfg_string)
    # Step past a wrapper node that has a single child (typically ROOT -> S).
    # Only descend when that child is a subtree: descending into a leaf string
    # would leave nothing to traverse and break the loop below.
    if len(tree) == 1 and not isinstance(tree[0], str):
        tree = tree[0]

    level_count = 1
    # Process the tree strictly one level at a time. Keeping the current and
    # next level in separate lists guarantees that ``level_count`` always
    # matches the depth of the node being handled (popping from the tail of a
    # single shared list mixes levels together).
    current_level = [tree]
    while current_level:
        next_level = []
        for node in current_level:
            # Drop leaf tokens; iterate over a copy because the node is
            # mutated while scanning.
            for child in list(node):
                if isinstance(child, str):
                    node.remove(child)
            if level_count >= k:
                # Deepest level to keep: discard everything below this node.
                node.clear()
            else:
                next_level.extend(node)
        current_level = next_level
        level_count += 1

    return_str = str(tree)
    # Long trees are pretty-printed over several indented lines; flatten them.
    return_str = return_str.replace("\r", "")
    return_str = return_str.replace("\n", "")
    return_str = re.sub(r"\s+", " ", return_str)
    # A childless node prints as "(LABEL )"; reduce it to "LABEL".
    return_str = re.sub(r"\(([^\(\) ]+) \)", "\\1", return_str)
    return return_str
if __name__ == '__main__':
    # Filter mode: each stdin line holds one bracketed parse, for example
    #   (ROOT (NP (NP (NNP Rachel) (NNP Pike)) (: :) (NP (NP (DT the) (NN science)) (PP (IN behind) (NP (DT a) (NN climate) (NN headline))))))
    # and one pruned tree is written to stdout per input line.
    for raw_line in sys.stdin:
        bracketed = raw_line.strip()
        pruned = get_top_k_levels(bracketed)
        print(pruned)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment