This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script parses the RCV1 topics into a tree structure.
# The tree can then be exported to JSON or DOT format.
# For more info on RCV1, see
# http://jmlr.csail.mit.edu/papers/volume5/lewis04a/lewis04a.pdf
import re | |
from treelib import Tree | |
from treelib.plugins import export_to_dot | |
# Read topics from the flat file into a list of lists.