Last active
September 17, 2015 22:47
-
-
Save Flushot/24f08b7f8e51c3919de7 to your computer and use it in GitHub Desktop.
Builds a graph visualization (viewable in yEd) of linked pages in a GitHub wiki repository
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import os | |
| import re | |
| import sys | |
| import networkx as nx | |
| import lxml.etree as et | |
def walk_path(start_path, match_regex=r'.*', recursive=True):
    """Yield paths of files under ``start_path`` whose path matches a regex.

    Args:
        start_path: Directory to scan, or a path to a single file.
        match_regex: Pattern applied with ``re.search`` to each file's joined
            path (the whole path, not just the base name).
        recursive: When true, descend into subdirectories.

    Yields:
        The joined path of every matching file. When ``start_path`` is not a
        directory it is yielded as-is, without being tested against
        ``match_regex``.
    """
    if not os.path.isdir(start_path):
        yield start_path
        return

    match_pat = re.compile(match_regex)
    for entry in os.listdir(start_path):
        file_path = os.path.join(start_path, entry)
        if os.path.isdir(file_path):
            if recursive:
                # walk_path is a generator: merely calling it produces
                # nothing, so its results must be re-yielded explicitly.
                # A plain loop (rather than ``yield from``) keeps this
                # usable on Python 2 as well.
                for nested_path in walk_path(file_path, match_regex,
                                             recursive):
                    yield nested_path
        elif match_pat.search(file_path):
            yield file_path
def create_node(graph, label):
    """Add a node named ``label`` to ``graph`` with a matching 'label' attribute.

    Args:
        graph: Graph object exposing ``add_node(name, **attrs)``.
        label: Node identifier; also stored as the node's 'label' attribute.

    Returns:
        ``label``, so the call can be used inline where a node name is needed.
    """
    # Setting the attribute through add_node avoids the ``graph.node``
    # mapping, which newer NetworkX releases no longer provide. Calling
    # add_node for an existing node just updates its attributes.
    graph.add_node(label, label=label)
    return label
def create_digraph(adjacency_list):
    """Build a directed graph from an iterable of (source, target) pairs.

    Args:
        adjacency_list: Iterable of 2-tuples of node names.

    Returns:
        An ``nx.DiGraph`` with one labelled node per distinct name and one
        edge per pair.
    """
    graph = nx.DiGraph()
    added_nodes = set()

    def add_node(node_name):
        # Create each node once; the set must be updated here, otherwise
        # the membership test never short-circuits.
        if node_name not in added_nodes:
            added_nodes.add(node_name)
            create_node(graph, node_name)
        return node_name

    for source_node, target_node in adjacency_list:
        graph.add_edge(add_node(source_node),
                       add_node(target_node))
    return graph
def parse_document_name(file_path):
    """Return ``file_path`` with its trailing ``.md`` removed.

    Returns ``None`` when the path does not end in ``.md`` or has nothing
    in front of the extension.
    """
    found = re.search(r'(.+?)\.md$', file_path)
    return None if found is None else found.group(1)
def parse_out_documents(file_path):
    """Yield the distinct local link targets found in a Markdown file.

    Inline links of the form ``[text](target)`` or ``[text](target#fragment)``
    are recognised; the fragment is dropped from the yielded target.

    Skipped links:
        * targets containing ``://`` (absolute URLs);
        * fragment-only targets such as ``[top](#top)``, which point inside
          the current page rather than at another document;
        * targets already yielded for this file.

    Args:
        file_path: Path of the Markdown file to scan.

    Yields:
        Each remaining target string, in order of first appearance.
    """
    link_pat = re.compile(r'\[.+?\]\((.+?)(#.+?)?\)', re.MULTILINE)

    # Read everything up front so the file is closed before any value is
    # yielded, even if the caller abandons the generator early.
    with open(file_path) as f:
        contents = f.read()

    visited = set()
    for link_match in link_pat.finditer(contents):
        href = link_match.group(1)
        # Group 1 must match at least one character, so for a bare
        # "(#section)" link it captures the whole "#section" text.
        if '://' in href or href.startswith('#'):
            continue
        if href not in visited:
            visited.add(href)
            yield href
def build_link_graph(path):
    """Build a directed graph of the links between Markdown pages under ``path``.

    Every ``.md`` file found by ``walk_path`` becomes a node named after its
    path relative to ``path`` with the ``.md`` suffix removed. Each local
    link reported by ``parse_out_documents`` becomes an edge from the linking
    page to the link target.

    Args:
        path: Directory to scan (previously ignored in favour of ``'.'``).

    Returns:
        The graph produced by ``create_digraph``, plus nodes for pages that
        no other page links to.
    """
    # Name documents relative to the scanned directory; when given a single
    # file, fall back to its containing directory.
    if os.path.isdir(path):
        root = path
    else:
        root = os.path.dirname(path) or '.'

    all_nodes = set()
    nodes_with_inlinks = set()

    # add linked nodes
    link_edges = set()
    for file_path in walk_path(path, match_regex=r'\.md$'):
        source_document = parse_document_name(os.path.relpath(file_path, root))
        if source_document is None:
            # No usable page name (e.g. a file called just ".md"); None
            # cannot be used as a graph node.
            continue
        all_nodes.add(source_document)
        for target_document in parse_out_documents(file_path):
            link_edges.add((source_document, target_document))
            nodes_with_inlinks.add(target_document)
    graph = create_digraph(link_edges)

    # add orphaned nodes (pages nothing links to); pages that only have
    # outgoing links are already present, and re-adding them is harmless
    orphaned_nodes = all_nodes - nodes_with_inlinks
    for node_name in orphaned_nodes:
        create_node(graph, node_name)
    return graph
def main():
    """Build the link graph for the current directory and print it as yEd GraphML."""
    graph = build_link_graph('.')
    # Plain GraphML alternative, kept for reference:
    # nx.write_graphml(graph, sys.stdout)
    xml_root = build_yed_graphml(graph)
    # The single-argument call form behaves the same under Python 2's print
    # statement and Python 3's print function, so the script parses on both.
    print(et.tostring(xml_root,
                      encoding='unicode',
                      pretty_print=True))
| # TODO: move to a graphml module | |
def build_yed_graphml(graph,
                      node_width=150.0,
                      node_height=30.0,
                      node_fill_color='#FFCC00',
                      node_border_color='#000000',
                      node_text_color='#000000',
                      node_font_size=12):
    """Render ``graph`` as a GraphML element tree with yEd node graphics.

    Node positions come from ``nx.spring_layout``. Every node is written as a
    rectangular ``y:ShapeNode`` of the given size and colours, with its
    'label' attribute as the label text; every edge is written as a plain
    ``edge`` element.

    Args:
        graph: Graph whose nodes each carry a 'label' attribute.
        node_width: Width written to each node's geometry.
        node_height: Height written to each node's geometry.
        node_fill_color: Fill colour of each node.
        node_border_color: Border colour of each node.
        node_text_color: Label text colour.
        node_font_size: Label font size.

    Returns:
        The root ``graphml`` element.
    """
    # Layout graph
    positions = nx.spring_layout(graph,
                                 iterations=100,
                                 scale=1000)

    # GraphML doc
    y_ns = 'http://www.yworks.com/xml/graphml'
    namespaces = {
        None: 'http://graphml.graphdrawing.org/xmlns',  # Default ns
        'java': 'http://www.yworks.com/xml/yfiles-common/1.0/java',
        'sys': 'http://www.yworks.com/xml/yfiles-common/markup/primitives/2.0',
        'x': 'http://www.yworks.com/xml/yfiles-common/markup/2.0',
        'y': y_ns,
        'yed': 'http://www.yworks.com/xml/yed/3'
    }
    root_el = et.Element('{http://graphml.graphdrawing.org/xmlns}graphml',
                         nsmap=namespaces)

    # Attribs: d0 carries the plain label, d1 the yEd node graphics
    et.SubElement(root_el, 'key',
                  {
                      'id': 'd0',
                      'for': 'node',
                      'attr.name': 'label',
                      'attr.type': 'string'
                  })
    et.SubElement(root_el, 'key',
                  {
                      'id': 'd1',
                      'for': 'node',
                      'yfiles.type': 'nodegraphics'
                  })

    # Graph
    graph_el = et.SubElement(root_el, 'graph',
                             {
                                 'edgedefault': 'directed',
                                 'id': 'G'
                             })

    # These do not vary per node, so convert them once
    height_text = str(node_height)
    width_text = str(node_width)
    font_size_text = str(node_font_size)

    # Nodes
    graph_el.append(et.Comment('Nodes'))
    for node_name, node_data in graph.nodes(data=True):
        node_el = et.SubElement(graph_el, 'node',
                                {'id': node_name})

        # GraphML: Label
        label_data_el = et.SubElement(node_el, 'data', {'key': 'd0'})
        label_data_el.text = et.CDATA(node_data['label'])

        # yEd: Node-specific attribs
        graphics_data_el = et.SubElement(node_el, 'data', {'key': 'd1'})
        shape_node_el = et.SubElement(graphics_data_el,
                                      '{%s}ShapeNode' % y_ns)

        # yEd: Size and position
        x_pos, y_pos = positions[node_name]
        et.SubElement(shape_node_el, '{%s}Geometry' % y_ns,
                      {
                          'height': height_text,
                          'width': width_text,
                          'x': str(x_pos),
                          'y': str(y_pos)
                      })

        # yEd: Shape and colors
        et.SubElement(shape_node_el, '{%s}Shape' % y_ns,
                      {'type': 'rectangle'})
        et.SubElement(shape_node_el, '{%s}Fill' % y_ns,
                      {
                          'color': node_fill_color,
                          'transparent': 'false'
                      })
        et.SubElement(shape_node_el, '{%s}BorderStyle' % y_ns,
                      {
                          'color': node_border_color,
                          'type': 'line',
                          'width': '1.0'
                      })

        # yEd: Label
        caption_el = et.SubElement(shape_node_el, '{%s}NodeLabel' % y_ns,
                                   {
                                       'alignment': 'center',
                                       'autoSizePolicy': 'content',
                                       'fontFamily': 'Dialog',
                                       'fontSize': font_size_text,
                                       'fontStyle': 'plain',
                                       'hasBackgroundColor': 'false',
                                       'hasLineColor': 'false',
                                       'modelName': 'custom',
                                       'textColor': node_text_color,
                                       'visible': 'true'
                                   })
        caption_el.text = node_data['label']

    # Edges
    graph_el.append(et.Comment('Edges'))
    for source_node, target_node in graph.edges():
        et.SubElement(graph_el, 'edge',
                      {
                          'source': source_node,
                          'target': target_node
                      })

    return root_el
# Script entry point: generate the graph only when run directly, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment