Created
June 17, 2021 23:15
-
-
Save tylertreat/7e02ebd3fdfae2cc8106b997e8c29e52 to your computer and use it in GitHub Desktop.
Python script for parsing GCP flow log JSON from BigQuery and outputting DOT format for Graphviz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import sys | |
def _quote_dot_id(text):
    """Return *text* as a double-quoted DOT ID with embedded quotes escaped."""
    return '"' + text.replace('"', '\\"') + '"'


def parse(input_file_name, output_file_name):
    """Convert a JSON list of flow records into a Graphviz DOT digraph.

    The input file must hold a JSON array of objects, each with the keys
    ``src_vpc``, ``src_subnet``, ``src_zone``, ``src_vm``, ``src_ip``, the
    matching ``dest_*`` keys, and ``bytes_sent``.  Every VPC becomes a
    cluster, every subnet a cluster nested inside its VPC, every VM a node,
    and every flow record an edge labelled with ``bytes_sent`` divided by
    1024 (rounded down).

    Args:
        input_file_name: Path of the JSON flow log to read.
        output_file_name: Path of the DOT file to write; it is overwritten.

    Raises:
        KeyError: If a flow record lacks one of the expected keys.
        ValueError: If the JSON is malformed or ``bytes_sent`` cannot be
            converted to an integer.
    """
    with open(input_file_name, encoding='utf-8') as f:
        data = json.load(f)

    # Create flow data structures: vpc name -> subnet name -> zone and VMs.
    vpcs = {}
    for flow in data:
        # Both endpoints of a flow are registered the same way.
        for side in ('src', 'dest'):
            vpc = _get_or_create_vpc(vpcs, flow[side + '_vpc'])
            subnet = _get_or_create_subnet(
                vpc, flow[side + '_subnet'], flow[side + '_zone'])
            subnet['vms'][flow[side + '_vm']] = flow[side + '_ip']

    # Output graph in dot format
    with open(output_file_name, 'w', encoding='utf-8') as f:
        f.write('digraph G {\n')
        for name, vpc in vpcs.items():
            # Cluster IDs are quoted and carry a vpc/subnet prefix so that a
            # subnet sharing its name with a VPC (or with a subnet of another
            # VPC) still gets its own cluster instead of being merged.
            f.write(' subgraph ' + _quote_dot_id('cluster_vpc_' + name) + ' {\n')
            f.write(' label = ' + _quote_dot_id('vpc ' + name) + ';\n')
            for subnet_name, subnet in vpc.items():
                cluster_id = 'cluster_subnet_' + name + '/' + subnet_name
                f.write(' subgraph ' + _quote_dot_id(cluster_id) + ' {\n')
                # NOTE: names are inserted into HTML-like labels unescaped.
                f.write(' label = <subnet ' + subnet_name + '<BR /><FONT POINT-SIZE="10">' + subnet['zone'] + '</FONT>>;\n')
                for vm, ip in subnet['vms'].items():
                    f.write(' ' + _quote_dot_id(vm) + ' [label = <' + vm + '<BR /><FONT POINT-SIZE="10">' + ip + '</FONT>>];\n')
                f.write(' }\n')
            f.write(' }\n')
        f.write('\n')
        for flow in data:
            # Integer division avoids the float round-trip of int(x / 1024).
            kilobytes = int(flow['bytes_sent']) // 1024
            f.write(' ' + _quote_dot_id(flow['src_vm']) + ' -> ' + _quote_dot_id(flow['dest_vm']) + ' [label = "' + str(kilobytes) + ' kb"];\n')
        f.write('}')
def _get_or_create_vpc(vpcs, vpc_name): | |
vpc = vpcs.get(vpc_name) | |
if not vpc: | |
vpc = {} | |
vpcs[vpc_name] = vpc | |
return vpc | |
def _get_or_create_subnet(vpc, subnet_name, zone): | |
subnet = vpc.get(subnet_name) | |
if not subnet: | |
subnet = {'zone': zone, 'vms': {}} | |
vpc[subnet_name] = subnet | |
return subnet | |
# Arguments: flow log json file and dot output file
if __name__ == '__main__':
    # Exit with a usage hint instead of an IndexError traceback when an
    # argument is missing; extra arguments are ignored as before.
    if len(sys.argv) < 3:
        sys.exit('usage: ' + os.path.basename(sys.argv[0]) + ' <flow-log.json> <output.dot>')
    parse(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment