gregmacfarlane · September 15, 2014 20:50 · jinkai89 · Feb 2, 2018
diff --git a/shp2xml.py b/shp2xml.py
 # Code to read the FAF network
 # Greg Macfarlane | Parsons Brinckerhoff


 # ORNL provides the FAF network as a TransCAD network, and also as a shapefile
 # that I think is probably exported from the TransCAD network. Kyle Ward in the
 # Raleigh office tried to export an xml file from TransCAD, but it wasn't quite
 # right. So I decided to write this script to make an xml MATSim network from
 # the shapefile.

 # The networkx library contains general graph functionality, including the
 # ability to read a line shapefile as a graph. I tested its capability by
 # exporting the network as a line shapefile of edges and a point shapefile of
 # nodes, and was satisfied.
 import networkx as nx

 # To write the graph to xml, we use commands in the lxml library; we need to use
 # this instead of the ElementTree library that comes standard with Python
 # installations so that we can `pretty_print` the xml output.
 import lxml.etree as ET

 
 # When we read in G, the node labels are the coordinate tuples, e.g.
 # (-111.458149, 38.108458). This isn't very useful, so we change them into
 # integers beginning at 0. Also, MATSim likes to use coordinates in meters, so
 # we use a version of the network that we exported into EPSG: 2818, a Lambert
 # Conformal Conic projection centered on Montana. This projection gives
 # coordinates in meters, and has the added LCC feature of minimizing distance
 # distortion across a wide area of the map.
 # Load the projected line shapefile as a graph and relabel its
 # coordinate-tuple nodes as consecutive integers beginning at `start`; each
 # node keeps its original tuple in the "coord" attribute.
 # NOTE: nx.read_shp was removed in NetworkX 3.0, so an older release is needed.
 start = 0
 G = nx.convert_node_labels_to_integers(
     nx.read_shp("fafnetworkLCC.shp"),
     first_label=start,
     label_attribute="coord",
 )


 # MATSIM NETWORK BUILDER
 # =============================================

 # We need to build an element tree to store the node and edge attributes. We
 # follow the [MATSim network specification](http://www.matsim.org/files/dtd/network_v1.dtd), 
 # as closely as possible; this calls for an xml structure
 #
 #  <network><nodes><node ...></nodes><links><link ....></links></network>. 
 #
 # So in the element tree, the parent element is `network`, with two children,
 # `links` and `nodes`. The number of individual `node` and `link` children is
 # dependent on the size of the network. 
 # In general we follow the 
 # [MATSim network specification](http://www.matsim.org/files/dtd/network_v1.dtd)

 # Root <network> element; its only attribute is a human-readable name.
 network = ET.Element("network")
 network.set('name', "MATSim network exported from FAF shapefile.")


 # Node structure
 # ----------------------------------------------
 nodes = ET.SubElement(network, "nodes")

 # Emit one <node> element per graph node, of the form
 #
 #   <node id="..." x="..." y="..." type="2"/>
 #
 # The id is the integer label assigned when the graph was relabelled, and x/y
 # come from the "coord" attribute stored on each node at that time. The MATSim
 # specification allows for a "type" attribute. I don't know that it is strictly
 # necessary, but the example network uses "2" for all nodes, so we continue
 # that convention to limit as many differences as possible.
 #
 # Iterate (label, attributes) pairs rather than indexing by position: this
 # keeps every id paired with its own coordinates regardless of node ordering
 # or the value of `start`, and avoids rebuilding the node list on each pass.
 for node_id, node_attrs in G.nodes(data=True):
     coord = node_attrs['coord']
     ET.SubElement(nodes, "node",
                   attrib={'id': str(node_id),
                           'x': str(coord[0]),
                           'y': str(coord[1]),
                           'type': "2"})


 # Link structure
 # -------------------------------------------
 # There are a number of general link attributes that we need to set at the
 # parent element. The specification says defaults are available, but we will set
 # them anyways. The period over which link capacity is recorded `capperiod` is
 # one hour, the effective length of vehicles `effectivecellsize` is 7.5 meters
 # (we might want to change this, because we're dealing with trucks?), and the
 # effective lane width is 3.75 meters
 # Container for all <link> elements, carrying the network-wide link settings:
 # capacity period, effective cell size and effective lane width.
 links = ET.SubElement(network, "links")
 links.set('capperiod', "01:00:00")
 links.set('effectivecellsize', "7.5")
 links.set('effectivelanewidth', "3.75")



 # The MATSim network specification has a number of required fields and some that
 # are not required. The required attributes are the link ID, the "from" node ID,
 # the "to" node ID, and the distance, or the "length". The FAF network gives the
 # distance in miles, but the network wants meters, so we need to apply a
 # conversion factor.

 # The `capacity` attribute is not listed as mandatory, but we needed to set the
 # capperiod, so I'm worried that this is actually necessary. For now we'll set
 # a constant value at 2000 veh/hrln * 3 lanes = 6000 veh/hr. We might be able to
 # set this more precisely using the `SIGNT1` network attribute that determines
 # whether a road is an interstate, state highway, etc.

 # Other optional attributes include free flow speed, permanent lanes, whether
 # the link is one-way (see discussion of reversed links below), and the modes
 # permitted on the link (I don't know how this is supposed to tie into the node
 # type above). We also append an additional attribute GEOID, constructed from
 # the state and county FIPS codes. This may prove useful.

 # To simplify the code, we want to extract the edge attributes into a form where
 # we can get them as variable[(start,end)]
 length = nx.get_edge_attributes(G, "MILES")
 idvar  = nx.get_edge_attributes(G, "ID")
 ctfips = nx.get_edge_attributes(G, "CTFIPS")
 stfips = nx.get_edge_attributes(G, "STFIPS")
 fclass = nx.get_edge_attributes(G, "SIGNT1")  # extracted but not used below

 # Emit one <link> per graph edge. Iterate the edges directly instead of
 # indexing G.edges() by position: positional indexing rebuilds the edge list
 # on every access where G.edges() returns a list, and is not supported where
 # it returns a view.
 for startnode, endnode in G.edges():
     edge = (startnode, endnode)  # key shared by the attribute dicts above
     # create link SubElement with attributes pulled from the dicts above.
     ET.SubElement(links, "link", attrib={
         'id': str(idvar[edge]),
         'from': str(startnode),
         'to': str(endnode),
         'capacity': str(6000),
         'modes': "car",
         'oneway': str(1),
         'type': str(10),
         'length': str(length[edge] * 1609.34),  # convert miles to meters
         'GEOID': str(stfips[edge]).zfill(2) + str(ctfips[edge]).zfill(3)})


 # One extra item I noted in the MATSim documentation is that all links
 # **must be** one-way. So we need to create all of the same links again, but
 # with start and end nodes swapped. Also, we'll have to make a new link id.
 # Because there are only about 180,000 links in the network, adding 1e6 to each
 # of the IDs won't duplicate any labels. As a bonus, link `2183` will have the
 # ID `1002183` for its reverse partner, which might be nice.
 # Emit the reverse partner of every edge. Iterate the edges directly instead
 # of indexing G.edges() by position: positional indexing rebuilds the edge
 # list on every access where G.edges() returns a list, and is not supported
 # where it returns a view.
 for startnode, endnode in G.edges():
     edge = (startnode, endnode)  # key into the edge-attribute dicts
     ET.SubElement(links, "link", attrib={
         'id': str(int(idvar[edge] + 1e6)),  # create unique id as above
         'from': str(endnode),  # swap to and from node ids
         'to': str(startnode),
         'capacity': str(6000),
         'modes': "car",
         'oneway': str(1),
         'type': str(10),
         'length': str(length[edge] * 1609.34),  # convert miles to meters
         'GEOID': str(stfips[edge]).zfill(2) + str(ctfips[edge]).zfill(3)})



 # We can now wrap the entire element tree and write it to an xml file with the
 # same headers that came in the original file.
 tree = ET.ElementTree(network)

 # lxml serializes to bytes, so the file is opened in binary mode and the
 # hand-written header is given as bytes; a text-mode handle fails on Python 3
 # when lxml writes to it. The header is built from explicit literals so that
 # source indentation can never leak whitespace into the XML prolog.
 with open('network.xml', 'wb') as f:
     f.write(b'<?xml version="1.0" encoding="UTF-8" ?>\n'
             b'<!DOCTYPE network SYSTEM "http://www.matsim.org/files/dtd/network_v1.dtd">\n')
     tree.write(f, pretty_print=True)
	# Code to read the FAF network
	# Greg Macfarlane | Parsons Brinckerhoff


	# ORNL provides the FAF network as a TransCAD network, and also as a shapefile
	# that I think is probably exported from the TransCAD network. Kyle Ward in the
	# Raleigh office tried to export an xml file from TransCAD, but it wasn't quite
	# right. So I decided to write this script to make an xml MATSim network from
	# the shapefile.

	# The networkx library contains general graph functionality, including the
	# ability to read a line shapefile as a graph. I tested its capability by
	# exporting the network as a line shapefile of edges and a point shapefile of
	# nodes, and was satisfied.
	import networkx as nx

	# To write the graph to xml, we use commands in the lxml library; we need to use
	# this instead of the ElementTree library that comes standard with Python
	# installations so that we can `pretty_print` the xml output.
	import lxml.etree as ET


	# When we read in G, the node labels are the coordinate tuples, e.g.
	# (-111.458149, 38.108458). This isn't very useful, so we change them into
	# integers beginning at 0. Also, MATSim likes to use coordinates in meters, so
	# we use a version of the network that we exported into EPSG: 2818, a Lambert
	# Conformal Conic projection centered on Montana. This projection gives
	# coordinates in meters, and has the added LCC feature of minimizing distance
	# distortion across a wide area of the map.
	# Load the projected line shapefile as a graph and relabel its
	# coordinate-tuple nodes as consecutive integers beginning at `start`; each
	# node keeps its original tuple in the "coord" attribute.
	# NOTE: nx.read_shp was removed in NetworkX 3.0, so an older release is needed.
	start = 0
	G = nx.convert_node_labels_to_integers(
	    nx.read_shp("fafnetworkLCC.shp"),
	    first_label=start,
	    label_attribute="coord",
	)


	# MATSIM NETWORK BUILDER
	# =============================================

	# We need to build an element tree to store the node and edge attributes. We
	# follow the [MATSim network specification](http://www.matsim.org/files/dtd/network_v1.dtd),
	# as closely as possible; this calls for an xml structure
	#
	# <network><nodes><node ...></nodes><links><link ....></links></network>.
	#
	# So in the element tree, the parent element is `network`, with two children,
	# `links` and `nodes`. The number of individual `node` and `link` children is
	# dependent on the size of the network.
	# In general we follow the
	# [MATSim network specification](http://www.matsim.org/files/dtd/network_v1.dtd)

	# Root <network> element; its only attribute is a human-readable name.
	network = ET.Element("network")
	network.set('name', "MATSim network exported from FAF shapefile.")


	# Node structure
	# ----------------------------------------------
	nodes = ET.SubElement(network, "nodes")

	# Emit one <node> element per graph node, of the form
	#
	#   <node id="..." x="..." y="..." type="2"/>
	#
	# The id is the integer label assigned when the graph was relabelled, and x/y
	# come from the "coord" attribute stored on each node at that time. The MATSim
	# specification allows for a "type" attribute. I don't know that it is strictly
	# necessary, but the example network uses "2" for all nodes, so we continue
	# that convention to limit as many differences as possible.
	#
	# Iterate (label, attributes) pairs rather than indexing by position: this
	# keeps every id paired with its own coordinates regardless of node ordering
	# or the value of `start`, and avoids rebuilding the node list on each pass.
	for node_id, node_attrs in G.nodes(data=True):
	    coord = node_attrs['coord']
	    ET.SubElement(nodes, "node",
	                  attrib={'id': str(node_id),
	                          'x': str(coord[0]),
	                          'y': str(coord[1]),
	                          'type': "2"})


	# Link structure
	# -------------------------------------------
	# There are a number of general link attributes that we need to set at the
	# parent element. The specification says defaults are available, but we will set
	# them anyways. The period over which link capacity is recorded `capperiod` is
	# one hour, the effective length of vehicles `effectivecellsize` is 7.5 meters
	# (we might want to change this, because we're dealing with trucks?), and the
	# effective lane width is 3.75 meters
	# Container for all <link> elements, carrying the network-wide link settings:
	# capacity period, effective cell size and effective lane width.
	links = ET.SubElement(network, "links")
	links.set('capperiod', "01:00:00")
	links.set('effectivecellsize', "7.5")
	links.set('effectivelanewidth', "3.75")



	# The MATSim network specification has a number of required fields and some that
	# are not required. The required attributes are the link ID, the "from" node ID,
	# the "to" node ID, and the distance, or the "length". The FAF network gives the
	# distance in miles, but the network wants meters, so we need to apply a
	# conversion factor.

	# The `capacity` attribute is not listed as mandatory, but we needed to set the
	# capperiod, so I'm worried that this is actually necessary. For now we'll set
	# a constant value at 2000 veh/hrln * 3 lanes = 6000 veh/hr. We might be able to
	# set this more precisely using the `SIGNT1` network attribute that determines
	# whether a road is an interstate, state highway, etc.

	# Other optional attributes include free flow speed, permanent lanes, whether
	# the link is one-way (see discussion of reversed links below), and the modes
	# permitted on the link (I don't know how this is supposed to tie into the node
	# type above). We also append an additional attribute GEOID, constructed from
	# the state and county FIPS codes. This may prove useful.

	# To simplify the code, we want to extract the edge attributes into a form where
	# we can get them as variable[(start,end)]
	length = nx.get_edge_attributes(G, "MILES")
	idvar = nx.get_edge_attributes(G, "ID")
	ctfips = nx.get_edge_attributes(G, "CTFIPS")
	stfips = nx.get_edge_attributes(G, "STFIPS")
	fclass = nx.get_edge_attributes(G, "SIGNT1")  # extracted but not used below

	# Emit one <link> per graph edge. Iterate the edges directly instead of
	# indexing G.edges() by position: positional indexing rebuilds the edge list
	# on every access where G.edges() returns a list, and is not supported where
	# it returns a view.
	for startnode, endnode in G.edges():
	    edge = (startnode, endnode)  # key shared by the attribute dicts above
	    # create link SubElement with attributes pulled from the dicts above.
	    ET.SubElement(links, "link", attrib={
	        'id': str(idvar[edge]),
	        'from': str(startnode),
	        'to': str(endnode),
	        'capacity': str(6000),
	        'modes': "car",
	        'oneway': str(1),
	        'type': str(10),
	        'length': str(length[edge] * 1609.34),  # convert miles to meters
	        'GEOID': str(stfips[edge]).zfill(2) + str(ctfips[edge]).zfill(3)})


	# One extra item I noted in the MATSim documentation is that all links
	# must be one-way. So we need to create all of the same links again, but
	# with start and end nodes swapped. Also, we'll have to make a new link id.
	# Because there are only about 180,000 links in the network, adding 1e6 to each
	# of the IDs won't duplicate any labels. As a bonus, link `2183` will have the
	# ID `1002183` for its reverse partner, which might be nice.
	# Emit the reverse partner of every edge. Iterate the edges directly instead
	# of indexing G.edges() by position: positional indexing rebuilds the edge
	# list on every access where G.edges() returns a list, and is not supported
	# where it returns a view.
	for startnode, endnode in G.edges():
	    edge = (startnode, endnode)  # key into the edge-attribute dicts
	    ET.SubElement(links, "link", attrib={
	        'id': str(int(idvar[edge] + 1e6)),  # create unique id as above
	        'from': str(endnode),  # swap to and from node ids
	        'to': str(startnode),
	        'capacity': str(6000),
	        'modes': "car",
	        'oneway': str(1),
	        'type': str(10),
	        'length': str(length[edge] * 1609.34),  # convert miles to meters
	        'GEOID': str(stfips[edge]).zfill(2) + str(ctfips[edge]).zfill(3)})



	# We can now wrap the entire element tree and write it to an xml file with the
	# same headers that came in the original file.
	tree = ET.ElementTree(network)

	# lxml serializes to bytes, so the file is opened in binary mode and the
	# hand-written header is given as bytes; a text-mode handle fails on Python 3
	# when lxml writes to it. The header is built from explicit literals so that
	# source indentation can never leak whitespace into the XML prolog.
	with open('network.xml', 'wb') as f:
	    f.write(b'<?xml version="1.0" encoding="UTF-8" ?>\n'
	            b'<!DOCTYPE network SYSTEM "http://www.matsim.org/files/dtd/network_v1.dtd">\n')
	    tree.write(f, pretty_print=True)