Skip to content

Instantly share code, notes, and snippets.

@buma
Last active August 29, 2015 13:57
Show Gist options
  • Select an option

  • Save buma/749feb180b9751b2952d to your computer and use it in GitHub Desktop.

Select an option

Save buma/749feb180b9751b2952d to your computer and use it in GitHub Desktop.
Script for tagging OSM bus stops with the ref numbers of the bus routes that stop there, before the data is inserted into the DB
# -*- coding: utf-8 -*-
from __future__ import print_function
import xml.etree.cElementTree as ET
from lxml import etree
from collections import defaultdict
__author__ = 'MaBu'
# Input/output configuration.
#
# An earlier configuration used './deli rute.osm' as input and
# './deli rute_out.osm' as output; those assignments were always overwritten
# by the ones below, so they are kept here only as a note.
#
# Author's measurements: a 588M OSM file takes about 4 minutes on an
# Intel(R) Core(TM) i5-3570K CPU @3.40GHz, using around 300 MB of memory
# while writing and around half of that while reading.
file_path = './slovenia-latest.osm'
file_path_output = './slovenia-latest-transformed.osm'
# Only bus routes whose "operator" tag equals this value are processed.
operator = "MARPROM"
# Pass 1: stream through the input file and, for every node that is a member
# of one of the operator's bus-route relations, collect the refs of those
# routes. nodes_refs maps a node id (the member's "ref" attribute, as found in
# the XML) to the list of route refs that stop there.
nodes_refs = defaultdict(list)

# Route refs that are stored under a shorter name.
_REF_ALIASES = {
    u"Krožna 1": "1r",
    u"Krožna 2": "2r",
}
# Elements that are released from memory once their end event was handled.
# Built once instead of once per parsed element.
_CLEARABLE_TAGS = frozenset(['node', 'way', 'relation', 'bounds'])

# Only "end" events are requested, so every element arrives fully parsed and
# no event check is needed inside the loop.
context = etree.iterparse(file_path, events=("end",))
for event, elem in context:
    if elem.tag == 'relation':
        tags = {tag.get('k'): tag.get('v') for tag in elem.findall('tag')}
        if tags.get("route") == "bus" and tags.get("operator") == operator:
            # The ref belongs to the relation, not to the member, so it is
            # normalised once per relation rather than once per member.
            ref = tags.get("ref")
            ref = _REF_ALIASES.get(ref, ref)
            for member in elem.findall('member'):
                if member.get("type") != 'node':
                    continue
                if ref is None:
                    # Report every stop of a route that has no ref; such
                    # stops are not recorded.
                    print("Ref is None")
                    print(tags)
                    print(member.attrib)
                    continue
                nodes_refs[member.get("ref")].append(ref)
    if elem.tag in _CLEARABLE_TAGS:
        # Drop the element's content ...
        elem.clear()
        # ... and remove the already processed preceding siblings from the
        # parent, so the partially built tree does not keep growing.
        while elem.getprevious() is not None:
            del elem.getparent()[0]
del context
print("finish reading")
num_of_items = len(nodes_refs)
print("{0} items".format(num_of_items))
# Remove duplicate refs per stop. Duplicates occur when the same route ref is
# present in more than one relation (presumably weekday/weekend variants of a
# route). The first-seen order is kept so the tags written later come out in
# the same order on every run.
# .items() is used because it exists on both Python 2 and 3; only values of
# existing keys are replaced, so the dict does not change size while iterating.
for node_id, refs in nodes_refs.items():
    seen = set()
    unique_refs = []
    for ref in refs:
        if ref not in seen:
            seen.add(ref)
            unique_refs.append(ref)
    nodes_refs[node_id] = unique_refs
# Pass 2: stream through the input file again and write every top-level
# element to the output file, adding a route_<ref>=yes tag to each node for
# every route ref collected for it in nodes_refs.
#
# The XML declaration and the <osm> root are written by hand because the
# elements are serialised one at a time instead of as one complete tree.

# Elements that are copied to the output (together with their children).
# Built once instead of once per parsed element.
_WRITTEN_TAGS = frozenset(['node', 'way', 'relation', 'bounds'])

# The file is opened in binary mode, so everything written to it is bytes:
# ET.tostring(..., encoding='utf-8') already returns an encoded string and the
# literals carry a b prefix. The with-block guarantees the file is flushed
# and closed even if parsing fails half way.
with open(file_path_output, "wb") as file_out:
    file_out.write(b"<?xml version='1.0' encoding='UTF-8'?>\n")
    file_out.write(b"<osm version='0.6' upload='true' generator='JOSM'>\n")
    # Only "end" events are requested, so every element arrives fully parsed.
    context = ET.iterparse(file_path, events=("end",))
    for event, elem in context:
        if elem.tag not in _WRITTEN_TAGS:
            # Child elements (e.g. tag, member) are written with their parent.
            continue
        if elem.tag == 'node':
            # Nodes without collected refs yield an empty sequence here;
            # .get() does not create entries in the defaultdict.
            for ref in nodes_refs.get(elem.get("id"), ()):
                ET.SubElement(elem, 'tag', {
                    'k': 'route_' + ref.strip(),
                    'v': 'yes',
                })
        # Write the element and all of its children, then release its content.
        file_out.write(ET.tostring(elem, encoding='utf-8'))
        elem.clear()
    del context
    file_out.write(b"</osm>")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment