Last active
August 29, 2015 13:57
-
-
Save buma/749feb180b9751b2952d to your computer and use it in GitHub Desktop.
Script that adds bus route ref numbers to OSM bus stops before the data is inserted into the DB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| from __future__ import print_function | |
| import xml.etree.cElementTree as ET | |
| from lxml import etree | |
| from collections import defaultdict | |
| __author__ = 'MaBu' | |
# Input OSM file and the transformed copy that the script writes.
# An alternative pair ('./deli rute.osm' -> './deli rute_out.osm') used to be
# assigned here and was immediately overwritten; put those paths below to
# process that file instead.
#
# Reported running time: about 4 minutes for a 588 MB OSM file on an
# Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz, using around 300 MB of memory
# while writing and around half of that while reading.
file_path = './slovenia-latest.osm'
file_path_output = './slovenia-latest-transformed.osm'

# Only bus-route relations whose "operator" tag equals this value are used.
operator = "MARPROM"

# node id -> list of "ref" values of the matching bus routes that have this
# node as a member.
nodes_refs = defaultdict(list)
# First pass: stream through the input file and, for every bus-route relation
# run by `operator`, record the route's ref under each of its node members.
# Result: nodes_refs maps node id -> list of route refs (duplicates are still
# possible at this point).

# Elements that are emptied and detached once their "end" event was handled.
# Built once instead of once per parsed element.
_CLEARABLE_TAGS = frozenset(['node', 'way', 'relation', 'bounds'])

# Route refs that are stored under a shorter name.
_REF_ALIASES = {
    u"Krožna 1": "1r",
    u"Krožna 2": "2r",
}

context = etree.iterparse(file_path, events=("end",))
for event, elem in context:
    # Only "end" events are requested, so no check of `event` is needed.
    if elem.tag == 'relation':
        tags = {tag.get('k'): tag.get('v') for tag in elem.findall('tag')}
        if tags.get("route") == "bus" and tags.get("operator") == operator:
            # The ref is a property of the relation, so resolve it once
            # rather than once per member.
            ref = tags.get("ref")
            ref = _REF_ALIASES.get(ref, ref)
            for member in elem.findall('member'):
                if member.get("type") != 'node':
                    continue
                if ref is None:
                    # A route without a ref cannot be attached to a stop;
                    # report it and move on.
                    print("Ref is None")
                    print(tags)
                    print(member.attrib)
                    continue
                nodes_refs[member.get("ref")].append(ref)
    if elem.tag in _CLEARABLE_TAGS:
        # Drop the element's content ...
        elem.clear()
        # ... and remove the already processed preceding siblings from the
        # parent so the tree does not keep growing.
        while elem.getprevious() is not None:
            del elem.getparent()[0]
del context
print("finish reading")
num_of_items = len(nodes_refs)
print("{0} items".format(num_of_items))
# Remove duplicate refs per node. Duplicates occur when the same route ref is
# collected from more than one relation (presumably weekday/weekend variants
# of a route - not verified). Sorting keeps the result, and therefore the
# order of the tags written later, the same on every run.
# Plain key iteration works on both Python 2 and 3 (dict.iteritems() does
# not exist on Python 3); replacing the value of an existing key while
# iterating is safe because the set of keys does not change.
for node_id in nodes_refs:
    nodes_refs[node_id] = sorted(set(nodes_refs[node_id]))
# Second pass: stream through the input again and write it out element by
# element, adding a <tag k="route_<ref>" v="yes"/> child to every node that
# has collected route refs. The <osm> wrapper is written by hand because the
# document is never held in memory as a whole.

# Top-level elements that are copied to the output. Built once instead of
# once per parsed element.
_WRITTEN_TAGS = frozenset(['node', 'way', 'relation', 'bounds'])

# The file is opened in binary mode, so everything written has to be bytes:
# ET.tostring(..., encoding='utf-8') returns bytes, and the hand-written
# wrapper uses bytes literals. The `with` block guarantees the file is
# flushed and closed even if parsing fails half-way.
with open(file_path_output, "wb") as file_out:
    file_out.write(b"<?xml version='1.0' encoding='UTF-8'?>\n")
    file_out.write(b"<osm version='0.6' upload='true' generator='JOSM'>\n")
    context = ET.iterparse(file_path, events=("end",))
    for event, elem in context:
        # Only "end" events are requested, so every element is complete here.
        if elem.tag == 'node':
            # .get() does not insert missing keys into the defaultdict.
            for ref in nodes_refs.get(elem.get("id"), ()):
                # One tag route_<ref>=yes per route that stops at this node.
                ET.SubElement(
                    elem, 'tag', {'k': 'route_' + ref.strip(), 'v': 'yes'})
        if elem.tag in _WRITTEN_TAGS:
            # Serialises the element together with all of its children.
            file_out.write(ET.tostring(elem, encoding='utf-8'))
            # Drop the element's content now that it has been written.
            elem.clear()
    del context
    file_out.write(b"</osm>")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment