Last active
December 29, 2015 10:29
-
-
Save TimSC/7657062 to your computer and use it in GitHub Desktop.
Extract a small area from an OSM file by Tim Sheerman-Chase, 2013.
You may reuse this file under the terms of CC0 https://creativecommons.org/publicdomain/zero/1.0/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Extract a small area from an OSM file | |
#by Tim Sheerman-Chase, 2013 | |
#You may reuse this file under the terms of CC0 | |
#https://creativecommons.org/publicdomain/zero/1.0/ | |
import xml.parsers.expat as expat | |
import pickle, bz2 | |
from xml.sax.saxutils import escape, quoteattr | |
class ExpatParse(object): | |
def __init__(self): | |
self.parser = expat.ParserCreate() | |
self.parser.CharacterDataHandler = self.HandleCharData | |
self.parser.StartElementHandler = self.HandleStartElement | |
self.parser.EndElementHandler = self.HandleEndElement | |
self.depth = 0 | |
self.tags = [] | |
self.attr = [] | |
def ParseFile(self, fi): | |
fi.seek(0) | |
self.parser.ParseFile(fi) | |
def HandleCharData(self, data): | |
pass | |
def HandleStartElement(self, name, attrs): | |
self.depth += 1 | |
self.tags.append(name) | |
self.attr.append(attrs) | |
def HandleEndElement(self, name): | |
self.depth -= 1 | |
self.tags.pop() | |
self.attr.pop() | |
class RoiNodes(ExpatParse): | |
def __init__(self): | |
ExpatParse.__init__(self) | |
self.roi = [51.072,51.472], [-0.850,0.040] | |
self.objTypeCount = {'node':0, 'way':0, 'relation':0, 'changeset':0} | |
self.objCount = 0 | |
self.foundNodes = set() | |
def HandleStartElement(self, name, attrs): | |
ExpatParse.HandleStartElement(self, name, attrs) | |
assert self.depth < 4 | |
if self.depth == 2: | |
if name in self.objTypeCount: | |
self.objTypeCount[name] += 1 | |
self.objCount += 1 | |
if self.objCount % 100000==0: | |
print self.objTypeCount, len(self.foundNodes) | |
if self.depth == 2 and name == "node": | |
lat = float(attrs['lat']) | |
lon = float(attrs['lon']) | |
if lat >= self.roi[0][0] \ | |
and lat <= self.roi[0][1] \ | |
and lon >= self.roi[1][0] \ | |
and lon <= self.roi[1][1]: | |
self.foundNodes.add(int(attrs['id'])) | |
def HandleEndElement(self, name): | |
ExpatParse.HandleEndElement(self, name) | |
class RoiWays(ExpatParse): | |
def __init__(self): | |
ExpatParse.__init__(self) | |
self.roiNodes = set() | |
self.objTypeCount = {'node':0, 'way':0, 'relation':0, 'changeset':0} | |
self.objCount = 0 | |
self.foundWays = set() | |
self.wayNids = set() | |
self.wayHit = 0 | |
self.referencesNodes = set() | |
self.objTypeTotal = None | |
def HandleStartElement(self, name, attrs): | |
ExpatParse.HandleStartElement(self, name, attrs) | |
assert self.depth < 4 | |
if self.depth == 2: | |
if name in self.objTypeCount: | |
self.objTypeCount[name] += 1 | |
self.objCount += 1 | |
if self.objCount % 100000==0: | |
if self.objTypeTotal is not None: | |
objDone = sum(self.objTypeCount.values()) | |
objTotal = sum(self.objTypeTotal.values()) | |
progress = float(objDone) / objTotal | |
print progress, | |
print "w", self.objTypeCount, len(self.foundWays), len(self.referencesNodes) | |
self.wayNids = set() | |
self.wayHit = 0 | |
if self.depth == 3 and self.tags[1] == "way" and self.tags[2] == "nd": | |
nid = int(attrs['ref']) | |
self.wayNids.add(nid) | |
if nid in self.roiNodes: | |
self.wayHit = 1 | |
def HandleEndElement(self, name): | |
if self.depth == 2: | |
if self.wayHit: | |
self.foundWays.add(int(self.attr[1]['id'])) | |
self.referencesNodes.update(self.wayNids) | |
ExpatParse.HandleEndElement(self, name) | |
class RoiRelations(ExpatParse): | |
def __init__(self): | |
ExpatParse.__init__(self) | |
self.roiNodes = set() | |
self.roiWays = set() | |
self.objTypeCount = {'node':0, 'way':0, 'relation':0, 'changeset':0} | |
self.objCount = 0 | |
self.foundRelations = set() | |
self.objTypeTotal = None | |
def HandleStartElement(self, name, attrs): | |
ExpatParse.HandleStartElement(self, name, attrs) | |
assert self.depth < 4 | |
if self.depth == 2: | |
if name in self.objTypeCount: | |
self.objTypeCount[name] += 1 | |
self.objCount += 1 | |
if self.objCount % 100000==0: | |
if self.objTypeTotal is not None: | |
objDone = sum(self.objTypeCount.values()) | |
objTotal = sum(self.objTypeTotal.values()) | |
progress = float(objDone) / objTotal | |
print progress, | |
print "r", self.objTypeCount, len(self.foundRelations) | |
if self.depth == 3 and self.tags[1] == "relation" and self.tags[2] == "member": | |
nid = int(attrs['ref']) | |
ty = attrs['type'] | |
if ty == "node" and nid in self.roiNodes: | |
self.foundRelations.add(int(self.attr[1]['id'])) | |
if ty == "way" and nid in self.roiWays: | |
self.foundRelations.add(int(self.attr[1]['id'])) | |
def HandleEndElement(self, name): | |
ExpatParse.HandleEndElement(self, name) | |
class WriteOutput(ExpatParse): | |
def __init__(self, fina): | |
ExpatParse.__init__(self) | |
self.roiNodes = set() | |
self.roiWays = set() | |
self.roiRelations = set() | |
self.objTypeCount = {'node':0, 'way':0, 'relation':0, 'changeset':0} | |
self.objCount = 0 | |
self.skip = 0 | |
self.roi = None | |
self.objTypeTotal = None | |
self.missingMem = 0 | |
if isinstance(fina, basestring): | |
self.fi = open(fina, "wt") | |
else: | |
self.fi = fina | |
self.fi.write("<?xml version='1.0' encoding='UTF-8'?>\n") | |
def HandleCharData(self, data): | |
pass | |
def HandleStartElement(self, name, attrs): | |
ExpatParse.HandleStartElement(self, name, attrs) | |
assert self.depth < 4 | |
if self.depth == 2: | |
if name in self.objTypeCount: | |
self.objTypeCount[name] += 1 | |
self.objCount += 1 | |
if self.objCount % 100000==0: | |
if self.objTypeTotal is not None: | |
objDone = sum(self.objTypeCount.values()) | |
objTotal = sum(self.objTypeTotal.values()) | |
progress = float(objDone) / objTotal | |
print progress, | |
print "o", self.objTypeCount | |
self.skip = 1 | |
if "id" in attrs: | |
objId = int(attrs['id']) | |
if name=="node" and objId in self.roiNodes: | |
self.skip = 0 | |
if name=="way" and objId in self.roiWays: | |
self.skip = 0 | |
if name=="relation" and objId in self.roiRelations: | |
self.skip = 0 | |
#Prevent incomplete relations with negative ids | |
if not self.skip and self.depth == 3 and self.tags[1]=="relation" and name == "member": | |
self.missingMem = 0 | |
objId = int(self.attr[1]['id']) | |
nid = int(attrs['ref']) | |
ty = attrs['type'] | |
if objId < 0 and ty == "node" and nid not in self.roiNodes: self.missingMem = 1 | |
if objId < 0 and ty == "way" and nid not in self.roiWays: self.missingMem = 1 | |
if objId < 0 and ty == "relation" and nid not in self.roiRelations: self.missingMem = 1 | |
if not self.skip and not self.missingMem: | |
openTag = "<"+name | |
for k in attrs: | |
openTag += " "+k+"="+quoteattr(escape(str(attrs[k]))) | |
openTag += ">\n" | |
self.fi.write(openTag.encode("utf-8")) | |
if self.depth == 1 and self.roi is not None: | |
self.fi.write("<bounds minlat='{0}' minlon='{1}' maxlat='{2}' maxlon='{3}'/>\n" \ | |
.format(self.roi[0][0], self.roi[1][0], self.roi[0][1], self.roi[1][1])) | |
def HandleEndElement(self, name): | |
if not self.skip and not self.missingMem: | |
closeTag = "</"+name+">\n" | |
self.fi.write(closeTag.encode("utf-8")) | |
if self.depth <= 2: | |
self.skip = 0 | |
if self.depth <= 3: | |
self.missingMem = 0 | |
ExpatParse.HandleEndElement(self, name) | |
if __name__=="__main__": | |
#fi = open('out.osm',"rt") | |
fi = bz2.BZ2File("out.osm.bz2","r") | |
roi = [51.072,51.472], [-0.850,0.040] | |
#Find nodes in area of interest | |
print "Finding nodes" | |
if 1: | |
roiNodes = RoiNodes() | |
roiNodes.roi = roi | |
roiNodes.ParseFile(fi) | |
foundNodes = roiNodes.foundNodes | |
objTypeCount = roiNodes.objTypeCount | |
pickle.dump(foundNodes, open("foundNodes.bin","wb"), protocol=-1) | |
pickle.dump(objTypeCount, open("objTypeCount.bin","wb"), protocol=-1) | |
del roiNodes | |
else: | |
foundNodes = pickle.load(open("foundNodes.bin","rb")) | |
objTypeCount = pickle.load(open("objTypeCount.bin","rb")) | |
#Find ways that reference ROI nodes | |
print "Finding ways" | |
if 1: | |
roiWays = RoiWays() | |
roiWays.roiNodes = foundNodes | |
roiWays.objTypeTotal = objTypeCount | |
roiWays.ParseFile(fi) | |
foundWays = roiWays.foundWays | |
referencesNodes = roiWays.referencesNodes | |
pickle.dump(foundWays, open("foundWays.bin","wb"), protocol=-1) | |
pickle.dump(referencesNodes, open("referencesNodes.bin","wb"), protocol=-1) | |
del roiWays | |
del foundNodes | |
else: | |
foundWays = pickle.load(open("foundWays.bin","rb")) | |
referencesNodes = pickle.load(open("referencesNodes.bin","rb")) | |
#Get relations that reference ROI objects | |
print "Find relations" | |
if 1: | |
roiRelations = RoiRelations() | |
roiRelations.roiNodes = referencesNodes | |
roiRelations.roiWays = foundWays | |
roiRelations.objTypeTotal = objTypeCount | |
roiRelations.ParseFile(fi) | |
foundRelations = roiRelations.foundRelations | |
pickle.dump(foundRelations, open("foundRelations.bin","wb"), protocol=-1) | |
del roiRelations | |
else: | |
foundRelations = pickle.load(open("foundRelations.bin","rb")) | |
#Write output | |
print "Write output" | |
outfi = bz2.BZ2File("trim.osm.bz2", "w") | |
writeOutput = WriteOutput(outfi) | |
writeOutput.roiNodes = referencesNodes | |
writeOutput.roiWays = foundWays | |
writeOutput.roiRelations = foundRelations | |
writeOutput.objTypeTotal = objTypeCount | |
writeOutput.roi = roi | |
writeOutput.ParseFile(fi) | |
outfi.close() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment