Created
July 31, 2013 18:40
-
-
Save bmander/6124848 to your computer and use it in GitHub Desktop.
Digest a tarfile full of GPX files into a more manageable CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lzma | |
from xml.dom.minidom import parseString, parse | |
import dateutil.parser | |
import calendar | |
from pyproj import Geod | |
import tarfile | |
from xml.parsers import expat | |
def parse_trkpt(node):
    """Return ``(lat, lon, epoch_seconds)`` for one ``<trkpt>`` DOM element.

    node -- an element exposing ``getAttribute`` and
            ``getElementsByTagName`` (presumably an xml.dom.minidom
            element, given this file's imports).

    ``lat`` and ``lon`` are the element's attributes converted to float.
    The timestamp is the text of the first ``<time>`` descendant, parsed
    with dateutil and converted to integer seconds since the Unix epoch.

    Raises IndexError when the element has no ``<time>`` descendant and
    ValueError when ``lat``/``lon`` are not valid floats.
    """
    lat = float(node.getAttribute("lat"))
    lon = float(node.getAttribute("lon"))

    strtime = node.getElementsByTagName("time")[0].firstChild.data
    dt = dateutil.parser.parse(strtime)

    # timegm() interprets its argument as UTC.  utctimetuple() shifts a
    # timezone-aware datetime to UTC first (and is equivalent to
    # timetuple() for a naive one), so timestamps carrying an explicit
    # offset such as "+02:00" are no longer off by that offset.
    tt = calendar.timegm(dt.utctimetuple())
    return (lat, lon, tt)
# Running track counter, kept in a dict so every GPXParser can mutate the
# same object and track numbering continues from one GPX file to the next.
counts = {'i': 0}

# Geodesic helper on the 'clrk66' ellipsoid.
# NOTE(review): nothing in the visible code uses gg -- confirm before removing.
gg = Geod(ellps='clrk66')

# NOTE(review): the .xz handle below is opened but never read in the visible
# code; the archive is actually read from the separately named, already
# decompressed .tar file.  Both files must therefore exist -- confirm intent.
fn = "gpx-planet-2013-04-09.tar.xz"
lzfp = lzma.LZMAFile(fn)
tf = tarfile.open("gpx-planet-2013-04-09.tar")

# Destination CSV; GPXParser appends one row per track point by default.
fpout = open("points.csv", "w")
class GPXParser(object):
    """Streaming GPX reader that writes one CSV row per ``<trkpt>``.

    Each row is ``track_index,time,lat,lon``:

    * ``track_index`` is the current value of ``counts['i']``, which is
      incremented on every ``<trk>`` start tag;
    * ``time`` is the raw text of the point's own ``<time>`` child, or the
      literal ``None`` when the point has no such child;
    * ``lat`` / ``lon`` are the attribute strings exactly as they appear
      in the document.
    """

    def __init__(self, counts, out=None):
        """Create the expat parser and attach the element callbacks.

        counts -- mutable dict whose ``'i'`` entry is the track counter.
                  The same dict can be handed to several parsers so that
                  numbering continues across files.
        out    -- object with a ``write`` method that receives the rows.
                  Defaults to the module-level ``fpout``.
        """
        self._ontime = False        # True while inside a captured <time>
        self._curpt = None          # attributes of the open <trkpt>, if any
        self._curtimechars = None   # text collected for the point's <time>
        self._counts = counts
        # Resolve the default lazily so the global is only needed when the
        # caller does not supply a destination.
        self._out = fpout if out is None else out

        parser = expat.ParserCreate()
        parser.StartElementHandler = self._on_start_element
        parser.EndElementHandler = self._on_end_element
        parser.CharacterDataHandler = self._on_character_data
        self._parser = parser

    def _on_start_element(self, name, attributes):
        """Track counter, point attributes and <time> capture on open tags."""
        if name == "trk":
            self._counts['i'] += 1
            print(self._counts)  # progress output, one line per track
        elif name == "trkpt":
            self._curpt = attributes
        elif name == "time" and self._curpt is not None:
            # Only a <time> inside an open <trkpt> belongs to that point.
            # <time> elements elsewhere (e.g. under <metadata> or <wpt>)
            # are ignored so they cannot leak into a later point that has
            # no timestamp of its own.
            self._ontime = True
            self._curtimechars = ""

    def _on_end_element(self, name):
        """Stop <time> capture, or emit the row when a <trkpt> closes."""
        if name == "time":
            self._ontime = False
        elif name == "trkpt":
            point = self._curpt
            self._out.write("%s,%s,%s,%s\n" % (
                self._counts['i'],
                self._curtimechars,
                point['lat'],
                point['lon'],
            ))
            self._curpt = None
            self._curtimechars = None

    def _on_character_data(self, data):
        """Accumulate text; expat may deliver one text node in pieces."""
        if self._ontime:
            self._curtimechars += data

    def parseFile(self, fp):
        """Parse the whole GPX document readable from file object ``fp``."""
        self._parser.ParseFile(fp)
# Walk every member of the tar archive and feed each GPX file through a
# fresh GPXParser, which appends the track points to the output CSV.
members = tf.getmembers()
total = len(members)
try:
    for i, info in enumerate(members):
        # Progress indicator; the parenthesised single-argument form prints
        # the same text under Python 2 and Python 3.
        print("%s/%s" % (i, total))

        # Directories, links and other non-regular entries carry no data.
        if not info.isfile():
            continue
        # Skip the per-trace metadata.xml entries; only GPX files are parsed.
        if info.name.split("/")[-1] == "metadata.xml":
            continue

        fp = tf.extractfile(info)
        try:
            # `counts` is shared so track numbering continues across files.
            parser = GPXParser(counts)
            parser.parseFile(fp)
        finally:
            # Release the extracted member promptly instead of keeping one
            # open handle per archive member until interpreter exit.
            fp.close()
finally:
    # Flush and close the CSV and the archive even if a file fails to parse.
    fpout.close()
    tf.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment