Skip to content

Instantly share code, notes, and snippets.

@bmander
Created July 31, 2013 18:40
Show Gist options
  • Save bmander/6124848 to your computer and use it in GitHub Desktop.
Save bmander/6124848 to your computer and use it in GitHub Desktop.
Digest a tarfile full of GPX files into a more manageable CSV file
import lzma
from xml.dom.minidom import parseString, parse
import dateutil.parser
import calendar
from pyproj import Geod
import tarfile
from xml.parsers import expat
def parse_trkpt(node):
    """Extract ``(lat, lon, epoch_seconds)`` from a GPX ``<trkpt>`` DOM node.

    The ``lat`` and ``lon`` attributes are read as floats, and the text of
    the first ``<time>`` element below the node is parsed with ``dateutil``.

    Returns:
        A ``(lat, lon, tt)`` tuple, where ``tt`` is the integer POSIX
        timestamp of the point's time.

    Raises:
        IndexError: if the node has no ``<time>`` descendant.
        ValueError: if ``lat`` or ``lon`` cannot be converted to float.
    """
    lat = float(node.getAttribute("lat"))
    lon = float(node.getAttribute("lon"))
    strtime = node.getElementsByTagName("time")[0].firstChild.data
    dt = dateutil.parser.parse(strtime)
    # timegm() treats its argument as UTC.  utctimetuple() applies any UTC
    # offset carried by the parsed timestamp (e.g. "+02:00"), whereas
    # timetuple() would silently drop it and yield a shifted epoch value.
    # Naive and "Z" timestamps give the same result either way.
    tt = calendar.timegm(dt.utctimetuple())
    return (lat, lon, tt)
# Geodesic calculator on the 'clrk66' ellipsoid (Clarke 1866 in PROJ naming).
# NOTE(review): `gg` is not referenced anywhere else in the visible code.
gg = Geod(ellps='clrk66')
# Path of the xz-compressed GPX archive.
fn = "gpx-planet-2013-04-09.tar.xz"
# Open the compressed archive as a file object.
# NOTE(review): `lzfp` is never read or closed in the visible code; the tar
# archive below is opened from a separate ".tar" path instead. Confirm
# whether tarfile.open(fileobj=lzfp) was intended, or whether the archive is
# expected to have been decompressed on disk beforehand.
lzfp = lzma.LZMAFile( fn )
# Tar archive that the main loop iterates over.
tf = tarfile.open( "gpx-planet-2013-04-09.tar" )
# Shared mutable counter: counts['i'] is incremented by GPXParser for every
# <trk> element and written as the first column of each CSV row.
counts = {'i':0}
# Output CSV that GPXParser writes one line per <trkpt> to.
fpout = open( "points.csv", "w" )
class GPXParser(object):
    """Streaming (expat-based) GPX reader that writes one CSV row per ``<trkpt>``.

    Each row has the form ``track_number,time_text,lat,lon``:

    * ``track_number`` is ``counts['i']``, incremented on every ``<trk>``
      start tag (the dict is shared by the caller across files).
    * ``time_text`` is the raw text of the ``<time>`` element inside the
      track point, or the string ``None`` when the point has no ``<time>``.
    * ``lat`` / ``lon`` are the raw attribute strings of the ``<trkpt>``.

    Args:
        counts: mutable mapping with an integer under key ``'i'``.
        out: optional writable text file object for the CSV rows.  When
            omitted, rows go to the module-level ``fpout``.
    """

    def __init__(self, counts, out=None):
        self._counts = counts
        self._out = out
        self._ontime = False        # True while inside a <trkpt>'s <time>
        self._curpt = None          # attributes of the <trkpt> being read
        self._curtimechars = None   # accumulated <time> text for that point

        parser = expat.ParserCreate()
        parser.StartElementHandler = self._on_start_element
        parser.EndElementHandler = self._on_end_element
        parser.CharacterDataHandler = self._on_character_data
        self._parser = parser

    def _on_start_element(self, name, attributes):
        """Track entry into ``<trk>``, ``<trkpt>`` and point-level ``<time>``."""
        if name == "trk":
            self._counts['i'] += 1
            print(self._counts)
        elif name == "trkpt":
            self._curpt = attributes
        elif name == "time" and self._curpt is not None:
            # Only capture <time> inside a track point.  A <time> elsewhere
            # (e.g. under <metadata>) used to be captured as well and was
            # then written as the time of the next point lacking its own.
            self._ontime = True
            self._curtimechars = ""

    def _on_end_element(self, name):
        """Emit the CSV row when a ``<trkpt>`` closes."""
        if name == "time":
            self._ontime = False
        elif name == "trkpt":
            # Resolve the default sink lazily so the module-level file is
            # only required when no explicit `out` was supplied.
            out = self._out if self._out is not None else fpout
            out.write("%s,%s,%s,%s\n" % (
                self._counts['i'],
                self._curtimechars,
                self._curpt['lat'],
                self._curpt['lon'],
            ))
            self._curpt = None
            self._curtimechars = None

    def _on_character_data(self, data):
        """Accumulate text; expat may deliver one text node in several chunks."""
        if self._ontime:
            self._curtimechars += data

    def parseFile(self, fp):
        """Parse the GPX document read from the file object ``fp``."""
        self._parser.ParseFile(fp)
# Stream every regular file in the archive through a fresh GPXParser, which
# appends its track points to the output CSV.  The try/finally blocks make
# sure each extracted member, the CSV and the archive are closed even when a
# file fails to parse, so buffered rows are flushed to disk.
try:
    members = tf.getmembers()
    for i, info in enumerate(members):
        # Progress indicator: index of the current member / total members.
        print("%s/%s" % (i, len(members)))
        if not info.isfile():
            continue
        # Members named metadata.xml are skipped (presumably sidecar
        # metadata rather than GPX traces; confirm against the archive).
        if info.name.split("/")[-1] == "metadata.xml":
            continue
        fp = tf.extractfile(info)
        try:
            parser = GPXParser(counts)
            parser.parseFile(fp)
        finally:
            fp.close()
finally:
    fpout.close()
    tf.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment