trolleway · December 8, 2016 14:21
diff --git a/kml_extended2csv.py b/kml_extended2csv.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 # Read one KML file and export one CSV row per <Placemark> into ./out.csv.
 # Usage: kml_extended2csv.py FILE.kml
 # Based on http://robertcarlsen.net/2010/02/23/parsing-foursquare-kml-files-1113

 import sys
 import os
 import codecs
 import csv
 from bs4 import BeautifulSoup

 # Python 2's csv module writes byte strings, Python 3's writes text.
 PY2 = sys.version_info[0] < 3


 def _text(tag):
     """Return the stripped text of *tag*, or an empty string if it is None."""
     if tag is None:
         return u''
     return tag.get_text().strip()


 def _native(text):
     """Return *text* as the string type the csv module of this Python writes.

     On Python 2 unicode text is encoded to UTF-8 bytes; on Python 3 the
     text is returned unchanged.
     """
     if PY2 and not isinstance(text, str):
         return text.encode('utf-8')
     return text


 # the KML file to convert is the first command-line argument
 if len(sys.argv) > 1:
     kml_path = sys.argv[1]
 else:
     kml_path = os.getcwd()

 # sanity checking, only work on kml files
 if not kml_path.endswith('.kml'):
     sys.stderr.write("Usage: %s FILE.kml\n" % sys.argv[0])
     sys.exit(-1)

 print("Reading file: " + kml_path)

 with codecs.open(kml_path, 'r', "utf-8") as fh:
     markup = fh.read()

 # KML is XML, so parse it with the XML parser. An HTML parser lower-cases
 # tag names (and handles <style> as an HTML stylesheet element), which is
 # why looking up <LabelStyle> under <Style> used to find nothing.
 soup = BeautifulSoup(markup, 'xml')
 #print(soup.prettify())

 # Debug output: the <LabelStyle> of the first <Style>, if the file has one.
 style = soup.find('Style')
 label_style = style.find('LabelStyle') if style is not None else None
 if label_style is not None:
     print(label_style)

 # one dict (column name -> value) per placemark; exported to CSV below
 outputData = []

 for placemark in soup.find_all('Placemark'):
     attributes = {}

     attributes['Name'] = _text(placemark.find('name'))
     print(attributes['Name'])
     print('--------------------')

     attributes['Description'] = _text(placemark.find('description'))
     # NOTE(review): the original took Time from the 4th child by position;
     # the standard KML <TimeStamp><when> element is assumed here - confirm
     # against the input files.
     attributes['Time'] = _text(placemark.find('when'))

     # <coordinates> holds "lon,lat[,alt]" tuples separated by whitespace;
     # only the first tuple is exported.
     lon, lat = u'', u''
     coord_text = _text(placemark.find('coordinates'))
     if coord_text:
         parts = coord_text.split()[0].split(',')
         if len(parts) >= 2:
             lon, lat = parts[0].strip(), parts[1].strip()
     attributes['Lon'] = lon
     attributes['Lat'] = lat

     # <ExtendedData><Data name="CELLID"><value>4881</value></Data>...:
     # every named Data element becomes its own column. setdefault keeps
     # the fixed columns above from being overwritten by a same-named Data.
     for data in placemark.find_all('Data'):
         key = data.get('name')
         if key:
             attributes.setdefault(key, _text(data.find('value')))

     # add this placemark's data to the list
     outputData.append(
         dict((_native(k), _native(v)) for k, v in attributes.items()))

 if not outputData:
     sys.stderr.write("No Placemark elements found in " + kml_path + "\n")
     sys.exit(1)

 # Placemarks may carry different ExtendedData keys, so the header is the
 # union of all keys (reverse alphabetical, as before).
 fieldnames = sorted(set(key for row in outputData for key in row),
                     reverse=True)

 # create the output file
 out_path = os.path.join(os.getcwd(), "out.csv")
 if PY2:
     out = open(out_path, 'wb')
 else:
     out = open(out_path, 'w', encoding='utf-8', newline='')

 print("Writing output file: " + out.name)
 try:
     writer = csv.DictWriter(out, dialect='excel', fieldnames=fieldnames,
                             restval='', extrasaction='ignore',
                             quoting=csv.QUOTE_NONNUMERIC)
     # header row written by hand (keeps Python 2.6 compatibility)
     writer.writerow(dict((n, n) for n in fieldnames))
     writer.writerows(outputData)
 finally:
     out.close()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	# Read one KML file and export one CSV row per <Placemark> into ./out.csv.
	# Usage: kml_extended2csv.py FILE.kml
	# Based on http://robertcarlsen.net/2010/02/23/parsing-foursquare-kml-files-1113

	import sys
	import os
	import codecs
	import csv
	from bs4 import BeautifulSoup

	# Python 2's csv module writes byte strings, Python 3's writes text.
	PY2 = sys.version_info[0] < 3


	def _text(tag):
	    """Return the stripped text of *tag*, or an empty string if it is None."""
	    if tag is None:
	        return u''
	    return tag.get_text().strip()


	def _native(text):
	    """Return *text* as the string type the csv module of this Python writes.

	    On Python 2 unicode text is encoded to UTF-8 bytes; on Python 3 the
	    text is returned unchanged.
	    """
	    if PY2 and not isinstance(text, str):
	        return text.encode('utf-8')
	    return text


	# the KML file to convert is the first command-line argument
	if len(sys.argv) > 1:
	    kml_path = sys.argv[1]
	else:
	    kml_path = os.getcwd()

	# sanity checking, only work on kml files
	if not kml_path.endswith('.kml'):
	    sys.stderr.write("Usage: %s FILE.kml\n" % sys.argv[0])
	    sys.exit(-1)

	print("Reading file: " + kml_path)

	with codecs.open(kml_path, 'r', "utf-8") as fh:
	    markup = fh.read()

	# KML is XML, so parse it with the XML parser. An HTML parser lower-cases
	# tag names (and handles <style> as an HTML stylesheet element), which is
	# why looking up <LabelStyle> under <Style> used to find nothing.
	soup = BeautifulSoup(markup, 'xml')
	#print(soup.prettify())

	# Debug output: the <LabelStyle> of the first <Style>, if the file has one.
	style = soup.find('Style')
	label_style = style.find('LabelStyle') if style is not None else None
	if label_style is not None:
	    print(label_style)

	# one dict (column name -> value) per placemark; exported to CSV below
	outputData = []

	for placemark in soup.find_all('Placemark'):
	    attributes = {}

	    attributes['Name'] = _text(placemark.find('name'))
	    print(attributes['Name'])
	    print('--------------------')

	    attributes['Description'] = _text(placemark.find('description'))
	    # NOTE(review): the original took Time from the 4th child by position;
	    # the standard KML <TimeStamp><when> element is assumed here - confirm
	    # against the input files.
	    attributes['Time'] = _text(placemark.find('when'))

	    # <coordinates> holds "lon,lat[,alt]" tuples separated by whitespace;
	    # only the first tuple is exported.
	    lon, lat = u'', u''
	    coord_text = _text(placemark.find('coordinates'))
	    if coord_text:
	        parts = coord_text.split()[0].split(',')
	        if len(parts) >= 2:
	            lon, lat = parts[0].strip(), parts[1].strip()
	    attributes['Lon'] = lon
	    attributes['Lat'] = lat

	    # <ExtendedData><Data name="CELLID"><value>4881</value></Data>...:
	    # every named Data element becomes its own column. setdefault keeps
	    # the fixed columns above from being overwritten by a same-named Data.
	    for data in placemark.find_all('Data'):
	        key = data.get('name')
	        if key:
	            attributes.setdefault(key, _text(data.find('value')))

	    # add this placemark's data to the list
	    outputData.append(
	        dict((_native(k), _native(v)) for k, v in attributes.items()))

	if not outputData:
	    sys.stderr.write("No Placemark elements found in " + kml_path + "\n")
	    sys.exit(1)

	# Placemarks may carry different ExtendedData keys, so the header is the
	# union of all keys (reverse alphabetical, as before).
	fieldnames = sorted(set(key for row in outputData for key in row),
	                    reverse=True)

	# create the output file
	out_path = os.path.join(os.getcwd(), "out.csv")
	if PY2:
	    out = open(out_path, 'wb')
	else:
	    out = open(out_path, 'w', encoding='utf-8', newline='')

	print("Writing output file: " + out.name)
	try:
	    writer = csv.DictWriter(out, dialect='excel', fieldnames=fieldnames,
	                            restval='', extrasaction='ignore',
	                            quoting=csv.QUOTE_NONNUMERIC)
	    # header row written by hand (keeps Python 2.6 compatibility)
	    writer.writerow(dict((n, n) for n in fieldnames))
	    writer.writerows(outputData)
	finally:
	    out.close()