Skip to content

Instantly share code, notes, and snippets.

@Drunkar
Last active May 23, 2019 12:28
Show Gist options
  • Save Drunkar/8102055 to your computer and use it in GitHub Desktop.
Save Drunkar/8102055 to your computer and use it in GitHub Desktop.
import sys
import codecs
from sets import Set
import xml.etree.ElementTree as ET
# Replacement table used by unsafe2safe(): each entry is
# (character to look for, character written in its place).
# NOTE(review): these look like the code points whose Unicode mapping differs
# between Japanese legacy encodings (CP932 vs. JIS X 0208) -- confirm intent.
convert_tuples = [
    (u'\u00a6', u'\u007c'),  # broken bar => vertical line
    (u'\u2014', u'\u2015'),  # em dash => horizontal bar
    (u'\u2225', u'\u2016'),  # parallel to => double vertical line
    (u'\uff0d', u'\u2212'),  # fullwidth hyphen-minus => minus sign
    (u'\uff5e', u'\u301c'),  # fullwidth tilde => wave dash
    (u'\uffe0', u'\u00a2'),  # fullwidth cent sign => cent sign
    (u'\uffe1', u'\u00a3'),  # fullwidth pound sign => pound sign
    (u'\uffe2', u'\u00ac'),  # fullwidth not sign => not sign
]
def unsafe2safe(string):
    """Return [string] with the characters listed in convert_tuples replaced.

    Every (unsafe, safe) pair of the module-level convert_tuples table is
    applied in table order; each occurrence of the first character is
    replaced by the second one.
    """
    converted = string
    for pair in convert_tuples:
        converted = converted.replace(pair[0], pair[1])
    return converted
def initializeDictionary(dictionary, val):
    """Overwrite the value of every key already in [dictionary] with [val].

    The dictionary is changed in place; no key is added or removed and
    nothing is returned.
    """
    # fromkeys() builds {key: val} for the existing keys; update() then
    # writes those values back into the same dictionary object.
    dictionary.update(dict.fromkeys(dictionary, val))
def getCSVOfAttributesInDictionary(keys, dictionary, ignore_rist=None):
    """Join values of [dictionary] with key list [keys].

    If there no value of a key, 'None' will be output.

    keys        -- iterable of keys, processed in iteration order.
    dictionary  -- mapping from key to string value.
    ignore_rist -- optional container of keys to skip (default: skip none).

    Returns a string in which every emitted field is followed by a comma,
    so a non-empty result always ends with ",". Values are passed through
    unsafe2safe(). A value containing a comma, a double quote or a line
    break is wrapped in double quotes with embedded quotes doubled
    (RFC 4180), so it cannot spill into neighbouring CSV columns.
    """
    # A None default avoids sharing one mutable list object between calls.
    ignored = () if ignore_rist is None else ignore_rist
    fields = []
    for k in keys:
        if k in ignored:
            continue
        attr = dictionary.get(k)
        if attr is None:
            fields.append("None")
            continue
        attr = unsafe2safe(attr)
        # Quote only when needed so plain values are written unchanged.
        if any(ch in attr for ch in ',"\r\n'):
            attr = '"' + attr.replace('"', '""') + '"'
        fields.append(attr)
    # Callers rely on the trailing comma after the last field.
    return "".join(field + "," for field in fields)
def getAttributes(root):
    """Search all items and return label list.

    Walks every <node> element under [root] and collects the "k" attribute
    of each <tag> element found below it.

    Returns a set with the distinct keys. Tags that carry no "k" attribute
    are skipped, so the set never contains None.
    """
    # Single-argument parenthesized form prints the same text on Python 2
    # (print statement) and Python 3 (print function).
    print("Get attributes...")
    labels = set()
    # Element.iter() is the non-deprecated spelling of getiterator().
    for node in root.iter("node"):
        for tag in node.iter("tag"):
            key = tag.get("k")
            if key is not None:
                labels.add(key)
    return labels
def askAttributes(labels):
    """Ask labels to output.

    Prints [labels], reads a comma-separated selection from standard input
    and asks for confirmation. Answering 'n' starts over with a new
    selection; any answer other than 'y', 'n' or 'exit' repeats the
    confirmation prompt.

    If answer is 'all', [labels] itself is returned. Otherwise a list of
    the entered names is returned, with surrounding whitespace removed and
    empty entries dropped (so "width, type" selects "width" and "type").
    Entering 'exit' at either prompt ends the program through sys.exit().
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3 renamed raw_input to input
    separator = "-------------------------------------------------------------------------------"

    while True:
        print("\navailable attributes:")
        print(separator)
        # One tab-terminated cell per label, all on a single line.
        print("".join(label + "\t" for label in labels))
        print(separator)
        order = read_line("desirable attributes?\n (ex1: width,type ex2: all ex3: exit): ")
        if order == "exit":
            print("exit.")
            sys.exit()
        if order == "all":
            desirable_attributes = labels
        else:
            # Strip blanks typed around the commas and ignore empty entries;
            # otherwise " type" or "" would become a column no tag matches.
            names = [name.strip() for name in order.split(",")]
            desirable_attributes = [name for name in names if name]

        confirm = None
        while confirm not in ("y", "n"):
            print(separator)
            if desirable_attributes:
                print("".join('"' + item + '"\t' for item in desirable_attributes))
                print(separator)
            else:
                # Nothing selected: show an empty list between the rulers.
                print("")
                print("\n" + separator)
            confirm = read_line("output these attributes? (y/n/exit) : ")
            if confirm == "exit":
                print("exit.")
                sys.exit()

        if confirm == "y":
            return desirable_attributes
def writeNodes(root, out_file_name):
    """Write CSV of id, lat, lng and other attributes.

    root          -- parsed XML root element; every <node> element below it
                     becomes one CSV row.
    out_file_name -- path of the UTF-8 file to create (overwritten if it
                     exists).

    The attribute columns are chosen interactively through askAttributes().
    The header line and every row have the same number of columns:
    "id,lat,lng" followed by one column per selected attribute. A node
    without a selected attribute gets the text "None" in that column.
    """
    labels = getAttributes(root)
    # Materialize the selection once so the header and every row use the
    # same column order even when the selection is an unordered set.
    desirable_attributes = list(askAttributes(labels))
    print("Writing...")

    # Joining the parts avoids a dangling comma when nothing was selected.
    header = ",".join(["id", "lat", "lng"] + desirable_attributes)

    # "with" closes the file even if a node raises part-way through.
    with codecs.open(out_file_name, "w", "utf-8") as of:
        of.write(header + "\n")
        for node in root.iter("node"):
            # A fresh dictionary per node: keys missing on this node are
            # reported as "None" by getCSVOfAttributesInDictionary().
            attributes = {}
            for tag in node.iter("tag"):
                attributes[tag.get("k")] = tag.get("v")

            row = ",".join([node.get("id"), node.get("lat"), node.get("lon")])
            if desirable_attributes:
                values = getCSVOfAttributesInDictionary(desirable_attributes, attributes)
                # The helper ends every field with a comma; remove exactly
                # that one separator so empty trailing values keep their
                # column instead of being stripped away.
                if values.endswith(","):
                    values = values[:-1]
                row += "," + values
            of.write(row + "\n")
    print("Done.")
def main():
    """Prompt for an XML file name, parse the file and export its nodes.

    The prompt is repeated until a non-empty name is entered. The CSV is
    written next to the input, named by appending "_nodes.csv" to the
    entered file name.
    """
    while True:
        file_name = raw_input("file_name?: ")
        if file_name:
            break
    print("Parsing file...")
    root = ET.parse(file_name).getroot()
    writeNodes(root, file_name + "_nodes.csv")
# Start the interactive conversion only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment