Skip to content

Instantly share code, notes, and snippets.

@ap-Codkelden
Last active May 1, 2020 10:00
Show Gist options
  • Save ap-Codkelden/942dbf063f47ac60a2b51cd5bd91f668 to your computer and use it in GitHub Desktop.
Save ap-Codkelden/942dbf063f47ac60a2b51cd5bd91f668 to your computer and use it in GitHub Desktop.
fix street names in OSM Overpass XML file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020 Nasridinov Renat
# MIT License
import argparse
import logging
import re
import sys
from lxml import etree
from pathlib import Path
# Send all log records to street.log, truncating the file on every run.
# The handler is built explicitly so the file is always written as UTF-8:
# the values this script logs may contain Cyrillic text, which the platform
# default encoding is not guaranteed to be able to represent.
logging.basicConfig(
    format='%(levelname)s:%(message)s',
    level=logging.INFO,
    handlers=[
        logging.FileHandler('street.log', mode='w', encoding='utf-8'),
    ],
)
# Maps a regular expression that recognises a leading street-type word (or
# its abbreviation) to the full street-type word that should be used instead.
# Every pattern captures the remainder of the name in group 1.
# Entry order matters: the lookup takes the first pattern that matches.
#
# NOTE(review): the "вул", "ул", "пров" and "пер" patterns do not require a
# separator after the abbreviation, so an ordinary word that merely starts
# with those letters (e.g. "Первомайская") also matches - confirm whether
# that is acceptable for the data being processed.
STREETS = {
    r"^вул\.?(?:иця)?(.+?)$": "вулиця",
    r"^ул\.?(?:ица)?(.+?)$": "улица",
    # The negative lookahead keeps values that start with "проверяем" from
    # being read as the "пров." abbreviation.
    r"^пров(?!еряем)\.?(?:улок)?(.+?)$": "провулок",
    r"^провулок\s(.+?)$": "провулок",
    r"^переулок\s(.+?)$": "переулок",
    r"^пер\.?(?:еул\.(?:ок)?)?(.+?)$": "переулок",
    r"^проезд\s(.+?)$": "проезд",
    r"^проулок\s(.+?)$": "проулок",
    r"^проїзд\s(.+?)$": "проїзд",
    # "пр", "просп" or "проспект" followed by either a dot (with optional
    # whitespace) or whitespace.  The separator is matched exactly once so
    # that "пр. X" and "просп. X" with a single space are recognised.
    r"^пр(?:осп(?:ект)?)?(?:\.\s*|\s)(.+?)$": "проспект",
}
def reverse_street(value):
    """Move a leading street-type word or abbreviation to the end of a name.

    The value is matched case-insensitively against the patterns in
    ``STREETS`` in their definition order.  For the first pattern that
    matches, the captured remainder of the name (stripped of surrounding
    whitespace) is joined with the full street-type word by a single space.

    Args:
        value: The name string to rewrite.

    Returns:
        The rewritten name, or ``None`` when no pattern matches (the value
        is then logged at ERROR level).
    """
    # Values containing parentheses or a "<digits>-го" ordinal are written
    # to the log at INFO level; processing continues regardless.
    if re.search(r"\(|\)", value) is not None:
        logging.info(value)
    if re.search(r"\d+-го", value) is not None:
        logging.info(value)

    # First matching pattern wins, so stop scanning as soon as one is found.
    for pattern, street_type in STREETS.items():
        found = re.match(pattern, value, re.I)
        if found is not None:
            return ' '.join((found.group(1).strip(), street_type))

    logging.error(value)
    return None
def _rewrite_street_names(osm_tree):
    """Rewrite street-name tag values in place and return how many changed.

    Every ``tag`` element whose ``k`` attribute is ``name`` or ``old_name``
    (optionally suffixed with ``:ru`` or ``:uk``) is inspected.  Values that
    already end with a street-type word are left alone; the others are passed
    to ``reverse_street``.  When that yields a new name, the ``v`` attribute
    is replaced and the parent element gets ``action="modify"``.
    """
    # Compiled once, outside the loop.
    name_key = re.compile(r"^(?:old_)?name(?:\:(?:ru|uk))?$")
    already_reversed = re.compile(
        r"^.+? (?:вулиця|пров?улок|площа|проспект|улица|"
        r"переулок|площадь|про[еї]зд)$")

    changed = 0
    for tag in osm_tree.xpath('//tag'):
        # Read the attributes by name: unpacking attrib.values() breaks on
        # tags that carry extra attributes or list them in another order.
        key = tag.get('k')
        val = tag.get('v')
        if not key or val is None or not name_key.match(key):
            continue
        if already_reversed.match(val):
            continue
        new_name = reverse_street(val)
        if not new_name:
            continue
        tag.set('v', new_name)
        parent = tag.getparent()
        # A root-level <tag> has no parent to mark.
        if parent is not None:
            parent.set('action', 'modify')
        changed += 1
    return changed


def main():
    """Command-line entry point: fix street names in an OSM XML file.

    Reads the input file given on the command line, rewrites matching name
    tags and writes the resulting document to the output file (default
    ``osm_new.xml``).  Exits with status 1 when the input is missing or
    cannot be parsed, or when the output cannot be written; exits with
    status 0 without writing anything when no element was changed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('xmlfile', help='file to be processed', type=str)
    parser.add_argument('outfile', help='file to be write', type=str,
                        default='osm_new.xml', nargs='?')
    args = parser.parse_args()

    if not Path(args.xmlfile).is_file():
        print("No file found.\n")
        sys.exit(1)

    try:
        osm_tree = etree.parse(args.xmlfile)
    except (etree.LxmlError, OSError):
        # Only parse/read failures are reported here; KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        print("Input file processing error, maybe it is not "
              "valid XML file?\n")
        sys.exit(1)

    counter = _rewrite_street_names(osm_tree)
    if counter == 0:
        print("No elements processed")
        sys.exit(0)
    print(f"{counter} elements processed")

    # Serialise before opening the output so a serialisation failure cannot
    # leave behind a truncated file.
    payload = etree.tostring(osm_tree, pretty_print=False,
                             xml_declaration=True, encoding="UTF-8")
    try:
        with open(args.outfile, 'wb') as f:
            f.write(payload)
    except OSError as e:
        # Only OSError is guaranteed to carry ``strerror``.
        print("There is an error occurred during output file "
              f"write process:\n{e.strerror}\n")
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment