Last active
May 1, 2020 10:00
-
-
Save ap-Codkelden/942dbf063f47ac60a2b51cd5bd91f668 to your computer and use it in GitHub Desktop.
fix street names in OSM Overpass XML file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020 Nasridinov Renat
# MIT License
import argparse | |
import logging | |
import re | |
import sys | |
from lxml import etree | |
from pathlib import Path | |
# Send log records to "street.log", truncating the file on every run.
logging.basicConfig(
    format='%(levelname)s:%(message)s', filename='street.log',
    filemode='w', level=logging.INFO)

# Maps a regular expression that recognises a leading street-type word
# (written in full or abbreviated) to the canonical word that should be
# appended to the name instead. Group 1 of every pattern captures the rest of
# the name. Insertion order matters: consumers take the first pattern that
# matches, so more specific patterns must stay ahead of broader ones.
#
# Patterns are raw strings so that regex escapes such as ``\.`` and ``\s`` are
# not treated as (invalid) Python string escapes.
STREETS = {
    r"^вул\.?(?:иця)?(.+?)$": "вулиця",
    r"^ул\.?(?:ица)?(.+?)$": "улица",
    # The negative lookahead keeps words starting with "проверяем" from being
    # read as an abbreviation of "провулок".
    r"^пров(?!еряем)\.?(?:улок)?(.+?)$": "провулок",
    r"^провулок\s(.+?)$": "провулок",
    r"^переулок\s(.+?)$": "переулок",
    r"^пер\.?(?:еул\.(?:ок)?)?(.+?)$": "переулок",
    r"^проезд\s(.+?)$": "проезд",
    r"^проулок\s(.+?)$": "проулок",
    r"^проїзд\s(.+?)$": "проїзд",
    # "пр", "просп" or "проспект", optionally followed by a dot, then
    # whitespace. The previous pattern consumed the space together with the
    # dot and then demanded another one, so "пр. X" / "просп. X" never matched.
    r"^пр(?:осп(?:ект)?)?\.?\s+(.+?)$": "проспект",
}
def reverse_street(value):
    """Move a leading street-type word to the end of a street name.

    The patterns in ``STREETS`` are tried in order, case-insensitively,
    against the start of ``value``. For the first one that matches, the
    captured remainder of the name is stripped of surrounding whitespace and
    joined by a single space with the canonical street-type word mapped to
    that pattern.

    Args:
        value: Street name as a string.

    Returns:
        The rearranged name, or ``None`` when no pattern matches (the
        original value is then logged at ERROR level).
    """
    # Names containing parentheses or a "<digits>-го" ordinal are logged at
    # INFO level; processing continues either way. Raw strings keep the regex
    # escapes from being parsed as invalid Python string escapes.
    if re.search(r"[()]", value) is not None:
        logging.info(value)
    if re.search(r"\d+-го", value) is not None:
        logging.info(value)

    # Only the first matching pattern is ever used, so stop at the first hit
    # instead of evaluating every pattern up front.
    for pattern, street_type in STREETS.items():
        found = re.match(pattern, value, re.I)
        if found is not None:
            return ' '.join((found.group(1).strip(), street_type))

    logging.error(value)
    return None
def main(argv=None):
    """Rewrite street names in an OSM XML file and save the result.

    Every ``<tag>`` element whose ``k`` attribute is ``name``, ``old_name`` or
    one of their ``:ru`` / ``:uk`` variants gets its ``v`` attribute passed
    through ``reverse_street`` unless the value already ends with a
    street-type word. When a new name is produced, the tag is updated and its
    parent element is marked with ``action="modify"``.

    Args:
        argv: Command-line arguments without the program name; ``None`` makes
            argparse read ``sys.argv``.

    Returns:
        Process exit status: ``0`` on success (including the case where
        nothing needed changing, in which no output file is written) and
        ``1`` when the input cannot be read or the output cannot be written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('xmlfile', help='file to be processed', type=str)
    parser.add_argument('outfile', help='file to be write', type=str,
                        default='osm_new.xml', nargs='?')
    args = parser.parse_args(argv)

    if not Path(args.xmlfile).is_file():
        print("No file found.\n")
        return 1

    try:
        osm_tree = etree.parse(args.xmlfile)
    except (etree.LxmlError, OSError):
        # A bare ``except`` here would also swallow SystemExit and
        # KeyboardInterrupt; only parser and I/O failures are expected.
        print("Input file processing error, maybe it is not "
              "valid XML file?\n")
        return 1

    # Compiled once because both patterns are applied to every tag.
    name_key = re.compile(r"^(?:old_)?name(?:\:(?:ru|uk))?$")
    already_reversed = re.compile(
        r"^.+? (?:вулиця|пров?улок|площа|проспект|улица|"
        r"переулок|площадь|про[еї]зд)$")

    counter = 0
    for tag in osm_tree.xpath('//tag'):
        # Read the attributes by name: unpacking ``attrib.values()`` relies on
        # attribute order and fails for tags without exactly two attributes.
        key, name = tag.get('k'), tag.get('v')
        if not key or name is None:
            continue
        if not name_key.match(key):
            continue
        if already_reversed.match(name):
            continue

        new_name = reverse_street(name)
        if not new_name:
            continue

        tag.set('v', new_name)
        parent = tag.getparent()
        if parent is not None:
            parent.set('action', 'modify')
        counter += 1

    if counter == 0:
        print("No elements processed")
        return 0
    print(f"{counter} elements processed")

    try:
        # Serialize before opening the file so that a serialization failure
        # does not leave a truncated output file behind.
        payload = etree.tostring(
            osm_tree, pretty_print=False, xml_declaration=True,
            encoding="UTF-8")
        with open(args.outfile, 'wb') as f:
            f.write(payload)
    except Exception as e:
        # Not every exception carries ``strerror``; fall back to the
        # exception itself so the handler cannot raise AttributeError.
        detail = getattr(e, "strerror", None) or e
        print("There is an error occurred during output file "
              f"write process:\n{detail}\n")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment