Last active
November 11, 2018 14:33
-
-
Save Phyks/049fde37702e993159b64b40fe9c1cce to your computer and use it in GitHub Desktop.
Try to find postal codes without boundaries in OSM.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
["01170", "01290", "01410", "01750", "06000", "06100", "06130", "06150", "06160", "06200", "06300", "06400", "06520", "06600",
"07160", "07190", "07310", "07320", "07510", "07630", "12140", "12350", "12460", "13090", "13100", "20000", "20090", "20200",
"20220", "20251", "20256", "20270", "20600", "26340", "26470", "30000", "30900", "33950", "33970", "34000", "34070", "34080",
"34090", "35000", "35200", "35700", "37000", "37100", "37200", "38000", "39120", "39330", "39600", "39800", "42000", "42100",
"44000", "44100", "44200", "44300", "45000", "45100", "46630", "49000", "49100", "50100", "50130", "57000", "57050", "57070",
"59000", "59260", "59491", "59493", "59650", "59777", "59800", "60113", "60190", "63000", "63100", "65200", "65710", "66000",
"66100", "67000", "67100", "67130", "67200", "67570", "68126", "68630", "75000", "76450", "76540", "76600", "76610", "76620",
"83000", "83100", "83200", "83370", "83530", "83600", "83700", "84000", "84140", "87000", "87100", "87280", "92190", "92360"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import json | |
import logging | |
import os | |
from lxml import etree | |
# Log at DEBUG level when a DEBUG variable is present in the environment
# (whatever its value); otherwise only report warnings and above.
logging_level = logging.DEBUG if 'DEBUG' in os.environ else logging.WARN
logging.basicConfig(level=logging_level)
# The "communes.xml" file is generated with:
# osmosis \
#     --read-pbf france-latest.osm.pbf \
#     --tf accept-relations boundary=administrative \
#     --tf accept-relations admin_level=8 \
#     --tf reject-ways \
#     --tf reject-nodes \
#     --write-xml communes.xml
#
# The file is opened in binary mode so that lxml decodes the document itself
# (from its XML declaration) instead of relying on the locale's default text
# encoding.
with open('communes.xml', 'rb') as fh:
    communes_tree = etree.parse(fh)

# Collect every postal code found on a commune relation whose postal code tag
# lists several ";"-separated values.
# For these cases, the city boundary does not match with the postal code.
multiple_postcodes_from_communes = []
for relation in communes_tree.findall('relation'):
    relation_id = relation.attrib['id']
    postcode = None
    for tag in relation.findall('tag'):
        # Only the first matching tag of the relation is taken into account.
        if tag.attrib['k'] in ('addr:postcode', 'postal_code'):
            postcode = [x.strip() for x in tag.attrib['v'].split(';')]
            break
    if not postcode:
        # No postal code tag on this relation.
        continue
    if len(postcode) > 1:
        multiple_postcodes_from_communes.extend(postcode)
        logging.info(
            '[communes] Multiple postcodes found for relation %s: %s.',
            relation_id, ','.join(postcode)
        )
# The "postal_codes.xml" file is generated with:
# osmosis \
#     --read-pbf france-latest.osm.pbf \
#     --tf accept-relations boundary=postal_code \
#     --tf reject-ways \
#     --tf reject-nodes \
#     --write-xml postal_codes.xml
#
# The file is opened in binary mode so that lxml decodes the document itself
# (from its XML declaration) instead of relying on the locale's default text
# encoding.
with open('postal_codes.xml', 'rb') as fh:
    postal_codes_tree = etree.parse(fh)

# Extract all the specific boundaries for postal codes (from
# boundary=postal_code relations).
postal_codes_with_boundary = []
for relation in postal_codes_tree.findall('relation'):
    postcode = None
    for tag in relation.findall('tag'):
        # Only the first "postal_code" tag of the relation is used.
        if tag.attrib['k'] == 'postal_code':
            postcode = tag.attrib['v']
            break
    if postcode is None:
        # Relation without a "postal_code" tag: there is nothing to record,
        # and None must not end up in the list of known postal codes.
        continue
    postal_codes_with_boundary.append(postcode)
    logging.info(
        '[postal_codes] Found boundary for postal code %s.',
        postcode
    )
# The "arrondissements.xml" file is generated with:
# osmosis \
#     --read-pbf france-latest.osm.pbf \
#     --tf accept-relations boundary=administrative \
#     --tf accept-relations admin_level=9 \
#     --tf reject-ways \
#     --tf reject-nodes \
#     --write-xml arrondissements.xml
#
# The file is opened in binary mode so that lxml decodes the document itself
# (from its XML declaration) instead of relying on the locale's default text
# encoding.
with open('arrondissements.xml', 'rb') as fh:
    arrondissements_tree = etree.parse(fh)

# Split the arrondissements in two groups:
# - those listing several ";"-separated postal codes, whose codes are
#   collected in multiple_postcodes_from_arrondissements;
# - those with a unique postal code, which is recorded in
#   postal_codes_with_boundary (the arrondissement acts as its boundary).
multiple_postcodes_from_arrondissements = []
for relation in arrondissements_tree.findall('relation'):
    postcode = None
    for tag in relation.findall('tag'):
        # Only the first matching tag of the relation is taken into account.
        if tag.attrib['k'] in ('addr:postcode', 'postal_code'):
            postcode = [x.strip() for x in tag.attrib['v'].split(';')]
            break
    if not postcode:
        # No postal code tag on this relation.
        continue
    if len(postcode) > 1:
        multiple_postcodes_from_arrondissements.extend(postcode)
    else:
        postal_codes_with_boundary.extend(postcode)
        logging.info(
            '[postal_codes] Found arrondissement for postal code %s.',
            postcode
        )
# Every postal code seen in a multi-valued tag (on a commune or on an
# arrondissement) that has no boundary of its own is reported.
# NOTE: Considering further levels of admin_level such as admin_level=10 does
# not reduce further this list.
candidate_codes = set(multiple_postcodes_from_communes)
candidate_codes.update(multiple_postcodes_from_arrondissements)
postal_codes_without_boundaries = sorted(
    candidate_codes.difference(postal_codes_with_boundary)
)

# Only keep the codes made of exactly 5 characters: anything else is not a
# French postal code.
five_char_codes = list(
    filter(lambda code: len(code) == 5, postal_codes_without_boundaries)
)
print(json.dumps(five_char_codes))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment