Skip to content

Instantly share code, notes, and snippets.

@Phyks
Last active November 11, 2018 14:33
Show Gist options
  • Save Phyks/049fde37702e993159b64b40fe9c1cce to your computer and use it in GitHub Desktop.
Save Phyks/049fde37702e993159b64b40fe9c1cce to your computer and use it in GitHub Desktop.
Try to find postal codes without boundaries in OSM.
["01170", "01290", "01410", "01750", "06000", "06100", "06130", "06150", "06160", "06200", "06300", "06400", "06520", "06600",
"07160", "07190", "07310", "07320", "07510", "07630", "12140", "12350", "12460", "13090", "13100", "20000", "20090", "20200",
"20220", "20251", "20256", "20270", "20600", "26340", "26470", "30000", "30900", "33950", "33970", "34000", "34070", "34080",
"34090", "35000", "35200", "35700", "37000", "37100", "37200", "38000", "39120", "39330", "39600", "39800", "42000", "42100",
"44000", "44100", "44200", "44300", "45000", "45100", "46630", "49000", "49100", "50100", "50130", "57000", "57050", "57070",
"59000", "59260", "59491", "59493", "59650", "59777", "59800", "60113", "60190", "63000", "63100", "65200", "65710", "66000",
"66100", "67000", "67100", "67130", "67200", "67570", "68126", "68630", "75000", "76450", "76540", "76600", "76610", "76620",
"83000", "83100", "83200", "83370", "83530", "83600", "83700", "84000", "84140", "87000", "87100", "87280", "92190", "92360"]
#!/usr/bin/env python
import json
import logging
import os
from lxml import etree
# Tag keys that may carry the postal code(s) of an administrative relation.
POSTCODE_TAG_KEYS = ('addr:postcode', 'postal_code')


def _first_tag_value(relation, keys):
    """Return the value of the first ``<tag>`` child whose key is in *keys*.

    Tags are scanned in document order. Returns ``None`` when the relation
    has no tag with one of the requested keys.
    """
    for tag in relation.findall('tag'):
        if tag.attrib['k'] in keys:
            return tag.attrib['v']
    return None


def _iter_admin_postcodes(tree):
    """Yield ``(relation_id, postcodes)`` for relations with a postcode tag.

    ``postcodes`` is the list obtained by splitting the tag value on ``;``
    and stripping surrounding whitespace from each item. Relations without
    any of the ``POSTCODE_TAG_KEYS`` tags are skipped.
    """
    for relation in tree.findall('relation'):
        relation_id = relation.attrib['id']
        raw_value = _first_tag_value(relation, POSTCODE_TAG_KEYS)
        if raw_value is None:
            continue
        yield relation_id, [code.strip() for code in raw_value.split(';')]


def find_postal_codes_without_boundaries(communes_tree, postal_codes_tree,
                                         arrondissements_tree):
    """Return the sorted 5-character postal codes lacking any boundary.

    Each argument is a parsed OSM XML tree (anything exposing the
    ElementTree ``findall`` / ``attrib`` API):

    * ``communes_tree``: ``admin_level=8`` administrative relations;
    * ``postal_codes_tree``: ``boundary=postal_code`` relations;
    * ``arrondissements_tree``: ``admin_level=9`` administrative relations.

    A postal code is reported when it appears in a multi-valued postcode
    tag of a commune or arrondissement (so that relation's boundary does
    not match the postal code) and no ``boundary=postal_code`` relation
    nor single-postcode arrondissement provides a boundary for it.
    """
    multiple_postcodes = set()
    postal_codes_with_boundary = set()

    # Communes tagged with several postal codes: the city boundary does not
    # match any single one of them.
    for relation_id, postcodes in _iter_admin_postcodes(communes_tree):
        if len(postcodes) > 1:
            multiple_postcodes.update(postcodes)
            logging.info(
                '[communes] Multiple postcodes found for relation %s: %s.',
                relation_id, ','.join(postcodes)
            )

    # Dedicated boundaries for postal codes (boundary=postal_code relations).
    for relation in postal_codes_tree.findall('relation'):
        postcode = _first_tag_value(relation, ('postal_code',))
        if postcode is None:
            # No postal_code tag: there is nothing to record for this
            # relation (and None must not end up among the postal codes).
            continue
        postal_codes_with_boundary.add(postcode)
        logging.info(
            '[postal_codes] Found boundary for postal code %s.',
            postcode
        )

    # Arrondissements with a unique postal code provide a boundary for it;
    # those with several postal codes are in the same situation as the
    # multi-postcode communes above.
    for _relation_id, postcodes in _iter_admin_postcodes(
            arrondissements_tree):
        if len(postcodes) > 1:
            multiple_postcodes.update(postcodes)
        else:
            postal_codes_with_boundary.update(postcodes)
            logging.info(
                '[postal_codes] Found arrondissement for postal code %s.',
                postcodes[0]
            )

    # We can now list the postal codes without any matching boundary.
    # NOTE: Considering further levels of admin_level such as admin_level=10
    # does not reduce further this list.
    # Keep only 5-character values: anything else is not a French postal
    # code.
    return sorted(
        code
        for code in multiple_postcodes - postal_codes_with_boundary
        if len(code) == 5
    )


def main():
    """Parse the three OSM extracts and print the result as a JSON list.

    Reads ``communes.xml``, ``postal_codes.xml`` and ``arrondissements.xml``
    from the current directory. Set the ``DEBUG`` environment variable to
    get the per-relation log messages.
    """
    logging_level = logging.WARN
    if 'DEBUG' in os.environ:
        logging_level = logging.DEBUG
    logging.basicConfig(level=logging_level)

    # File names are handed to the parser directly (rather than a text-mode
    # file object) so that the XML encoding declaration is honoured
    # whatever the locale's default encoding is.

    # "communes.xml" file is generated with:
    # osmosis \
    #   --read-pbf france-latest.osm.pbf \
    #   --tf accept-relations boundary=administrative \
    #   --tf accept-relations admin_level=8 \
    #   --tf reject-ways \
    #   --tf reject-nodes \
    #   --write-xml communes.xml
    communes_tree = etree.parse('communes.xml')

    # "postal_codes.xml" file is generated with:
    # osmosis \
    #   --read-pbf france-latest.osm.pbf \
    #   --tf accept-relations boundary=postal_code \
    #   --tf reject-ways \
    #   --tf reject-nodes \
    #   --write-xml postal_codes.xml
    postal_codes_tree = etree.parse('postal_codes.xml')

    # "arrondissements.xml" file is generated with:
    # osmosis \
    #   --read-pbf france-latest.osm.pbf \
    #   --tf accept-relations boundary=administrative \
    #   --tf accept-relations admin_level=9 \
    #   --tf reject-ways \
    #   --tf reject-nodes \
    #   --write-xml arrondissements.xml
    arrondissements_tree = etree.parse('arrondissements.xml')

    print(json.dumps(find_postal_codes_without_boundaries(
        communes_tree, postal_codes_tree, arrondissements_tree
    )))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment