Created
December 13, 2011 23:37
-
-
Save gka/1474480 to your computer and use it in GitHub Desktop.
This script tries to map European zip codes to NUTS2 regions by point-in-polygon checking the zip coordinate against a shapefile of NUTS regions..
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7 | |
import csv, shapefile, Polygon, Polygon.IO, math | |
# shapefile comes from here: | |
# http://epp.eurostat.ec.europa.eu/cache/GISCO/geodatafiles/NUTS_10M_2006_SH.zip | |
shpsrc = "eu-nuts-2.shp" | |
# zip code database comes from here: | |
# http://www.clearlyandsimply.com/files/2010/10/european_cities_and_postcodes_us_standard.zip | |
zipsrc = "european_postcodes_us_standard.csv" | |
sf = shapefile.Reader(shpsrc) | |
def shape_to_poly(shp): | |
parts = shp.parts[:] | |
parts.append(len(shp.points)) | |
poly = Polygon.Polygon() | |
for j in range(len(parts)-1): | |
pts = shp.points[parts[j]:parts[j+1]] | |
poly.addContour(pts) | |
return poly | |
shprecs = sf.shapeRecords() | |
polygons = [] | |
nutsids = [] | |
country_isos = set() | |
for sr in shprecs: | |
shp = sr.shape | |
poly = shape_to_poly(shp) | |
polygons.append(poly) | |
nuts = sr.record[1] | |
nutsids.append(nuts) | |
iso = nuts[:2] | |
country_isos.add(iso) | |
# Polygon.IO.writeSVG('test.svg', polygons) | |
src = csv.reader(open(zipsrc)) | |
out = csv.writer(open('zip2nuts.csv','w')) | |
out.writerow('iso,zip,nuts2'.split(',')) | |
notfound = csv.writer(open('notfound.csv', 'w')) | |
skipped_countries = set() | |
max_global_min_dist = 0 | |
max_dist_poly = -1 | |
skip = True | |
for city in src: | |
if skip: | |
skip = False | |
continue | |
zip,iso,lat,lon = city | |
lat = float(lat) | |
lon = float(lon) | |
if iso not in country_isos: | |
skipped_countries.add(iso) | |
continue | |
found = False | |
for i in range(len(polygons)): | |
poly = polygons[i] | |
nuts = nutsids[i] | |
if iso != nuts[:2]: | |
# not the same country, so skip this | |
continue | |
if poly.isInside(lon,lat): | |
out.writerow([iso,zip,nuts]) | |
found = True | |
break | |
if not found: | |
# lets find the nearest polygon | |
global_min_dist = 9999999999999 | |
globel_nearest_ll = None | |
nearest_poly = -1 | |
def dist(x0,y0,x1,y1): | |
dx = x0-x1 | |
dy = y0-y1 | |
return dx*dx+dy*dy | |
for i in range(len(polygons)): | |
min_dist = 9999999999999 | |
nearest_ll = None | |
poly = polygons[i] | |
nuts = nutsids[i] | |
if iso != nuts[:2]: | |
# not the same country, so skip this | |
continue | |
for j in range(len(poly)): | |
contour = poly.contour(j) | |
for x,y in contour: | |
dx = x - lon | |
dy = y - lat | |
dist = dx*dx+dy*dy | |
if dist < min_dist: | |
min_dist = dist | |
nearest_ll = (x,y) | |
if min_dist < global_min_dist: | |
global_min_dist = min_dist | |
global_nearest_ll = nearest_ll | |
nearest_poly = i | |
nlon,nlat = global_nearest_ll | |
nuts = nutsids[nearest_poly] | |
out.writerow([iso,zip,nuts]) | |
notfound.writerow([iso,zip,lon,lat,nuts,global_min_dist]) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello,
I am new to Python, could you explain a bit how I should proceed.