Last active
January 2, 2016 09:09
-
-
Save ali1234/8281203 to your computer and use it in GitHub Desktop.
Tool for querying the Code-Point Open postcode dataset.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Copyright 2014 Alistair Buxton <[email protected]> | |
# Build a database from the Code-Point Open postcode data and query it. | |
# Run ./postcode.py create to build the database from source files. | |
# Now with caching to speed up development. | |
import sqlite3 | |
import csv | |
import shelve | |
cache = shelve.open('postcode.cache') | |
# http://stackoverflow.com/questions/811548/sqlite-and-python-return-a-dictionary-using-fetchone | |
def dict_factory(cursor, row): | |
d = {} | |
for idx,col in enumerate(cursor.description): | |
d[col[0]] = row[idx] | |
return d | |
conn = sqlite3.connect('postcode.db') | |
conn.row_factory = dict_factory | |
def create_database(): | |
c = conn.cursor() | |
c.execute('''CREATE TABLE counties | |
(code text, county_name text)''') | |
c.execute('''CREATE TABLE districts | |
(code text, district_name text, county text)''') | |
c.execute('''CREATE TABLE wards | |
(code text, ward_name text, district text, county text)''') | |
c.execute('''CREATE TABLE regions | |
(code text, region_name text)''') | |
c.execute('''CREATE TABLE areas | |
(code text, area_name text, region text)''') | |
c.execute('''CREATE TABLE postcodes | |
(flat text, postcode text, country text, region text, area text, county text, district text, ward text, eastings text, northings text)''') | |
with open('Code-Point Open/Doc/Codelist.txt', 'rb') as f: | |
reader = csv.reader(f) | |
counties = [] | |
districts = [] | |
wards = [] | |
regions = [] | |
areas = [] | |
for row in reader: | |
if len(row) == 4: | |
if row[2] == '': | |
if row[1] == '': | |
counties.append((row[0], row[3])) | |
else: | |
districts.append((row[1], row[3], row[0])) | |
else: | |
wards.append((row[2], row[3], row[1], row[0])) | |
elif len(row) == 3: | |
if row[1] == '': | |
regions.append((row[0], row[2])) | |
else: | |
areas.append((row[1], row[2], row[0])) | |
c.executemany('INSERT INTO counties VALUES (?,?)', counties) | |
c.executemany('INSERT INTO districts VALUES (?,?,?)', districts) | |
c.executemany('INSERT INTO wards VALUES (?,?,?,?)', wards) | |
c.executemany('INSERT INTO regions VALUES (?,?)', regions) | |
c.executemany('INSERT INTO areas VALUES (?,?,?)', areas) | |
datafiles = ['ab', 'al', 'ba', 'bb', 'b', 'bd', 'bh', 'bl', 'bn', 'br', 'bs', 'ca', 'cb', 'cf', 'ch', 'cm', 'co', 'cr', 'ct', 'cv', 'cw', 'da', 'dd', 'de', 'dg', 'dh', 'dl', 'dn', 'dt', 'dy', 'ec', 'e', 'eh', 'en', 'ex', 'fk', 'fy', 'g', 'gl', 'gu', 'ha', 'hd', 'hg', 'hp', 'hr', 'hs', 'hu', 'hx', 'ig', 'ip', 'iv', 'ka', 'kt', 'kw', 'ky', 'la', 'l', 'ld', 'le', 'll', 'ln', 'ls', 'lu', 'm', 'me', 'mk', 'ml', 'n', 'ne', 'ng', 'nn', 'np', 'nr', 'nw', 'ol', 'ox', 'pa', 'pe', 'ph', 'pl', 'po', 'pr', 'rg', 'rh', 'rm', 'sa', 's', 'se', 'sg', 'sk', 'sl', 'sm', 'sn', 'so', 'sp', 'sr', 'ss', 'st', 'sw', 'sy', 'ta', 'td', 'tf', 'tn', 'tq', 'tr', 'ts', 'tw', 'ub', 'wa', 'wc', 'w', 'wd', 'wf', 'wn', 'wr', 'ws', 'wv', 'yo', 'ze'] | |
for d in datafiles: | |
with open('Code-Point Open/Data/'+d+'.csv', 'rb') as f: | |
reader = csv.reader(f) | |
rows = [] | |
for row in reader: | |
rows.append((row[0].replace(' ', ''), row[0], row[12], row[13], row[14], row[15], row[16], row[17], row[10], row[11])) | |
c.executemany('INSERT INTO postcodes VALUES (?,?,?,?,?,?,?,?,?,?)', rows) | |
conn.commit() | |
def lookup_postcode(postcode): | |
postcode = postcode.strip().replace(' ', '') | |
if postcode in cache: | |
return cache[postcode] | |
c = conn.cursor() | |
# Here is the magic. Connect everything with left joins. | |
# We join each on postcodes only, because some areas don't have a region | |
# name, some regions don't have a county name etc, but they always have | |
# a code in the postcode row. | |
pc = c.execute(""" | |
SELECT * FROM postcodes | |
LEFT JOIN regions | |
ON regions.code = postcodes.region | |
LEFT JOIN areas | |
ON areas.code = postcodes.area | |
AND areas.region = postcodes.region | |
LEFT JOIN counties | |
ON counties.code = postcodes.county | |
LEFT JOIN districts | |
ON districts.code = postcodes.district | |
AND districts.county = postcodes.county | |
LEFT JOIN wards | |
ON wards.code = postcodes.ward | |
AND wards.district = postcodes.district | |
AND wards.county = postcodes.county | |
WHERE postcodes.flat=? LIMIT 1""", (postcode,)).fetchone() | |
cache[postcode] = pc | |
return pc | |
if __name__ == '__main__': | |
import sys | |
if sys.argv[1] == 'create': | |
create_database() | |
else: | |
for pc in sys.argv[1:]: | |
print lookup_postcode(pc) | |
conn.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment