Skip to content

Instantly share code, notes, and snippets.

@ali1234
Last active January 2, 2016 09:09
Show Gist options
  • Save ali1234/8281203 to your computer and use it in GitHub Desktop.
Save ali1234/8281203 to your computer and use it in GitHub Desktop.
Tool for querying the Code-Point Open postcode dataset.
#!/usr/bin/env python
# Copyright 2014 Alistair Buxton <[email protected]>
# Build a database from the Code-Point Open postcode data and query it.
# Run ./postcode.py create to build the database from source files.
# Now with caching to speed up development.
import sqlite3
import csv
import shelve
cache = shelve.open('postcode.cache')
# http://stackoverflow.com/questions/811548/sqlite-and-python-return-a-dictionary-using-fetchone
def dict_factory(cursor, row):
d = {}
for idx,col in enumerate(cursor.description):
d[col[0]] = row[idx]
return d
conn = sqlite3.connect('postcode.db')
conn.row_factory = dict_factory
def create_database():
c = conn.cursor()
c.execute('''CREATE TABLE counties
(code text, county_name text)''')
c.execute('''CREATE TABLE districts
(code text, district_name text, county text)''')
c.execute('''CREATE TABLE wards
(code text, ward_name text, district text, county text)''')
c.execute('''CREATE TABLE regions
(code text, region_name text)''')
c.execute('''CREATE TABLE areas
(code text, area_name text, region text)''')
c.execute('''CREATE TABLE postcodes
(flat text, postcode text, country text, region text, area text, county text, district text, ward text, eastings text, northings text)''')
with open('Code-Point Open/Doc/Codelist.txt', 'rb') as f:
reader = csv.reader(f)
counties = []
districts = []
wards = []
regions = []
areas = []
for row in reader:
if len(row) == 4:
if row[2] == '':
if row[1] == '':
counties.append((row[0], row[3]))
else:
districts.append((row[1], row[3], row[0]))
else:
wards.append((row[2], row[3], row[1], row[0]))
elif len(row) == 3:
if row[1] == '':
regions.append((row[0], row[2]))
else:
areas.append((row[1], row[2], row[0]))
c.executemany('INSERT INTO counties VALUES (?,?)', counties)
c.executemany('INSERT INTO districts VALUES (?,?,?)', districts)
c.executemany('INSERT INTO wards VALUES (?,?,?,?)', wards)
c.executemany('INSERT INTO regions VALUES (?,?)', regions)
c.executemany('INSERT INTO areas VALUES (?,?,?)', areas)
datafiles = ['ab', 'al', 'ba', 'bb', 'b', 'bd', 'bh', 'bl', 'bn', 'br', 'bs', 'ca', 'cb', 'cf', 'ch', 'cm', 'co', 'cr', 'ct', 'cv', 'cw', 'da', 'dd', 'de', 'dg', 'dh', 'dl', 'dn', 'dt', 'dy', 'ec', 'e', 'eh', 'en', 'ex', 'fk', 'fy', 'g', 'gl', 'gu', 'ha', 'hd', 'hg', 'hp', 'hr', 'hs', 'hu', 'hx', 'ig', 'ip', 'iv', 'ka', 'kt', 'kw', 'ky', 'la', 'l', 'ld', 'le', 'll', 'ln', 'ls', 'lu', 'm', 'me', 'mk', 'ml', 'n', 'ne', 'ng', 'nn', 'np', 'nr', 'nw', 'ol', 'ox', 'pa', 'pe', 'ph', 'pl', 'po', 'pr', 'rg', 'rh', 'rm', 'sa', 's', 'se', 'sg', 'sk', 'sl', 'sm', 'sn', 'so', 'sp', 'sr', 'ss', 'st', 'sw', 'sy', 'ta', 'td', 'tf', 'tn', 'tq', 'tr', 'ts', 'tw', 'ub', 'wa', 'wc', 'w', 'wd', 'wf', 'wn', 'wr', 'ws', 'wv', 'yo', 'ze']
for d in datafiles:
with open('Code-Point Open/Data/'+d+'.csv', 'rb') as f:
reader = csv.reader(f)
rows = []
for row in reader:
rows.append((row[0].replace(' ', ''), row[0], row[12], row[13], row[14], row[15], row[16], row[17], row[10], row[11]))
c.executemany('INSERT INTO postcodes VALUES (?,?,?,?,?,?,?,?,?,?)', rows)
conn.commit()
def lookup_postcode(postcode):
postcode = postcode.strip().replace(' ', '')
if postcode in cache:
return cache[postcode]
c = conn.cursor()
# Here is the magic. Connect everything with left joins.
# We join each on postcodes only, because some areas don't have a region
# name, some regions don't have a county name etc, but they always have
# a code in the postcode row.
pc = c.execute("""
SELECT * FROM postcodes
LEFT JOIN regions
ON regions.code = postcodes.region
LEFT JOIN areas
ON areas.code = postcodes.area
AND areas.region = postcodes.region
LEFT JOIN counties
ON counties.code = postcodes.county
LEFT JOIN districts
ON districts.code = postcodes.district
AND districts.county = postcodes.county
LEFT JOIN wards
ON wards.code = postcodes.ward
AND wards.district = postcodes.district
AND wards.county = postcodes.county
WHERE postcodes.flat=? LIMIT 1""", (postcode,)).fetchone()
cache[postcode] = pc
return pc
if __name__ == '__main__':
import sys
if sys.argv[1] == 'create':
create_database()
else:
for pc in sys.argv[1:]:
print lookup_postcode(pc)
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment