|
#!/usr/bin/env python3 |
|
|
|
import os |
|
from functools import partial |
|
from fiona.transform import transform_geom |
|
|
|
import pandas as pd |
|
import geopandas as gp |
|
from shapely.ops import transform |
|
|
|
# EPSG:4326 WG 84 |
|
# EPSG:32630 |
|
# EPSG:27700 OS GB36 |
|
|
|
URBANTYPES = {'Large Town', |
|
'Large Town in Conurbation', |
|
'Core City (outside London)', |
|
'Village or small community in Conurbation', |
|
'Other City', |
|
'Small Town in Conurbation', |
|
'Small Town', |
|
'Medium Town', |
|
'Medium Town in Conurbation', |
|
'Core City (London)', |
|
'Village or Small Community in Conurbation'} |
|
|
|
def _set_precision(precision=6): |
|
def _precision(x, y, z=None): |
|
return tuple([round(i, precision) for i in [x, y, z] if i]) |
|
return partial(transform, _precision) |
|
|
|
pd.set_option('display.max_columns', None) |
|
|
|
print('Load Output Area Data') |
|
TOWNDATA = pd.read_csv('oa-classification-csv.csv') |
|
|
|
TOWNDATA['name'] = TOWNDATA['bua_name'] |
|
IDX1 = TOWNDATA['bua_name'] == 'None' |
|
TOWNDATA.loc[IDX1, 'name'] = TOWNDATA.loc[IDX1, 'la_name'] |
|
for k in [' BUA in Conurbation', ' BUASD', ' BUA']: |
|
TOWNDATA['name'] = TOWNDATA['name'].str.replace(k, '') |
|
|
|
TOWNS = TOWNDATA.groupby(['bua_code', 'name', 'region_name']).filter(lambda v: v['population'].sum() > 1) |
|
TOWNS['Town'] = TOWNDATA['name'] |
|
TOWNDATA = None |
|
|
|
IDX2 = TOWNS['citytownclassification'].isin(URBANTYPES) |
|
TOWNS['urban'] = 'N' |
|
TOWNS.loc[IDX2, 'urban'] = 'Y' |
|
|
|
print('Loaded Output Area Data') |
|
print('Load Scotland') |
|
SCDATA = gp.read_file('work/OutputArea2011_MHW.shp') |
|
CRS = SCDATA.crs.srs |
|
|
|
print('Aggregate Scotland') |
|
SC = SCDATA[['DataZone', 'geometry']].dissolve(by='DataZone', aggfunc='sum') |
|
|
|
SDX1 = TOWNS['lsoa_code'].str[:1] == 'S' |
|
G1 = TOWNS.loc[SDX1, ['lsoa_code']].join(SC['geometry'], on='lsoa_code') |
|
SC = None |
|
SCDATA = None |
|
print('Loaded Scotland') |
|
|
|
print('Load England and Wales') |
|
EW = gp.read_file('work/Output_Areas__December_2011__Boundaries_EW_BGC.shp') |
|
EW = EW.to_crs(CRS) |
|
EW = EW.set_index('OA11CD') |
|
G2 = TOWNS.loc[~SDX1, ['outputarea_code']].join(EW['geometry'], on='outputarea_code') |
|
EW = None |
|
print('Loaded England and Wales') |
|
|
|
print('Create GeoDataFrame') |
|
GEOMETRY = pd.concat([G2['geometry'], G1['geometry']]) |
|
G1 = None |
|
G2 = None |
|
|
|
GF1 = gp.GeoDataFrame(TOWNS, geometry=GEOMETRY, crs=CRS) |
|
_precision = _set_precision(1) |
|
GF1['geometry'] = GF1['geometry'].apply(_precision) |
|
|
|
print('Write GeoDataFrame') |
|
GF1.to_file('all_boundaries.geojson', crs=CRS, driver='GeoJSON') |
|
GF1.to_file('shp/all_boundaries.shp', crs=CRS) |
|
|
|
GEOMETRY = None |
|
print('Aggregate by region, town, Built Up Area (BUA) and Middle Super-Output-Area') |
|
KEYS = ['region_name', 'Town', 'bua_code', 'msoa_code'] |
|
GF2 = GF1[KEYS + ['geometry', 'population']].dissolve(by=KEYS, aggfunc=sum) |
|
#GF2 = GF2.to_crs('EPSG:4326') |
|
|
|
print('Aggregated data') |
|
FULLKEYS = KEYS + ['outputarea_code', 'lsoa_code', 'la_name', 'bua_name', 'constituency_name', 'citytownclassification', 'urban'] |
|
DF1 = TOWNS[FULLKEYS].drop_duplicates(subset=KEYS).set_index(KEYS) |
|
|
|
_precision = _set_precision(5) |
|
POINTS = gp.GeoDataFrame(geometry=GF2.centroid, crs=CRS).to_crs('EPSG:4326') |
|
POINTS['geometry'] = POINTS['geometry'].apply(_precision) |
|
POINTS['data'] = POINTS['geometry'].apply(lambda v: [v.x, v.y]) |
|
POINTS['longitude'], POINTS['latitude'] = zip(*POINTS.pop('data')) |
|
DF2 = POINTS[['longitude', 'latitude']] |
|
|
|
BOUNDARIES = GF2.join(DF1).join(DF2).reset_index().fillna('-') |
|
BOUNDARIES = BOUNDARIES.to_crs('EPSG:4326') |
|
for k in ['bua_code', 'bua_name', 'citytownclassification']: |
|
BOUNDARIES[k] = BOUNDARIES[k].str.replace('None', '-') |
|
|
|
#from matplotlib import pyplot as plt |
|
#fig = plt.figure() |
|
#ax1 = fig.add_subplot(111) |
|
|
|
print('Segment boundaries') |
|
INTERVAL = {'rural': 0, '5k': 5E3, '10k': 10E3, '50k': 50E3, '100k': 100E3, 'MC1': 1E9} |
|
BINS = pd.IntervalIndex.from_breaks(list(INTERVAL.values()), closed='left') |
|
KEYS = ['region_name', 'Town', 'bua_code'] |
|
POPULATION = BOUNDARIES[KEYS + ['population']].groupby(KEYS).sum() |
|
CUT = pd.cut(POPULATION['population'].to_numpy(), BINS) |
|
CUT = CUT.rename_categories({i: j for i, j in zip(CUT.categories, INTERVAL.keys())}) |
|
POPULATION['pgroup'] = CUT.tolist() |
|
BOUNDARIES = BOUNDARIES.set_index(KEYS).join(POPULATION['pgroup']).reset_index() |
|
IDX3 = BOUNDARIES['urban'] == 'N' |
|
BOUNDARIES.loc[IDX3, 'pgroup'] = 'rural' |
|
|
|
print('Write full boundaries') |
|
_precision = _set_precision(5) |
|
BOUNDARIES['geometry'] = BOUNDARIES['geometry'].apply(_precision) |
|
BOUNDARIES.to_file('shp/full_boundaries.shp', crs='epsg:4326') |
|
BOUNDARIES.to_file('full_boundaries.geojson', crs='epsg:4326', driver='GeoJSON') |
|
|
|
print('Write segmented boundaries') |
|
for k, g in BOUNDARIES.groupby('pgroup'): |
|
stub = 'town_boundaries_{}'.format(k) |
|
g.to_file('shp/{}.shp'.format(stub), crs='epsg:4326') |
|
g.to_file('{}.geojson'.format(stub), driver='GeoJSON', crs='epsg:4326') |
|
|
|
BOUNDARIES = BOUNDARIES.to_crs('epsg:27700') |
|
for k, g in BOUNDARIES.groupby('pgroup'): |
|
stub = 'town_boundaries_gb_{}'.format(k) |
|
g.to_file('shp/{}.shp'.format(stub), crs='epsg:27700') |
|
|