|
#!/usr/bin/env python3 |
|
|
|
import json |
|
import geopandas as gp |
|
import pandas as pd |
|
from shapely.geometry import Point |
|
|
|
# Never truncate the column list when a DataFrame is printed.
pd.options.display.max_columns = None

# One dict per station record; filled while the stop-point file is read and
# later turned into the STATIONS GeoDataFrame.
DATA = []
|
|
|
def get_point(this_json):
    """Return a shapely ``Point`` for a stop-point record.

    The coordinates are read from ``Place -> Location -> Translation`` and
    converted to ``float``; longitude is used as x and latitude as y.

    Raises ``KeyError`` if any of those keys is missing and ``ValueError``
    if a coordinate cannot be parsed as a float.
    """
    translation = this_json['Place']['Location']['Translation']
    longitude = float(translation['Longitude'])
    latitude = float(translation['Latitude'])
    return Point([longitude, latitude])
|
|
|
# NPTG locality reference -> set of 4-character ATCO code prefixes of the
# stops whose own locality it is.
ATCOS = {}

# NPTG locality reference -> set of 4-character ATCO code prefixes of the
# stops that only list the locality under 'MainNptgLocalities'.
ATCOT = {}

# JSON text is UTF-8; say so explicitly instead of relying on the platform's
# default encoding.
with open('work/StopPoint.jsonl', encoding='utf-8') as fin:
    for line in fin:
        this_json = json.loads(line)
        this_code = this_json['AtcoCode']
        ATCO = this_code[:4]
        place = this_json['Place']
        nptg = place['NptgLocalityRef']

        # Stops with these prefixes are ignored entirely.
        # NOTE(review): presumably non-rail national prefixes - confirm.
        if ATCO in ('9200', '9300'):
            continue

        # Every prefix other than 9100/9400 only contributes to the
        # locality -> ATCO prefix lookups used later by get_ATCO().
        if ATCO not in ('9100', '9400'):
            ATCOS.setdefault(nptg, set()).add(ATCO)
            if 'MainNptgLocalities' in place:
                refs = place['MainNptgLocalities']['NptgLocalityRef']
                # A single reference arrives as one object, several as a list.
                for ref in refs if isinstance(refs, list) else [refs]:
                    locality = ref['value']
                    # Already recorded as a direct locality of this prefix.
                    if ATCO in ATCOS.get(locality, ()):
                        continue
                    ATCOT.setdefault(locality, set()).add(ATCO)
            continue

        # 9100/9400 records become station rows. Key insertion order is kept
        # as it determines the column order of the resulting frame.
        output_json = {}
        output_json['Name'] = this_json['Descriptor']['CommonName']
        output_json['Status'] = this_json['Status']
        output_json['Type'] = this_json['StopClassification']['StopType']
        output_json['Station_Name'] = output_json['Name']

        stop_class = this_json['StopClassification']
        if 'OffStreet' in stop_class:
            rail_ref = stop_class['OffStreet']['Rail']['AnnotatedRailRef']
            output_json['TIPLOC'] = rail_ref['TiplocRef']
            output_json['CRS'] = rail_ref['CrsRef']
            station_name = rail_ref['StationName']
            # The name may be an object carrying an 'xml:lang' attribute next
            # to the text; unwrap it. The isinstance guard keeps the test
            # from being a substring search on a plain string.
            if isinstance(station_name, dict) and 'xml:lang' in station_name:
                station_name = station_name['value']
            output_json['Station_Name'] = station_name

        if 'StopAreas' in this_json:
            # NOTE(review): assumes a single StopAreaRef object; a list of
            # references would raise here - confirm against the data.
            output_json['StopAreaCode'] = this_json['StopAreas']['StopAreaRef']['value']

        output_json['AdministrativeAreaRef'] = this_json['AdministrativeAreaRef']
        output_json['code'] = this_code
        output_json['NPTG'] = nptg
        output_json['geometry'] = get_point(this_json)
        DATA.append(output_json)
|
|
|
CRSDATA = pd.read_csv('station_codes.csv')

# The CSV is consumed two columns at a time: each consecutive pair of columns
# is treated as (station name, CRS code) and the pairs are stacked into one
# long two-column table.
CRS_COLUMNS = ['Station_Name', 'CRS_code']
CRS_PAIRS = []
for i in range(0, CRSDATA.shape[1], 2):
    # Copy the slice so that renaming its columns never touches CRSDATA.
    DF1 = CRSDATA.iloc[:, i:(i + 2)].copy()
    DF1.columns = CRS_COLUMNS
    CRS_PAIRS.append(DF1)

# DataFrame.append() no longer exists in pandas 2.x; concatenate once instead.
# pd.concat() refuses an empty list, so fall back to an empty table.
if CRS_PAIRS:
    CRS = pd.concat(CRS_PAIRS)
else:
    CRS = pd.DataFrame(columns=CRS_COLUMNS)
# Rows with a missing name or code are discarded.
CRS = CRS.dropna()

# Index the codes by '<station name> Rail Station' so they can be joined
# against the stop-point 'Name' column.
CRS['Rail_Station'] = CRS['Station_Name'] + ' Rail Station'
CRS = CRS.set_index('Rail_Station').drop('Station_Name', axis=1)
|
|
|
# Build the station table from the collected records; each record's
# 'geometry' entry holds the Point produced by get_point().
STATIONS = gp.GeoDataFrame(data=DATA)
|
|
|
def get_ATCO(k):
    """Return a 3-character ATCO prefix for NPTG locality ``k``, or ``None``.

    ``ATCOS`` is consulted first and ``ATCOT`` second; the first of them that
    holds a non-empty set for ``k`` supplies the answer. When a set holds
    several prefixes the smallest one is used, so the result is the same on
    every run (taking the first element of a set depends on hash ordering,
    which varies between processes for strings).
    """
    for lookup in (ATCOS, ATCOT):
        prefixes = lookup.get(k)
        if prefixes:
            return min(prefixes)[:3]
    return None
|
|
|
# Attach a 3-character ATCO prefix to every station via its NPTG locality.
STATIONS['ATCO'] = STATIONS['NPTG'].apply(get_ATCO)

# Keep only 'RLY' and 'MET' stop types. Copy the selection so the in-place
# 'Type' updates below act on an independent frame rather than a view.
STATIONS = STATIONS[STATIONS['Type'].isin(['RLY', 'MET'])].copy()

# Stations whose name ends with one of these suffixes are re-typed as 'HRT'.
# NOTE(review): presumably marks heritage railways - confirm.
for k in [' Railway)', ' Rly)', '(RHDR)', '(KESR)', '(Isle of Wight Steam Railway', '(Welsh Highland Rly-Caernarfon)', '(W&LLR)', '(Peak Rail)', '(Battfield Line)']:
    IDX = STATIONS['Name'].str.endswith(k, na=False)
    STATIONS.loc[IDX, 'Type'] = 'HRT'

# Add the CRS_code column by matching 'Name' against the CRS table index and
# replace every missing value with '-'.
STATIONS = STATIONS.join(CRS, on='Name').fillna('-')

# Record the coordinate reference system on the frame itself: not every
# to_file() engine accepts a 'crs' keyword, but all of them honour the
# frame's own CRS.
STATIONS = STATIONS.set_crs('EPSG:4326')
STATIONS.to_file('shp/Stations.shp')

# Report CRS codes from the CSV that no station record carries.
CRS_CODES = set(CRS['CRS_code'])
STATION_CODES = set(STATIONS['CRS'])
MISMATCH = list(CRS_CODES - STATION_CODES)
CRS[CRS['CRS_code'].isin(MISMATCH)].to_csv('CRS-mismatch-report.tsv', sep='\t')

STATIONS.to_file('output-stations.json', driver='GeoJSON')