Created
June 30, 2023 12:46
-
-
Save duncanjbrown/a3f2aaa5ff9dc3d3d30ac31505290ab1 to your computer and use it in GitHub Desktop.
Prepare an ONS shapefile for BigQuery
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import geopandas as gpd | |
from shapely.geometry import Polygon, MultiPolygon | |
# Load the regions shapefile straight from its zip archive and reproject it
# from OSGB36 to EPSG:4326 in one step.
gdf = gpd.read_file('zip://Regions_December_2022_EN_BUC-shapefile.zip').to_crs("EPSG:4326")
def correct_ring_orientation(geometry):
    """Return *geometry* with consistently wound polygon rings.

    Exterior rings are made counter-clockwise and interior rings (holes)
    clockwise. Each ring is checked on its own, so holes are corrected even
    when the exterior ring is already counter-clockwise.

    Args:
        geometry: A shapely ``Polygon`` or ``MultiPolygon``. Any other
            geometry type, and any empty polygon, is returned unchanged.

    Returns:
        A new ``Polygon``/``MultiPolygon`` with normalised ring winding, or
        the input object itself when it is not a (non-empty) polygon type.
    """
    if isinstance(geometry, MultiPolygon):
        # Normalise every member polygon and rebuild the collection.
        return MultiPolygon(
            [correct_ring_orientation(poly) for poly in geometry.geoms]
        )

    if not isinstance(geometry, Polygon) or geometry.is_empty:
        # Nothing to orient: not a polygon, or a polygon without rings.
        return geometry

    shell = list(geometry.exterior.coords)
    if not geometry.exterior.is_ccw:
        shell.reverse()

    holes = []
    for inner in geometry.interiors:
        ring = list(inner.coords)
        # Holes must wind opposite to the shell, i.e. clockwise.
        if inner.is_ccw:
            ring.reverse()
        holes.append(ring)

    return Polygon(shell, holes)
# Normalise ring winding before export; if this step is skipped,
# MULTIPOLYGONs will not import correctly.
gdf['geometry'] = gdf['geometry'].apply(correct_ring_orientation)

# Keep each geometry's WKT text in its own column; this holds the actual data.
gdf['wkt_geometry'] = gdf['geometry'].apply(lambda shape: shape.wkt)

# Export whichever fields you want as CSV, then upload to BQ via Google Cloud
# Storage, making sure to select GEOGRAPHY for the WKT column.
gdf.loc[:, ['RGN22CD', 'RGN22NM', 'wkt_geometry']].to_csv(
    'regions.csv',
    index=False,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment