Created
December 15, 2023 10:11
-
-
Save r-leyshon/222dd4039143398ad9574fba83c32752 to your computer and use it in GitHub Desktop.
Ingest all England and Wales LSOA 2021 boundaries from the ONS Open Geography Portal
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import geopandas as gpd
import pandas as pd
import requests
# Query endpoint of the ArcGIS FeatureServer layer that serves the LSOA 2021
# boundaries. The parentheses matter: adjacent string literals are only
# joined when they are part of the same expression, so without them the
# second half of the URL would be silently dropped.
ENDPOINT = (
    "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/"
    "Lower_layer_Super_Output_Areas_2021_EW_BFC_V8/FeatureServer/0/query"
)

# Query-string parameters sent with every request to ENDPOINT.
params = {
    "where": "1=1",  # SQL clauses can go here
    "outSR": 4326,  # CRS that you want
    "f": "geoJSON",  # response format
    "resultOffset": 0,  # parameter used for pagination later
}
def request_to_gdf(
    url: str, query_params: dict, timeout: float = 60.0
) -> "tuple[requests.Response, gpd.GeoDataFrame]":
    """Send a get request to ArcGIS API & Convert to GeoDataFrame.

    Only works when asking for features and GeoJSON format.

    Parameters
    ----------
    url : str
        The url endpoint.
    query_params : dict
        A dictionary of query parameter : value pairs. The dictionary itself
        is left untouched; the "f" (format) entry is forced to "geoJSON" in
        the copy that is actually sent.
    timeout : float, optional
        Number of seconds to wait for the server, passed straight through to
        ``requests.get``. Defaults to 60.

    Returns
    -------
    requests.Response
        The response from ArcGIS API server. Useful for paginated requests
        later.
    gpd.GeoDataFrame
        A GeoDataFrame of the requested geometries in the crs specified by the
        response metadata.

    Raises
    ------
    requests.exceptions.RequestException
        The response was not ok, its body could not be decoded as JSON, or
        the decoded body held no "features" member.

    """
    # this approach will only work with geoJSON. Build a new dict rather than
    # writing into the caller's, so the caller's parameters are never changed
    # behind their back.
    query = {**query_params, "f": "geoJSON"}
    # without a timeout a stalled server would block this call forever
    response = requests.get(url, params=query, timeout=timeout)
    if not response.ok:
        # cases where a traditional bad response may be returned
        raise requests.RequestException(
            f"HTTP Code: {response.status_code}, Status: {response.reason}"
        )
    try:
        content = response.json()
    except ValueError as err:
        # every JSON decoding error raised by response.json() is a ValueError
        raise requests.RequestException(
            f"Response body is not valid JSON: {err}"
        ) from err
    if not isinstance(content, dict) or "features" not in content:
        # NOTE(review): ArcGIS appears to report some failures as a 200
        # response whose body describes the error - confirm against the API
        # docs. Either way there is nothing to build a GeoDataFrame from.
        raise requests.RequestException(
            f"Response contains no features: {content!r}"
        )
    gdf = gpd.GeoDataFrame.from_features(
        content["features"],
        # safest to get crs from response
        crs=content["crs"]["properties"]["name"],
    )
    # we'll need the response again later for pagination
    return response, gdf
# Fetch the first page. Everything below needs an initial `gdf` and an initial
# `more_pages` flag, so this request has to happen before the loop.
response, gdf = request_to_gdf(ENDPOINT, params)
content = response.json()
pages = [gdf]  # collect every page and concatenate once at the end
# rather than exceededTransferLimit = False, it disappears... so a missing
# key is read as "no more pages"
more_pages = content.get("properties", {}).get("exceededTransferLimit", False)
offset = len(gdf)  # number of records to offset by
# `and offset` stops the loop if a page comes back empty: the offset could
# not advance and the same request would be repeated forever.
while more_pages and offset:
    params["resultOffset"] += offset  # increment the records to ingest
    response, gdf = request_to_gdf(ENDPOINT, params)
    content = response.json()
    pages.append(gdf)
    # advance by what this page actually returned, not by the first page's size
    offset = len(gdf)
    more_pages = content.get("properties", {}).get(
        "exceededTransferLimit", False
    )
# a single concat avoids re-copying the growing frame on every iteration
all_lsoas = pd.concat(pages).reset_index(drop=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment