Last active
October 14, 2021 01:39
-
-
Save wriglz/870ca3b49895262cd550c21095e0dca6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Benchmark a GeoPandas spatial join of pub points against London boroughs.

Loads pub points (name + WKT geometry) from a CSV file and borough polygons
from a shapefile, counts the pubs that fall within each borough, and prints
the elapsed time of the join twice: once on freshly loaded frames and once
after the spatial indexes have been explicitly built.
"""
import time

import geopandas as gpd
import pandas as pd
from shapely import wkt


def _count_pubs_per_borough(pubs, boroughs):
    """Spatially join pubs to boroughs and count the pubs in each borough.

    Args:
        pubs: GeoDataFrame of pub point geometries.
        boroughs: GeoDataFrame of borough polygons with a ``NAME`` column.

    Returns:
        A ``(sjoined, grouped, df)`` tuple: the raw join result, the
        per-borough counts as a Series, and the same counts as a two-column
        DataFrame.
    """
    # `predicate=` replaces the `op=` keyword, which was deprecated in
    # GeoPandas 0.10 and later removed.
    sjoined = gpd.sjoin(pubs, boroughs, predicate="within")
    grouped = sjoined.groupby("NAME").size()
    df = grouped.to_frame().reset_index()
    # NOTE: the second column holds the number of pubs per borough, not a pub
    # name; the original label is kept so downstream consumers are unaffected.
    df.columns = ['Borough_Name', 'Pub_name']
    return sjoined, grouped, df


# Read pub points from CSV to a pandas dataframe
pubs = pd.read_csv('~/Documents/pub_points.csv',
                   header=None, names=['name', 'coordinates'])
# Parse the WKT coordinate format from PostGIS
pubs['coordinates'] = pubs['coordinates'].apply(wkt.loads)
# Turn the pandas dataframe into a GeoPandas dataframe and tell GeoPandas
# that the coordinates are in British National Grid.
pubs_gdf = gpd.GeoDataFrame(pubs, geometry='coordinates', crs="EPSG:27700")
# Read the London Borough Boundary data from a shp file.
boroughs_gdf = gpd.read_file(
    '/path/to/statistical-gis-boundaries-london-2/ESRI/London_Borough_Excluding_MHW.shp')
# Stamp British National Grid on the boroughs as well. allow_override=True
# mirrors the original direct `.crs = ...` assignment: it relabels the CRS
# without reprojecting any coordinates.
boroughs_gdf = boroughs_gdf.set_crs("EPSG:27700", allow_override=True)

# Join the datasets based on location
# NOTE(review): sjoin appears to build (and cache) a spatial index lazily on
# first use, so this first run likely measures index construction plus the
# join rather than a truly index-free join -- confirm against the installed
# GeoPandas version before drawing conclusions from the two timings.
# perf_counter() is used because it is monotonic and high-resolution, unlike
# the wall clock returned by time.time().
start = time.perf_counter()
sjoined, grouped, df = _count_pubs_per_borough(pubs_gdf, boroughs_gdf)
end = time.perf_counter()
# Print time in milliseconds
print("Elapsed time (without spatial index):",(end - start)*1000)

# Add spatial index
# These names are aliases of the frames above (no copy is made); accessing
# `.sindex` forces the index to be built if it has not been already.
pubs_gdf_idx = pubs_gdf
boroughs_gdf_idx = boroughs_gdf
pubs_gdf_idx.sindex
boroughs_gdf_idx.sindex

# Re-time the operation using spatial indexes
start = time.perf_counter()
sjoined, grouped, df = _count_pubs_per_borough(pubs_gdf_idx, boroughs_gdf_idx)
end = time.perf_counter()
# Print time in milliseconds
print("Elapsed time (with spatial index):",(end - start)*1000)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment