wriglz’s gists

wriglz / window_functions.sql

Last active October 20, 2022 14:40

SQL utilising window functions to explore NOAA Global Hurricane Tracks

	WITH
	hurricane_geometry AS (
	SELECT
	* EXCEPT (longitude, latitude), -- Select everything except lat & lon
	ST_GEOGPOINT(longitude, latitude) AS geom, -- As we create a geometry here
	MAX(usa_wind) OVER (PARTITION BY sid) AS max_wind_speed -- Calculate the max wind speed per storm
	FROM
	`bigquery-public-data.noaa_hurricanes.hurricanes`
	WHERE
	basin = 'NA' -- Select only storms in the North American basin

wriglz / osm_extract.sql

Created October 3, 2022 16:07

SQL to query OSM data on Google BigQuery

	SELECT
	osm_id,
	feature_type,
	osm_timestamp,
	geometry,
	-- Here we are going to extract a couple of attributes from the all_tags array:
	(SELECT value FROM UNNEST(all_tags) WHERE key = 'name' ) AS name,
	(SELECT value FROM UNNEST(all_tags) WHERE key = 'addr:city') AS city
	FROM
	bigquery-public-data.geo_openstreetmap.planet_features

wriglz / lifeboat.sql

Created September 9, 2022 16:17

SQL to explore the RNLI open "Returns of Service" data set

	-- Data source: https://data-rnli.opendata.arcgis.com/

	-- Remove outliers which are beyond the reach of the maximum lifeboat range:
	SELECT
	rescue.*
	FROM
	`RNLI.RNLI_Returns_of_Service` rescue
	LEFT JOIN
	`RNLI.RNLI_Lifeboat_Station_Locations` station
	ON

wriglz / random_number.sql

Created September 7, 2022 12:54

SQL to generate a random number within a specified <min> <max> range

	-- Generate a random number within a specified <min> <max> range

	SELECT
	CAST(<min> + RAND() * (<max> - <min>) AS INT) AS random_number

wriglz / nearest_neighbour_bigquery.sql

Last active August 12, 2022 13:08

Spatial SQL to return the id of the closest geometry from table_b to each geometry in table_a

	SELECT
	a.id,
	-- Note that on line 13 the results are grouped by a.id so if you want to return the geometry from table_a,
	-- here you can use the function ANY_VALUE(a.geom) (as you cannot group by geometry in BigQuery)
	ARRAY_AGG(b.id ORDER BY ST_Distance(a.geog, b.geog) LIMIT 1) [ORDINAL(1)] AS neighbor_id
	-- Here we return the id of the closest geometry from table_b to each geometry in table_a.
	-- If you want to include more fields from table_b here you can use STRUCT(b.id, b.second_field, b.third_field) etc.
	FROM
	table_a a
	JOIN

wriglz / ndjson.sh

Last active August 4, 2022 10:47

Parse JSON to new line delimited JSON using JQ

	# NB. First install jq using homebrew
	cat original.geojson \| jq -c '.[]' > new_line_delimited.json

wriglz / national_park_voronoi.sql

Last active January 17, 2023 12:56

SQL to generate Voronoi Polygons to determine National Park catchment areas.

	/*
	Data sources for National Park boundaries:
	- England: https://environment.data.gov.uk/DefraDataDownload/?mapService=NE/NationalParksEngland&Mode=spatial
	- Scotland: https://spatialdata.gov.scot/geonetwork/srv/eng/catalog.search#/home
	- Wales: https://datamap.gov.wales/layers/inspire-nrw:NRW_NATIONAL_PARK
	*/

	WITH
	park_info AS(
	-- Select required information about each National Park from a merged dataset

wriglz / bigquery_GeoJSON.sql

Created July 14, 2022 08:11

SQL to parse geometry object from GeoJSON feature collection for use in BigQuery

	UPDATE
	`project.dataset.table`
	SET
	geom = ST_GEOGFROMGEOJSON(
	JSON_QUERY(geo_json,'$.geometry'), make_valid => TRUE)
	WHERE
	geom IS NULL

wriglz / snap_points_to_lines.sql

Last active September 1, 2022 04:21

SQL to snap points to the closest line within a predefined radius

	-- Snap the points to their closest lines, found in the subquery below
	SELECT
	point_id,
	line_id,
	ST_LINE_INTERPOLATE_POINT(line_geom,
	ST_Line_Locate_Point(line_geom, point_geom)) AS snapped_points --Create the snapped points
	FROM

	--Subquery to find the closest line to each point (within a pre-defined raidus)
	(

wriglz / london_pub_count.py

Last active October 14, 2021 01:39

	import pandas as pd
	import geopandas as gpd
	from shapely import wkt
	import time

	# Read pub points from CSV to a pandas dataframe
	pubs = pd.read_csv('~/Documents/pub_points.csv',
	header=None, names=['name', 'coordinates'])

	# Parse the WKT coordinate format from PostGIS

Simon Wrigley wriglz