Last active
February 23, 2022 21:44
-
-
Save bitner/8c4b2ef89eae70b425951a3d7986be71 to your computer and use it in GitHub Desktop.
pg_funstuff_for_tammo.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Make sure that you do all of this in your own schema | |
-- Postgres has a search path where it is always going to create new things (tables, functions, etc) in the first entry that exists in your search_path | |
SHOW search_path; | |
-- pgstac lives in the pgstac schema, to use pgstac without needing to prefix everything | |
-- you always want to have public in your search_path as that is where all the main things in the database are (including postgis - think of it like /usr/bin) | |
SET SEARCH_PATH to pgstac, public; | |
-- Create your own schema to play with these things | |
CREATE SCHEMA playground; | |
-- Add that schema to your path | |
SET SEARCH_PATH TO playground, pgstac, public; | |
-- This change is ONLY going to stick around for the current session | |
-- You can change the default search_path for a database role by altering the role | |
ALTER ROLE <myrole> SET SEARCH_PATH TO playground, pgstac, public; | |
-- SQL Functions documentation - https://www.postgresql.org/docs/13/xfunc-sql.html | |
-- SQL Function that wraps st_squaretile to return the tiles that a geometry intersects | |
-- https://postgis.net/docs/ST_SquareGrid.html | |
CREATE FUNCTION gridcells | |
(_geom geometry) | |
RETURNS SETOF geometry -- the "SETOF" says that this query can return multiple results as rows | |
AS $$ | |
SELECT geom FROM st_squaregrid(5, _geom) WHERE st_intersects(geom, _geom); | |
$$ LANGUAGE SQL IMMUTABLE; -- the "IMMUTABLE" says that this query will always return the same results given the same arguments | |
-- SQL Function that does not return anything, but is shorthand to conveniently do something with variables | |
CREATE TABLE collection_grid_month( | |
collection_id text, | |
month timestamptz, | |
grid geometry, | |
count int | |
UNIQUE (collection_id, month, grid) | |
); | |
CREATE OR REPLACE FUNCTION update_aggregate(_collection text, _month timestamptz) RETURNS VOID AS $$ | |
WITH newrows AS ( | |
SELECT | |
id, | |
datetime, | |
collection_id, | |
gridcells(geometry) -- use the set returning function we created above | |
FROM items | |
WHERE datetime >= date_trunc('month', _month) AND datetime < (date_trunc('month', _month) + '1 month'::interval) AND collection_id=_collection | |
), results AS ( | |
SELECT | |
date_trunc('month', datetime), | |
collection_id, | |
geom, | |
count(*) | |
FROM t GROUP BY 1,2,3 | |
) INSERT INTO collection_grid_month (month, collection_id, geom, count) | |
SELECT * FROM results | |
ON CONFLICT (collection_id, month, grid) -- this has to match the unique constraint that we put on the table | |
DO UPDATE SET count=EXCLUDED.count WHERE count != EXCLUDED.count; -- make sure that we update records that are already in our table but do not update records that haven't changed | |
$$ LANGUAGE SQL; | |
-- now we can update a month/collection by just calling this function | |
SELECT update_aggregate('spire','2020-01-01'); | |
-- if we want to bulkload things we want to make sure that we use a transaction for each month so that if something bails we don't lose everything | |
-- \gexec is a command in the psql client that will execute row-by-row the queries that are formed by the previous command https://www.postgresql.org/docs/13/app-psql.html | |
-- format is a function that can format a string using variables in pg https://www.postgresql.org/docs/13/functions-string.html#FUNCTIONS-STRING-FORMAT | |
-- you can use $$ to start and end a string so that you don't have to worry about quote escaping | |
-- in format, %s denotes a string as is %L will make sure to escape a string with quotes as a literal and %I escapes with double quotes as an identifier (ie table name) | |
SELECT format( | |
$$ | |
SELECT 'myvar_%s', %L; | |
$$, | |
intseries, | |
tstzseries | |
) FROM | |
generate_series(0,10,1) intseries, | |
generate_series('2020-01-01'::timestamptz, '2021-01-01'::timestamptz, '1 month'::interval) as tstzseries | |
; | |
-- you see that this just returns records that are sql statements themselves | |
-- if we use the \gexec command it will run each query separately and as long as you are not in a transaction, will commit each row separately | |
\gexec | |
-- we can use this trick to run each month/collection as it's own transaction with our function we created above | |
-- we can create a table that we just use as a queue | |
CREATE TABLE collection_months AS SELECT DISTINCT collection_id, date_trunc('month', datetime) as month FROM items; | |
SELECT format( | |
$$ | |
SELECT update_aggregate(%L, %L); | |
$$, | |
collection_id, | |
month | |
) FROM collection_months m WHERE NOT EXISTS ( -- in case we are doing a rerun, don't try with anything that we already have processed | |
SELECT TRUE FROM collection_grid_month cgm WHERE m.month=cgm.month and m.collection_id=cgm.collection_id LIMIT 1 | |
); | |
\gexec |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment