Skip to content

Instantly share code, notes, and snippets.

@marcoslot
Last active July 29, 2024 19:21
Show Gist options
  • Save marcoslot/8951d550024cc6645ad1fa8e0aa2841a to your computer and use it in GitHub Desktop.
Save marcoslot/8951d550024cc6645ad1fa8e0aa2841a to your computer and use it in GitHub Desktop.
Overture in Crunchy Bridge for Analytics
create foreign table addresses ()
server crunchy_lake_analytics
options (path 's3://overturemaps-us-west-2/release/2024-07-22.0/theme=addresses/type=*/*.parquet');
create foreign table base ()
server crunchy_lake_analytics
options (path 's3://overturemaps-us-west-2/release/2024-07-22.0/theme=base/type=*/*.parquet');
create foreign table buildings ()
server crunchy_lake_analytics
options (path 's3://overturemaps-us-west-2/release/2024-07-22.0/theme=buildings/type=building/*.parquet');
create foreign table buildings_part ()
server crunchy_lake_analytics
options (path 's3://overturemaps-us-west-2/release/2024-07-22.0/theme=buildings/type=building_part/*.parquet');
create foreign table divisions ()
server crunchy_lake_analytics
options (path 's3://overturemaps-us-west-2/release/2024-07-22.0/theme=divisions/type=*/*.parquet');
create foreign table places ()
server crunchy_lake_analytics
options (path 's3://overturemaps-us-west-2/release/2024-07-22.0/theme=places/type=*/*.parquet');
create foreign table transportation ()
server crunchy_lake_analytics
options (path 's3://overturemaps-us-west-2/release/2024-07-22.0/theme=transportation/type=*/*.parquet');
-- Queries from https://docs.overturemaps.org/getting-data/duckdb/
SELECT
id,
(names).primary as primary_name,
(bbox).xmin as x,
(bbox).ymin as y,
ST_GeomFromWKB(geometry) as geometry,
(categories).primary as main_category,
(sources[1]).dataset AS primary_source,
confidence
FROM places
WHERE (categories).primary = 'mountain' AND confidence > .90
ORDER BY confidence DESC;
SELECT
id,
(names).primary as primary_name,
height,
ST_GeomFromWKB(geometry) as geometry
FROM buildings
WHERE (names).primary IS NOT NULL
AND -84.36 < (bbox).xmin
AND -82.42 > (bbox).xmax
AND 41.71 < (bbox).ymin
AND 43.33 > (bbox).ymax;
SELECT
id,
division_id,
(names).primary,
ST_GeomFromWKB(geometry) as geometry
FROM
divisions
WHERE
type = 'division_area'
AND subtype = 'county'
AND country = 'US'
AND region = 'US-PA';
-- simple analytical query
select (categories).primary, count(*)
from places
where (addresses[1]).locality = 'Haarlem' and (categories).primary is not null
group by 1 order by 2 desc limit 10;
-- Example of creating a materialized view
create materialized view top_places as
select (addresses[1]).locality as city, (categories).primary as category, count(*)
from places
where (categories).primary is not null
group by 1, 2;
-- Periodically refresh (not very useful in this case, since data is static)
create unique index on top_places (city, category);
select cron.schedule('daily-refresh', '0 3 * * *', 'refresh materialized view concurrently top_places');
-- Query materialied view
select * from top_places where city = 'Amsterdam' order by count desc limit 30;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment