Created
January 14, 2025 17:58
-
-
Save cholmes/830a714dac4adb03878d56fdfc9fe1ad to your computer and use it in GitHub Desktop.
Time query of DuckDB httpfs spatial filtering
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import duckdb | |
def run_duckdb_query(): | |
# Connect to DuckDB (using an in-memory database as an example) | |
conn = duckdb.connect(database=':memory:') | |
# Load the spatial extension | |
conn.execute("LOAD spatial") | |
# Start timing | |
start_time = time.time() | |
# Execute your query to create the table | |
conn.execute(""" | |
CREATE TABLE download_data AS ( | |
SELECT name, geometry | |
FROM read_parquet('s3://us-west-2.opendata.source.coop/fused/fsq-os-places/2024-12-03/places/*.parquet') | |
WHERE | |
bbox.xmin BETWEEN 4.8331439708686545 AND 16.12698403011606 | |
AND bbox.ymin BETWEEN 45.80185287212273 AND 56.15331253529212 | |
) | |
""") | |
# Stop timing | |
end_time = time.time() | |
# Print time elapsed | |
print(f"Query took {end_time - start_time:.2f} seconds.") | |
# Query the table to get the count of records | |
count_result = conn.execute("SELECT COUNT(*) FROM download_data").fetchone()[0] | |
print(f"Number of rows in download_data: {count_result}") | |
if __name__ == "__main__": | |
run_duckdb_query() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Query time from Milan, Italy (ran it two times) with fast connection:
Context: https://www.linkedin.com/posts/dominik-weckm%C3%BCller_duckdb-for-geospatial-activity-7284826680399945728-mmJZ (see comments).