Last active
January 16, 2024 23:12
-
-
Save cholmes/d6dc942a206fe37bd1b26ce8a7b83a03 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import duckdb | |
import pandas as pd | |
# Function to count records with 'area_in_meters' over a threshold (default 1000)
def count_large_areas(parquet_url: str, threshold: float = 1000) -> int:
    """Count rows of a Parquet file whose ``area_in_meters`` exceeds *threshold*.

    The filter and the count are evaluated inside DuckDB, so only the
    resulting single number is brought into Python instead of the whole table.

    Args:
        parquet_url: Location of the Parquet file, handed to DuckDB's
            ``read_parquet`` (the caller in this file passes an HTTPS URL).
        threshold: Exclusive lower bound on ``area_in_meters``. Defaults to
            1000, the value that was previously hard-coded.

    Returns:
        The number of matching rows.
    """
    # In-memory database: nothing is persisted between calls.
    conn = duckdb.connect(database=':memory:')
    try:
        # Bind the URL and threshold as parameters rather than formatting
        # them into the SQL text, so a quote in the URL cannot break the query.
        row = conn.execute(
            "SELECT count(*) FROM read_parquet(?) WHERE area_in_meters > ?",
            [parquet_url, threshold],
        ).fetchone()
    finally:
        # Close the connection even if the read or the query fails.
        conn.close()
    return row[0]
# Remote input: the `country_iso=AG` GeoParquet file of the
# google-open-buildings dataset hosted on data.source.coop.
parquet_url = (
    'https://data.source.coop/cholmes/google-open-buildings/geoparquet-by-country/country_iso=AG/AG.parquet'
)

# Run the count against that file, then report it on stdout.
record_count = count_large_areas(parquet_url)
print(f"Number of records with 'area_in_meters' > 1000: {record_count}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment