Created
November 9, 2024 00:45
-
-
Save cboettig/4f31ca4cea7a1352129a67654ad99820 to your computer and use it in GitHub Desktop.
NRP use of S3 + duckdb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%time | |
import ibis | |
from ibis import _ | |
con = ibis.duckdb.connect() | |
endpoint="rook-ceph-rgw-nautiluss3.rook" | |
query= f''' | |
CREATE OR REPLACE SECRET secret2 ( | |
TYPE S3, | |
KEY_ID '{access_key}', | |
SECRET '{secret_key}', | |
ENDPOINT '{endpoint}', | |
URL_STYLE 'path', | |
USE_SSL 'FALSE' | |
); | |
''' | |
con.raw_sql(query) | |
gbif = con.read_parquet("s3://cboettig/gbif/2024-10-01/**") | |
(gbif | |
.filter(_["class"]=="Aves") | |
.rename(hex = "h3") | |
.group_by(_.hex) | |
.agg(n = _.count()) | |
.mutate(logn = _.n.log()) | |
.mutate(value = (255 * _.logn / _.logn.max()).cast("int")) | |
.to_parquet("s3://cboettig/gbif/csv/test.parquet") | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment