Skip to content

Instantly share code, notes, and snippets.

@cboettig
Created November 9, 2024 00:45
Show Gist options
  • Save cboettig/4f31ca4cea7a1352129a67654ad99820 to your computer and use it in GitHub Desktop.
Save cboettig/4f31ca4cea7a1352129a67654ad99820 to your computer and use it in GitHub Desktop.
NRP use of S3 + duckdb
%%time
import ibis
from ibis import _
# Open a DuckDB connection through ibis; the statements below run on it.
con = ibis.duckdb.connect()

# Host name of the S3-compatible object store that the secret points at.
endpoint = "rook-ceph-rgw-nautiluss3.rook"

# Register (or overwrite) a DuckDB secret named `secret2` holding the S3
# credentials, using path-style URLs and with SSL switched off.
# NOTE(review): `access_key` and `secret_key` are not defined in this cell;
# they must already exist in the session (e.g. set in an earlier cell).
# The values are interpolated directly into the SQL text, so a key that
# contains a single quote would break the statement.
query = f"""
CREATE OR REPLACE SECRET secret2 (
TYPE S3,
KEY_ID '{access_key}',
SECRET '{secret_key}',
ENDPOINT '{endpoint}',
URL_STYLE 'path',
USE_SSL 'FALSE'
);
"""
con.raw_sql(query)
# Expose every object under the 2024-10-01 snapshot prefix as one table.
gbif = con.read_parquet("s3://cboettig/gbif/2024-10-01/**")

# Keep only rows whose `class` column equals "Aves", and rename the `h3`
# column to `hex` (presumably an H3 cell index -- confirm against the data).
birds = gbif.filter(_["class"] == "Aves").rename(hex="h3")

# One row per `hex` value, with `n` = number of matching records.
counts_by_hex = birds.group_by(_.hex).agg(n=_.count())

# Add `logn` (log of the count, ibis default base) and `value`, which
# rescales `logn` against the largest `logn` in the table and casts to int.
# NOTE(review): if every group has n == 1 then logn.max() is 0 and the
# division is by zero -- confirm that cannot happen for this dataset.
with_logn = counts_by_hex.mutate(logn=_.n.log())
scaled = with_logn.mutate(value=(255 * _.logn / _.logn.max()).cast("int"))

# Write the result back to the bucket as a single parquet file.
scaled.to_parquet("s3://cboettig/gbif/csv/test.parquet")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment