## https://developmentseed.org/obstore/latest/examples/fastapi/
# Example large Parquet file hosted in AWS open data
#store = S3Store("ookla-open-data", region="us-west-2", skip_signature=True)
#path = "parquet/performance/type=fixed/year=2024/quarter=1/2024-01-01_performance_fixed_tiles.parquet"
Sys.setenv("AWS_REGION" = "us-west-2")
Sys.setenv("AWS_NO_SIGN_REQUEST" = "YES")
#Sys.setenv("AWS_VIRTUAL_HOSTING" = "TRUE") ## true by default bucket.host vs host/bucket
library(gdalraster)
bucket <- "/vsis3/ookla-open-data"
vsi_read_dir(bucket)
## all
obs <- vsi_read_dir("/vsis3/ookla-open-data/parquet", recursive = TRUE)
length(obs)
## drill into what we're looking for ()
grep("2024-01-01", obs, value = TRUE)
(key <- grep("2024-01-01.*fixed", obs, value = TRUE))
## "performance/type=fixed/year=2024/quarter=1/2024-01-01_performance_fixed_tiles.parquet"
ds <- new(GDALVector, glue::glue("{bucket}/parquet/{key}"))
ds$getDriverLongName()
#[1] "(Geo)Parquet"
str(ds$getNextFeature())
# List of 12
# $ FID :integer64 1
# $ quadkey : chr "0022133222330012"
# $ tile : chr "POLYGON((-160.037841796875 70.6399478155463, -160.032348632812 70.6399478155463, -160.032348632812 70.638126730"| __truncated__
# $ tile_x : num -160
# $ tile_y : num 70.6
# $ avg_d_kbps :integer64 158062
# $ avg_u_kbps :integer64 14698
# $ avg_lat_ms :integer64 223
# $ avg_lat_down_ms: int 402
# $ avg_lat_up_ms : int 204
# $ tests :integer64 1
# $ devices :integer64 1
# - attr(*, "gis")=List of 5
# ..$ type : chr "vector"
# ..$ geom_column : chr(0)
# ..$ geom_col_type: chr(0)
# ..$ geom_col_srs : chr(0)
# ..$ geom_format : chr "NONE"
# - attr(*, "class")= chr [1:2] "OGRFeature" "list"
ds$close()
Last active
March 23, 2025 23:26
-
-
Save mdsumner/55de7c645dbfa0e364a24283876eb4d5 to your computer and use it in GitHub Desktop.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
VSIFile, we can seek and read raw bytes from an object
Here's a real example where I warp to MEM, ingest with VSIFile, and write that load of COG bytes to object store:
https://github.com/mdsumner/pawsey-aad/blob/7c8ad261da764a79095150b9293bea86164aca67/acacia/sync-ghrsst.R#L57-L60