Skip to content

Instantly share code, notes, and snippets.

@cboettig
Last active August 23, 2024 18:16
Show Gist options
  • Save cboettig/da8dc9889695aad177b1191402b01625 to your computer and use it in GitHub Desktop.
Save cboettig/da8dc9889695aad177b1191402b01625 to your computer and use it in GitHub Desktop.
ibis geospatial I/O
import ibis
from ibis import _
con = ibis.duckdb.connect(extensions=["spatial"])
## Read a geospatial file other than parquet (via GDAL)
rivers_geojson = "https://data.source.coop/cboettig/us-rivers/nhd_flowline_national.geojson"
rivers = con.read_geo(rivers_geojson)
## Read geoparquet
geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"
import ibis
from ibis import _
con = ibis.duckdb.connect("duck.db", extensions = ["spatial"])
crops = con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"})
df = crops.to_pandas()
## Read tabular data with lat/lon as geospatial data
parquet = "s3://anonymous@gbif-open-data-us-east-1/occurrence/2024-07-01/occurrence.parquet/**"
gbif = con.read_parquet(parquet, "gbif")
## lat-lon data to h3
con = ibis.duckdb.connect(extensions=["h3", "spatial", "httpfs"])
# register h3's builtin methods:
@ibis.udf.scalar.builtin
def h3_latlng_to_cell(lat: float, lng: float, zoom: int) -> int:
...
@ibis.udf.scalar.builtin
def hex(array) -> str:
...
@ibis.udf.scalar.builtin
def h3_cell_to_boundary_wkt (array) -> str:
...
parquet = "s3://anonymous@gbif-open-data-us-east-1/occurrence/2024-07-01/occurrence.parquet/**"
gbif = con.read_parquet(parquet, "gbif")
# zoom level 5
gbif.mutate(h5 = hex(h3_latlng_to_cell(_.decimallatitude, _.decimallongitude, 5)
## write to a geospatial format other than parquet
(con
.read_parquet(rivers_parquet)
.head(100)
.to_geo("test.geojson")
)
## write as geoparquet
read_parquet(rivers_parquet).to_parquet("rivers_ca.parquet")
## write as duckdb 'native-spatial' parquet
## Filter by a polygon
import geopandas as gpd
states = gpd.read_file("https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_1_states_provinces.geojson")
ca_polygon = states[states.name == 'California']
ca_poly_expr = ibis.literal(ca_polygon.geometry.iloc[0])
(rivers
.filter( _.geometry.within(ca_poly_expr))
).to_parquet("rivers_ca.parquet")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment