Last active
August 23, 2024 18:16
-
-
Save cboettig/da8dc9889695aad177b1191402b01625 to your computer and use it in GitHub Desktop.
ibis geospatial I/O
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ibis | |
from ibis import _ | |
con = ibis.duckdb.connect(extensions=["spatial"]) | |
## Read a geospatial file other than parquet (via GDAL) | |
rivers_geojson = "https://data.source.coop/cboettig/us-rivers/nhd_flowline_national.geojson" | |
rivers = con.read_geo(rivers_geojson) | |
## Read geoparquet | |
geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet" | |
import ibis | |
from ibis import _ | |
con = ibis.duckdb.connect("duck.db", extensions = ["spatial"]) | |
crops = con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"}) | |
df = crops.to_pandas() | |
## Read tabular data with lat/lon as geospatial data | |
parquet = "s3://anonymous@gbif-open-data-us-east-1/occurrence/2024-07-01/occurrence.parquet/**" | |
gbif = con.read_parquet(parquet, "gbif") | |
## lat-lon data to h3 | |
con = ibis.duckdb.connect(extensions=["h3", "spatial", "httpfs"]) | |
# register h3's builtin methods: | |
@ibis.udf.scalar.builtin | |
def h3_latlng_to_cell(lat: float, lng: float, zoom: int) -> int: | |
... | |
@ibis.udf.scalar.builtin | |
def hex(array) -> str: | |
... | |
@ibis.udf.scalar.builtin | |
def h3_cell_to_boundary_wkt (array) -> str: | |
... | |
parquet = "s3://anonymous@gbif-open-data-us-east-1/occurrence/2024-07-01/occurrence.parquet/**" | |
gbif = con.read_parquet(parquet, "gbif") | |
# zoom level 5 | |
gbif.mutate(h5 = hex(h3_latlng_to_cell(_.decimallatitude, _.decimallongitude, 5) | |
## write to a geospatial format other than parquet | |
(con | |
.read_parquet(rivers_parquet) | |
.head(100) | |
.to_geo("test.geojson") | |
) | |
## write as geoparquet | |
read_parquet(rivers_parquet).to_parquet("rivers_ca.parquet") | |
## write as duckdb 'native-spatial' parquet | |
## Filter by a polygon | |
import geopandas as gpd | |
states = gpd.read_file("https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_1_states_provinces.geojson") | |
ca_polygon = states[states.name == 'California'] | |
ca_poly_expr = ibis.literal(ca_polygon.geometry.iloc[0]) | |
(rivers | |
.filter( _.geometry.within(ca_poly_expr)) | |
).to_parquet("rivers_ca.parquet") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment