Created
February 28, 2024 15:02
-
-
Save joshmoore/715b6cb74e74fce4feac7c610eef4d96 to your computer and use it in GitHub Desktop.
GBIF Occurrence Map Generation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: read the GBIF occurrence snapshot and truncate coordinates.
#
# NOTE(fix): SparkContext was previously imported from
# pyspark.sql.functions, which raises ImportError — it lives in the
# top-level `pyspark` package.
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import col, lit, pow
from pyspark.sql.types import LongType

# Primary argument: how much to truncate the long & lat
# (number of decimal places kept in the output coordinates).
num_places = 1

# https://data-blog.gbif.org/post/aws-and-gbif/
# Download and unzip one of the files
sc = SparkContext("local", "test")
sqlContext = SQLContext(sc)
gbif_snapshot_path = "occurrence.parquet/*"
df = sqlContext.read.parquet(gbif_snapshot_path)

# Keep only the two coordinate columns needed for the map.
export_df = df.select("decimallatitude", "decimallongitude")

# Truncation multiplier: scale by 10**num_places, cast to integer
# (dropping the remaining fraction), then divide back.
m = pow(lit(10), num_places).cast(LongType())
trunc_df = (
    export_df.withColumn("lat", (col("decimallatitude") * m).cast(LongType()) / m)
    .withColumn("long", (col("decimallongitude") * m).cast(LongType()) / m)
    .drop("decimallatitude")
    .drop("decimallongitude")
)
trunc_df.write.mode("overwrite").parquet("export")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 2: filter out junk coordinates and count occurrences per cell.
#
# NOTE(fix): SparkContext was previously imported from
# pyspark.sql.functions, which raises ImportError — it lives in the
# top-level `pyspark` package.
from pyspark import SparkContext
from pyspark.sql import SQLContext

# Filter the coordinates and add a count column
sc = SparkContext("local", "test")
sqlContext = SQLContext(sc)
gbif_snapshot_path = "export/*"
df = sqlContext.read.parquet(gbif_snapshot_path)

# Drop rows at exactly (0, 0) — presumably a bad-data sentinel — and
# rows where both coordinates are missing.
cleanup = "!((lat == 0.0 and long == 0.0) or (lat is NULL and long is NULL))"
counts = (
    df.filter(cleanup)
    .groupBy("lat", "long")
    .count()
)
counts.write.mode("overwrite").parquet("counts")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 3: render the per-cell counts as a rasterized scatter map.
import glob

import pandas as pd
import hvplot.pandas  # noqa
import holoviews as hv

# This assumes the data has been sufficiently reduced.
# NOTE(fix): pd.read_parquet expects a single path, not the list that
# glob.glob returns — read each part file and concatenate them instead.
part_files = sorted(glob.glob("counts/*.parquet"))
df = pd.concat((pd.read_parquet(p) for p in part_files), ignore_index=True)

# eq_hist normalization spreads the heavily skewed counts across the
# color range; rasterize aggregates points server-side.
dmap = df.hvplot.scatter(
    x="long", y="lat", rasterize=True, cnorm="eq_hist"
)  # , cmap="gouldian")
hv.save(dmap, fmt="png", filename="map.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment