Skip to content

Instantly share code, notes, and snippets.

@xevix
Last active February 12, 2025 21:08
Show Gist options
  • Save xevix/83ad1ee42c3e9b5f7424d38a44c69075 to your computer and use it in GitHub Desktop.
Save xevix/83ad1ee42c3e9b5f7424d38a44c69075 to your computer and use it in GitHub Desktop.
import pandas as pd
import duckdb
import pygrib
import sys
# Testing data from: https://data.ecmwf.int/forecasts/20250211/00z/aifs/0p25/oper/
# ECMWF is the European Centre for Medium-Range Weather Forecasts.
def grib_to_df(grb):
    """Flatten one GRIB message into a DataFrame with one row per grid point.

    Args:
        grb: Message object whose ``str()`` form is eight colon-separated
            header fields and whose ``data()`` method returns a
            ``(values, lats, lons)`` triple of 2-D grids (presumably a
            pygrib message -- confirm against the caller). The three grids
            are assumed to share one shape.

    Returns:
        pandas.DataFrame: The eight header fields repeated on every row
        (as strings), followed by ``geom`` -- a ``(lon, lat)`` tuple -- and
        ``value``, the data value at that grid point.

    Raises:
        ValueError: If the header does not split into exactly eight fields.
    """
    columns = [
        "num", "name", "units", "ll", "unit_desc",
        "level", "fsct_time", "fsct_dt", "geom", "value",
    ]
    attrs = str(grb).split(":")
    # The last two columns (geom, value) come from the grid, not the header.
    # Check up front so a malformed header fails before the (potentially
    # very large) row list is built.
    expected = len(columns) - 2
    if len(attrs) != expected:
        raise ValueError(
            f"expected {expected} ':'-separated header fields, "
            f"got {len(attrs)}: {str(grb)!r}"
        )
    values, lats, lons = grb.data()
    # Walk the three grids in lockstep instead of re-indexing each of them
    # with [i][j] for every single element.
    rows = [
        (*attrs, (lon, lat), value)
        for value_row, lat_row, lon_row in zip(values, lats, lons)
        for value, lat, lon in zip(value_row, lat_row, lon_row)
    ]
    return pd.DataFrame(rows, columns=columns)
def df_to_parquet(df, con, fname):
    """Write one message's DataFrame to a zstd-compressed Parquet file.

    The output path is ``{fname}_{num}_{name}.parquet``, where ``num`` and
    ``name`` are taken from the first row of ``df``. The ``geom`` column is
    replaced by ``st_point(geom[1], geom[2])`` in the written file.

    Args:
        df: DataFrame with at least ``num``, ``name`` and ``geom`` columns.
            The parameter must keep the name ``df``: the SQL refers to it by
            name and DuckDB resolves it from the Python scope.
        con: Open DuckDB connection. ``st_point`` is presumably provided by
            the spatial extension, which the caller must have loaded.
        fname: Output path prefix (no extension).

    Raises:
        IndexError: If ``df`` has no rows.
    """
    # Positional access: do not assume the frame carries index label 0.
    num = df["num"].iloc[0]
    name = df["name"].iloc[0]
    print(f'{num}:{name}: Copying...')
    # The path is embedded in a SQL string literal, so a single quote in the
    # file name or the parameter name must be doubled to keep the statement
    # well-formed.
    out_path = f"{fname}_{num}_{name}.parquet".replace("'", "''")
    num_rows = con.execute(f"""
        COPY (SELECT * REPLACE(st_point(geom[1], geom[2]) AS geom) FROM df)
        TO '{out_path}' (PARQUET_VERSION V2, COMPRESSION 'zstd')""").fetchall()[0][0]
    print(f'{num}:{name}: Inserted {num_rows} rows')
def main(gribfile, msgnum):
    """Convert messages of a GRIB file to Parquet files, one file per message.

    Args:
        gribfile: Path of the GRIB file to read. The output prefix is this
            path with every ``.grib2`` occurrence removed.
        msgnum: 1-based number of the single message to convert, or ``0`` to
            convert every message in the file.
    """
    # Loop-invariant: every message of the file shares one output prefix.
    fname = gribfile.replace(".grib2", "")
    grbs = pygrib.open(gribfile)
    try:
        con = duckdb.connect()
        try:
            con.execute("INSTALL spatial")
            con.execute("LOAD spatial")
            print(f"msgnum: {msgnum}")
            for i, grb in enumerate(grbs, start=1):
                if msgnum != 0 and i != msgnum:
                    continue
                print(f'Starting: {grb}')
                df = grib_to_df(grb)
                df_to_parquet(df, con, fname)
                if msgnum != 0:
                    # Only one message was requested; skip the rest.
                    break
        finally:
            # Close the connection even if a conversion fails.
            con.close()
    finally:
        # Release the GRIB file handle, which was previously never closed.
        grbs.close()
def usage(name):
    """Print the one-line command synopsis to standard output.

    Args:
        name: Program name to show in the synopsis.
    """
    synopsis = f"usage: {name} filename.grib2 [msgnum]"
    print(synopsis)
if __name__ == "__main__":
    # Command-line entry point: <script> filename.grib2 [msgnum]
    if len(sys.argv) < 2:
        usage(sys.argv[0])
        # sys.exit() instead of the builtin exit(): the latter is injected by
        # the `site` module and is not guaranteed to exist (e.g. `python -S`).
        sys.exit(1)
    gribfile = sys.argv[1]
    try:
        # msgnum is optional; 0 means "convert every message".
        msgnum = int(sys.argv[2]) if len(sys.argv) > 2 else 0
    except ValueError:
        # A non-integer msgnum is a usage error, not a crash.
        print(f"error: msgnum must be an integer, got {sys.argv[2]!r}", file=sys.stderr)
        usage(sys.argv[0])
        sys.exit(1)
    main(gribfile, msgnum)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment