Created
November 14, 2025 17:49
-
-
Save jgomezdans/3835f87a1ed6365cbcb4cc1ca5e62696 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import annotations | |
| from pathlib import Path | |
| import numpy as np | |
| import pandas as pd | |
| import xarray as xr | |
| def save_db_site_to_dnb_netcdf( | |
| df: pd.DataFrame, | |
| lat: float, | |
| lon: float, | |
| out_path: str | Path, | |
| ) -> Path: | |
| """Save D&B site forcing to NetCDF with expected D&B layout. | |
| Parameters | |
| ---------- | |
| df : pandas.DataFrame | |
| Hourly site data with a DatetimeIndex in UTC and columns: | |
| - 'T' : 2 m air temperature [K] | |
| - 'SoilT' : soil temperature at rooting depth [K] | |
| - 'Rsw' : incoming shortwave radiation [J m-2 h-1] | |
| - 'LW downwelling' : incoming longwave radiation [J m-2 h-1] | |
| - 'P' : precipitation [m h-1] | |
| - 'DEM' : site elevation [m] (used as metadata) | |
| Extra columns (e.g. U10, V10) are ignored here. | |
| lat : float | |
| Site latitude [degrees north]. | |
| lon : float | |
| Site longitude [degrees east]. | |
| out_path : str or pathlib.Path | |
| Output NetCDF filename. | |
| Returns | |
| ------- | |
| pathlib.Path | |
| Path to the written NetCDF file. | |
| """ | |
| out_path = Path(out_path) | |
| # ----- Time handling ------------------------------------------------- | |
| if not isinstance(df.index, pd.DatetimeIndex): | |
| raise ValueError("df.index must be a DatetimeIndex.") | |
| t = df.index | |
| # Ensure UTC, then drop tz for storage | |
| if t.tz is None: | |
| t = t.tz_localize("UTC") | |
| else: | |
| t = t.tz_convert("UTC") | |
| t = t.tz_localize(None) | |
| ntime = len(t) | |
| # yyyymmddhh as an integer-coded time (1 x ntime) | |
| yyyymmddhh_vals = np.array( | |
| [int(ts.strftime("%Y%m%d%H")) for ts in t], dtype="float64" | |
| )[None, :] # shape (1, time) | |
| # ----- Data arrays (ng = 1) ----------------------------------------- | |
| ng = 1 | |
| ntc = 1 # single time series | |
| def _col(name: str) -> np.ndarray: | |
| if name not in df.columns: | |
| raise KeyError(f"Expected column '{name}' in df.") | |
| return df[name].to_numpy(dtype="float64") | |
| temperature = _col("T")[None, :] # (ng, time) | |
| soil_temperature = _col("SoilT")[None, :] # (ng, time) | |
| swrad = _col("Rsw")[None, :] # (ng, time) | |
| lwdown = _col("LW downwelling")[None, :] # (ng, time) | |
| precipitation = _col("P")[None, :] # (ng, time) | |
| # Assumption: no outgoing LW available, so lwnet == lwdown | |
| lwnet = lwdown.copy() | |
| # DEM as scalar (from first row) | |
| dem = float(df["DEM"].iloc[0]) if "DEM" in df.columns else np.nan | |
| # ----- Build Dataset ------------------------------------------------- | |
| ds = xr.Dataset( | |
| data_vars={ | |
| "yyyymmddhh": (("ntc", "time"), yyyymmddhh_vals), | |
| "temperature": (("ng", "time"), temperature), | |
| "swrad": (("ng", "time"), swrad), | |
| "precipitation": (("ng", "time"), precipitation), | |
| "soil_temperature": (("ng", "time"), soil_temperature), | |
| "lwdown": (("ng", "time"), lwdown), | |
| "lwnet": (("ng", "time"), lwnet), | |
| }, | |
| coords={ | |
| "time": ("time", t), | |
| "lat": ("ng", np.array([lat], dtype="float64")), | |
| "lon": ("ng", np.array([lon], dtype="float64")), | |
| "ntc": ("ntc", np.arange(ntc, dtype="int32")), | |
| "ng": ("ng", np.arange(ng, dtype="int32")), | |
| }, | |
| ) | |
| # ----- Attributes / units ------------------------------------------- | |
| ds["yyyymmddhh"].attrs.update( | |
| { | |
| "long_name": "time in YYYYMMDDHH (UTC)", | |
| "units": "YYYYMMDDHH", | |
| } | |
| ) | |
| ds["lat"].attrs.update( | |
| {"long_name": "latitude", "units": "degrees_north"} | |
| ) | |
| ds["lon"].attrs.update( | |
| {"long_name": "longitude", "units": "degrees_east"} | |
| ) | |
| ds["temperature"].attrs.update( | |
| { | |
| "long_name": "2 m air temperature", | |
| "units": "K", | |
| "source": "ERA5-Land / D&B forcing", | |
| } | |
| ) | |
| ds["soil_temperature"].attrs.update( | |
| { | |
| "long_name": "soil temperature (level 3)", | |
| "units": "K", | |
| "comment": "Approx. rooting depth layer.", | |
| } | |
| ) | |
| ds["swrad"].attrs.update( | |
| { | |
| "long_name": "incoming shortwave radiation", | |
| "units": "J m-2", | |
| "comment": ( | |
| "Accumulated over the preceding hour. " | |
| "Divide by 3600 for W m-2." | |
| ), | |
| } | |
| ) | |
| ds["precipitation"].attrs.update( | |
| { | |
| "long_name": "total precipitation", | |
| "units": "m", | |
| "comment": "Liquid-equivalent depth over the preceding hour.", | |
| } | |
| ) | |
| ds["lwdown"].attrs.update( | |
| { | |
| "long_name": "incoming longwave radiation", | |
| "units": "J m-2", | |
| "comment": ( | |
| "Accumulated over the preceding hour. " | |
| "Divide by 3600 for W m-2." | |
| ), | |
| } | |
| ) | |
| ds["lwnet"].attrs.update( | |
| { | |
| "long_name": "net longwave radiation (down - up)", | |
| "units": "J m-2", | |
| "comment": ( | |
| "Outgoing LW not available; here lwnet == lwdown. " | |
| "Adjust if an LW upwelling term becomes available." | |
| ), | |
| } | |
| ) | |
| ds.attrs.update( | |
| { | |
| "title": "D&B site forcing", | |
| "Conventions": "CF-1.8 (partial)", | |
| "site_lat": float(lat), | |
| "site_lon": float(lon), | |
| "site_elevation_m": dem, | |
| } | |
| ) | |
| # Compression / time encoding | |
| encoding = { | |
| "temperature": {"zlib": True, "complevel": 4, "dtype": "float32"}, | |
| "swrad": {"zlib": True, "complevel": 4, "dtype": "float32"}, | |
| "precipitation": {"zlib": True, "complevel": 4, "dtype": "float32"}, | |
| "soil_temperature": {"zlib": True, "complevel": 4, "dtype": "float32"}, | |
| "lwdown": {"zlib": True, "complevel": 4, "dtype": "float32"}, | |
| "lwnet": {"zlib": True, "complevel": 4, "dtype": "float32"}, | |
| "yyyymmddhh": {"dtype": "float64"}, | |
| "time": { | |
| "units": "seconds since 1970-01-01 00:00:00", | |
| "calendar": "standard", | |
| }, | |
| } | |
| ds.to_netcdf(out_path, encoding=encoding) | |
| return out_path | |
| if __name__ == "__main__": | |
| import meteo_data_gee | |
| # You need to define your GEE project name... | |
| # Store in varabie EARTHENGINE_PROJECT_NAME | |
| # First grab the data... | |
| df_db = meteo_data_gee.get_meteo_data( | |
| loc=(51.8089, -0.3566), #Rothamsted! | |
| model="db", | |
| year=2020, | |
| ee_project=EARTHENGINE_PROJECT_NAME, | |
| ) | |
| # Then dump it to a netcdf file | |
| save_db_site_to_dnb_netcdf( | |
| df=df_db, # with columns T, SoilT, Rsw, LW downwelling, P, DEM | |
| lat=lat, | |
| lon=lon, | |
| out_path="/tmp/db_rothamsted.nc", | |
| ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment