from __future__ import annotations
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr


def save_db_site_to_dnb_netcdf(
    df: pd.DataFrame,
    lat: float,
    lon: float,
    out_path: str | Path,
) -> Path:
    """Save D&B site forcing to NetCDF with the expected D&B layout.

    Parameters
    ----------
    df : pandas.DataFrame
        Hourly site data with a DatetimeIndex in UTC and columns:

        - 'T' : 2 m air temperature [K]
        - 'SoilT' : soil temperature at rooting depth [K]
        - 'Rsw' : incoming shortwave radiation [J m-2 h-1]
        - 'LW downwelling' : incoming longwave radiation [J m-2 h-1]
        - 'P' : precipitation [m h-1]
        - 'DEM' : site elevation [m] (used as metadata)

        Extra columns (e.g. U10, V10) are ignored here.
    lat : float
        Site latitude [degrees north].
    lon : float
        Site longitude [degrees east].
    out_path : str or pathlib.Path
        Output NetCDF filename.

    Returns
    -------
    pathlib.Path
        Path to the written NetCDF file.
    """
    out_path = Path(out_path)

    # ----- Time handling -------------------------------------------------
    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError("df.index must be a DatetimeIndex.")
    t = df.index
    # Ensure UTC, then drop tz for storage
    if t.tz is None:
        t = t.tz_localize("UTC")
    else:
        t = t.tz_convert("UTC")
    t = t.tz_localize(None)
    ntime = len(t)

    # yyyymmddhh as an integer-coded time (1 x ntime)
    yyyymmddhh_vals = np.array(
        [int(ts.strftime("%Y%m%d%H")) for ts in t], dtype="float64"
    )[None, :]  # shape (1, time)

    # ----- Data arrays (ng = 1) -----------------------------------------
    ng = 1
    ntc = 1  # single time series

    def _col(name: str) -> np.ndarray:
        if name not in df.columns:
            raise KeyError(f"Expected column '{name}' in df.")
        return df[name].to_numpy(dtype="float64")

    temperature = _col("T")[None, :]  # (ng, time)
    soil_temperature = _col("SoilT")[None, :]  # (ng, time)
    swrad = _col("Rsw")[None, :]  # (ng, time)
    lwdown = _col("LW downwelling")[None, :]  # (ng, time)
    precipitation = _col("P")[None, :]  # (ng, time)
    # Assumption: no outgoing LW available, so lwnet == lwdown
    lwnet = lwdown.copy()

    # DEM as scalar (from first row)
    dem = float(df["DEM"].iloc[0]) if "DEM" in df.columns else np.nan
    # ----- Build Dataset -------------------------------------------------
    ds = xr.Dataset(
        data_vars={
            "yyyymmddhh": (("ntc", "time"), yyyymmddhh_vals),
            "temperature": (("ng", "time"), temperature),
            "swrad": (("ng", "time"), swrad),
            "precipitation": (("ng", "time"), precipitation),
            "soil_temperature": (("ng", "time"), soil_temperature),
            "lwdown": (("ng", "time"), lwdown),
            "lwnet": (("ng", "time"), lwnet),
        },
        coords={
            "time": ("time", t),
            "lat": ("ng", np.array([lat], dtype="float64")),
            "lon": ("ng", np.array([lon], dtype="float64")),
            "ntc": ("ntc", np.arange(ntc, dtype="int32")),
            "ng": ("ng", np.arange(ng, dtype="int32")),
        },
    )
    # ----- Attributes / units -------------------------------------------
    ds["yyyymmddhh"].attrs.update(
        {
            "long_name": "time in YYYYMMDDHH (UTC)",
            "units": "YYYYMMDDHH",
        }
    )
    ds["lat"].attrs.update(
        {"long_name": "latitude", "units": "degrees_north"}
    )
    ds["lon"].attrs.update(
        {"long_name": "longitude", "units": "degrees_east"}
    )
    ds["temperature"].attrs.update(
        {
            "long_name": "2 m air temperature",
            "units": "K",
            "source": "ERA5-Land / D&B forcing",
        }
    )
    ds["soil_temperature"].attrs.update(
        {
            "long_name": "soil temperature (level 3)",
            "units": "K",
            "comment": "Approx. rooting depth layer.",
        }
    )
    ds["swrad"].attrs.update(
        {
            "long_name": "incoming shortwave radiation",
            "units": "J m-2",
            "comment": (
                "Accumulated over the preceding hour. "
                "Divide by 3600 for W m-2."
            ),
        }
    )
    ds["precipitation"].attrs.update(
        {
            "long_name": "total precipitation",
            "units": "m",
            "comment": "Liquid-equivalent depth over the preceding hour.",
        }
    )
    ds["lwdown"].attrs.update(
        {
            "long_name": "incoming longwave radiation",
            "units": "J m-2",
            "comment": (
                "Accumulated over the preceding hour. "
                "Divide by 3600 for W m-2."
            ),
        }
    )
    ds["lwnet"].attrs.update(
        {
            "long_name": "net longwave radiation (down - up)",
            "units": "J m-2",
            "comment": (
                "Outgoing LW not available; here lwnet == lwdown. "
                "Adjust if an LW upwelling term becomes available."
            ),
        }
    )
    ds.attrs.update(
        {
            "title": "D&B site forcing",
            "Conventions": "CF-1.8 (partial)",
            "site_lat": float(lat),
            "site_lon": float(lon),
            "site_elevation_m": dem,
        }
    )

    # Compression / time encoding
    encoding = {
        "temperature": {"zlib": True, "complevel": 4, "dtype": "float32"},
        "swrad": {"zlib": True, "complevel": 4, "dtype": "float32"},
        "precipitation": {"zlib": True, "complevel": 4, "dtype": "float32"},
        "soil_temperature": {"zlib": True, "complevel": 4, "dtype": "float32"},
        "lwdown": {"zlib": True, "complevel": 4, "dtype": "float32"},
        "lwnet": {"zlib": True, "complevel": 4, "dtype": "float32"},
        "yyyymmddhh": {"dtype": "float64"},
        "time": {
            "units": "seconds since 1970-01-01 00:00:00",
            "calendar": "standard",
        },
    }
    ds.to_netcdf(out_path, encoding=encoding)
    return out_path
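

# A minimal sketch (not part of the original gist): build a synthetic hourly
# DataFrame with the columns save_db_site_to_dnb_netcdf() expects, so the
# NetCDF writer can be exercised without Earth Engine access. The helper name
# _make_synthetic_df and all values are illustrative placeholders only.
def _make_synthetic_df(year: int = 2020) -> pd.DataFrame:
    """Return a dummy hourly forcing DataFrame with the expected columns."""
    idx = pd.date_range(f"{year}-01-01", f"{year}-01-08", freq="h", tz="UTC")
    n = len(idx)
    return pd.DataFrame(
        {
            "T": np.full(n, 283.0),               # 2 m air temperature [K]
            "SoilT": np.full(n, 281.0),           # soil temperature [K]
            "Rsw": np.full(n, 0.4e6),             # shortwave [J m-2 h-1]
            "LW downwelling": np.full(n, 1.1e6),  # longwave [J m-2 h-1]
            "P": np.full(n, 1.0e-4),              # precipitation [m h-1]
            "DEM": np.full(n, 128.0),             # site elevation [m]
        },
        index=idx,
    )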


if __name__ == "__main__":
    import os

    import meteo_data_gee

    # You need to define your GEE project name; here it is assumed to be
    # stored in the environment variable EARTHENGINE_PROJECT_NAME.
    EARTHENGINE_PROJECT_NAME = os.environ["EARTHENGINE_PROJECT_NAME"]

    lat, lon = 51.8089, -0.3566  # Rothamsted!

    # First grab the data...
    df_db = meteo_data_gee.get_meteo_data(
        loc=(lat, lon),
        model="db",
        year=2020,
        ee_project=EARTHENGINE_PROJECT_NAME,
    )
    # Then dump it to a NetCDF file
    save_db_site_to_dnb_netcdf(
        df=df_db,  # with columns T, SoilT, Rsw, LW downwelling, P, DEM
        lat=lat,
        lon=lon,
        out_path="/tmp/db_rothamsted.nc",
    )
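
    # Quick sanity check (not in the original gist): read the file back with
    # xarray and print the dataset summary to confirm the layout on disk.
    ds_check = xr.open_dataset("/tmp/db_rothamsted.nc")
    print(ds_check)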