Skip to content

Instantly share code, notes, and snippets.

@dlebauer
Last active September 19, 2025 05:50
Show Gist options
  • Save dlebauer/a093e6a13aa6adb5eef36421fe5efa28 to your computer and use it in GitHub Desktop.
Save dlebauer/a093e6a13aa6adb5eef36421fe5efa28 to your computer and use it in GitHub Desktop.
Concatenate PEcAn NetCDF files. Solves the same problem as https://github.com/PecanProject/pecan/pull/3620, but using NCO.
#!/bin/bash
# This is a prototype with minimal testing.
#
# This script concatenates individual PEcAn NetCDF outputs from an ensemble run into a single file.
# It assumes that each ensemble folder is named in the pattern: ENS-<ensemble>-<site>
# and that each folder contains yearly NetCDF files (e.g., 2019.nc, 2020.nc, etc.)
#
# Usage: bash <script> [OUTDIR]
#   OUTDIR  PEcAn "out" directory that holds the ENS-* folders. Optional; the
#           default set below is used when it is omitted.
#
# `set -e` is deliberately not enabled: the steps that follow report a failure
# for an individual folder or site and then carry on with the next one.
## TODO
## 1. confirm that temporary files are cleaned up
##    find files then remove, but don't use alias rm
##    find "$MERGED_TIME_DIR" -type f -name "*.nc" -exec rm -f {} +
##    find "$MERGED_ENSEMBLE_DIR" -type f -name "*.nc" -exec rm -f {} +
## 2. use gnu parallel
## 3. use make or something similar so that the job can restart after being interrupted

# geo.bu.edu requires nco dependencies with specific versions
# (skipped where no `module` command exists; the tool check below still applies).
if command -v module >/dev/null 2>&1; then
  module load udunits/2.2.26 netcdf/4.6.1 nco/4.7.8
fi

# Fail fast when the NCO operators used by the later steps are unavailable,
# instead of emitting one "command not found" per ensemble folder.
for required_tool in ncrcat ncecat; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "Required NCO tool '$required_tool' not found on PATH." >&2
    exit 1
  fi
done

# Base out directory: first positional argument, or this default when omitted.
OUTDIR="${1:-/projectnb/dietzelab/ccmmf/ccmmf_phase_1b_98sites_20reps_20250312/out}"
if [[ ! -d "$OUTDIR" ]]; then
  echo "Output directory not found: $OUTDIR" >&2
  exit 1
fi

# Create intermediate directories for merged files
MERGED_TIME_DIR="${OUTDIR}/merged_time"
MERGED_ENSEMBLE_DIR="${OUTDIR}/merged_ensemble"
if ! mkdir -p -- "$MERGED_TIME_DIR" "$MERGED_ENSEMBLE_DIR"; then
  # Without these directories every later step would fail after doing the
  # expensive merge work, so stop here.
  echo "Could not create intermediate directories under $OUTDIR" >&2
  exit 1
fi

echo "=== Starting PEcAn NetCDF Ensemble Concatenation Process ==="
echo "Using OUTDIR: $OUTDIR"
echo "### Step 1: Concatenating time slices for each ensemble folder ###"

#######################################
# Merge all NetCDF files of one ensemble folder along the time (record)
# dimension with ncrcat.
# A leftover FOLDER/merged_time.nc (the intermediate file written by earlier
# versions of this step, e.g. after an interrupted run) is never used as input
# and is removed, so records are not duplicated on a re-run.
# On any failure TARGET is removed so that a stale file from a previous run
# cannot be picked up by the next step.
# Arguments: $1 - ensemble folder containing the yearly *.nc files
#            $2 - path of the merged output file
# Outputs:   diagnostics on stderr
# Returns:   0 on success, 1 if there is nothing to merge or ncrcat fails
#######################################
merge_time_slices() {
  local folder=$1
  local target=$2
  local leftover="${folder}/merged_time.nc"
  local -a inputs=()
  local f

  for f in "$folder"/*.nc; do
    # -e drops the unexpanded pattern when the folder has no *.nc files.
    if [[ -e "$f" && "$f" != "$leftover" ]]; then
      inputs+=("$f")
    fi
  done
  rm -f -- "$leftover"

  if (( ${#inputs[@]} == 0 )); then
    echo "No NetCDF files found in $folder." >&2
    rm -f -- "$target"
    return 1
  fi

  # -O overwrites an existing target; writing straight to the target avoids the
  # extra copy of every merged file.
  if ! ncrcat -O "${inputs[@]}" "$target"; then
    rm -f -- "$target"
    return 1
  fi
}

for folder in "${OUTDIR}"/ENS-*; do
  # Also skips the unexpanded pattern when no ENS-* entry exists.
  [[ -d "$folder" ]] || continue
  folder_basename=${folder##*/}
  echo "Processing folder: $folder_basename"
  target_time_file="${MERGED_TIME_DIR}/${folder_basename}_merged_time.nc"
  if merge_time_slices "$folder" "$target_time_file"; then
    echo "Merged time slices for $folder_basename into $target_time_file."
  else
    echo "Error merging time slices in folder $folder_basename. Skipping." >&2
  fi
done
echo "### Step 2: Concatenating ensembles for each site ###"

# Folder name format: ENS-<ensemble>-<site>
# The site ID is everything after the second hyphen, so site IDs that contain
# hyphens themselves are kept intact.
ens_run_re='^ENS-[^-]*-(.+)$'

#######################################
# Print the site ID encoded in an ensemble folder name.
# Arguments: $1 - folder name (no directory part), e.g. ENS-00001-1000004924
# Outputs:   the site ID on stdout
# Returns:   0 if the name matches ENS-<ensemble>-<site>, 1 otherwise
#######################################
site_of_run() {
  if [[ "$1" =~ $ens_run_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    return 1
  fi
}

#######################################
# Print the unique site IDs of all ENS-* directories, one per line, sorted.
# Arguments: $1 - directory that contains the ENS-* folders
#######################################
list_sites() {
  local dir=$1
  local d
  for d in "$dir"/ENS-*; do
    # Only directories count; this also drops the unexpanded pattern.
    [[ -d "$d" ]] || continue
    site_of_run "${d##*/}"
  done | sort -u
}

#######################################
# Print, one per line, the per-run merged files that belong to exactly one site.
# Arguments: $1 - directory holding the ENS-*_merged_time.nc files
#            $2 - site ID
#######################################
site_time_files() {
  local dir=$1
  local site=$2
  local f run_name
  # "$site" is quoted so glob characters in a site ID are matched literally.
  for f in "$dir"/ENS-*-"$site"_merged_time.nc; do
    [[ -e "$f" ]] || continue
    run_name=${f##*/}
    run_name=${run_name%_merged_time.nc}
    # The glob alone would also accept ENS-1-x-<site>; require an exact site match.
    if [[ "$run_name" =~ $ens_run_re && "${BASH_REMATCH[1]}" == "$site" ]]; then
      printf '%s\n' "$f"
    fi
  done
}

# Collect the lists into arrays first so that ncecat below runs with the
# script's own stdin rather than the list being read.
sites=()
while IFS= read -r site; do
  sites+=("$site")
done < <(list_sites "$OUTDIR")

for site in "${sites[@]}"; do
  echo "Processing site: $site"
  # Get list of merged_time files for this site.
  files=()
  while IFS= read -r f; do
    files+=("$f")
  done < <(site_time_files "$MERGED_TIME_DIR" "$site")

  if (( ${#files[@]} == 0 )); then
    echo "No merged time files found for site: $site. Skipping."
    continue
  fi

  echo "Concatenating ${#files[@]} ensemble files for site $site..."
  target_ensemble_file="${MERGED_ENSEMBLE_DIR}/merged_ensemble_${site}.nc"
  # -u names the new record dimension that stacks the runs; -O overwrites.
  if ncecat -O -u ensemble "${files[@]}" "$target_ensemble_file"; then
    echo "Ensemble files concatenated to $target_ensemble_file."
  else
    echo "Error concatenating ensemble files for site $site." >&2
    # Do not leave a file from an earlier run for the cross-site step to pick up.
    rm -f -- "$target_ensemble_file"
  fi
done
echo "### Step 3: Concatenating across all sites ###"
# Stack every per-site ensemble file into one final file. The script exits
# non-zero when that file cannot be produced, so schedulers and calling
# scripts can detect the failure.
final_merged_file="${OUTDIR}/final_merged.nc"

files_all=()
for f in "${MERGED_ENSEMBLE_DIR}"/merged_ensemble_*.nc; do
  # -e drops the unexpanded pattern when no per-site file exists.
  if [[ -e "$f" ]]; then
    files_all+=("$f")
  fi
done

if (( ${#files_all[@]} == 0 )); then
  echo "No merged ensemble files found. Exiting without final concatenation." >&2
  exit 1
fi

echo "Concatenating ${#files_all[@]} merged ensemble files across all sites..."
# -u names the new record dimension that stacks the sites; -O overwrites.
if ! ncecat -O -u site "${files_all[@]}" "$final_merged_file"; then
  echo "Error creating final merged file. Intermediate files retained for debugging." >&2
  exit 1
fi

echo "Final merged file created at: $final_merged_file"
echo "Cleaning up intermediate directories."
# :? aborts instead of running rm -rf with an empty path if a variable is unset.
rm -rf -- "${MERGED_TIME_DIR:?MERGED_TIME_DIR is not set}" \
  "${MERGED_ENSEMBLE_DIR:?MERGED_ENSEMBLE_DIR is not set}"

echo "=== EFI NetCDF Concatenation Process Completed ==="
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment