Last active
September 19, 2025 05:50
-
-
Save dlebauer/a093e6a13aa6adb5eef36421fe5efa28 to your computer and use it in GitHub Desktop.
Concatenate PEcAn NetCDF files. Solves the same problem as https://github.com/PecanProject/pecan/pull/3620, but using NCO.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Concatenate PEcAn ensemble NetCDF outputs into a single file using NCO.
#
# This is a prototype with minimal testing.
#
# Usage: <this script> [OUTDIR]
#   OUTDIR  PEcAn "out" directory that holds the ENS-* folders. Optional;
#           defaults to the path hard-coded below.
#
# This script concatenates individual PEcAn NetCDF outputs from an ensemble run
# into a single file. It assumes that each ensemble folder is named in the
# pattern ENS-<ensemble>-<site> and that each folder contains yearly NetCDF
# files (e.g., 2019.nc, 2020.nc, etc.).
#
# TODO:
#   1. Confirm that temporary files are cleaned up: find the files, then remove
#      them, but don't use an aliased rm, e.g.
#        find "$MERGED_TIME_DIR" -type f -name "*.nc" -exec rm -f {} +
#        find "$MERGED_ENSEMBLE_DIR" -type f -name "*.nc" -exec rm -f {} +
#   2. Use GNU parallel.
#   3. Use make or something similar so that the job can restart after being
#      interrupted.

# geo.bu.edu requires nco dependencies with specific versions. On hosts without
# the `module` command, fall back to whatever NCO installation is on PATH.
if command -v module >/dev/null 2>&1; then
  module load udunits/2.2.26 netcdf/4.6.1 nco/4.7.8 \
    || echo "Warning: 'module load' failed; relying on NCO tools already on PATH." >&2
else
  echo "Warning: 'module' command not available; relying on NCO tools already on PATH." >&2
fi

# Fail fast instead of reporting one merge error per folder later on.
for tool in ncrcat ncecat; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "Error: required NCO tool '$tool' not found." >&2
    exit 1
  fi
done

# Base out directory: first argument if given, otherwise the original default.
OUTDIR="${1:-/projectnb/dietzelab/ccmmf/ccmmf_phase_1b_98sites_20reps_20250312/out}"
if [[ ! -d "$OUTDIR" ]]; then
  echo "Error: OUTDIR does not exist or is not a directory: $OUTDIR" >&2
  exit 1
fi

# Create intermediate directories for merged files.
MERGED_TIME_DIR="${OUTDIR}/merged_time"
MERGED_ENSEMBLE_DIR="${OUTDIR}/merged_ensemble"
if ! mkdir -p -- "$MERGED_TIME_DIR" "$MERGED_ENSEMBLE_DIR"; then
  echo "Error: could not create intermediate directories under $OUTDIR" >&2
  exit 1
fi

echo "=== Starting PEcAn NetCDF Ensemble Concatenation Process ==="
echo "Using OUTDIR: $OUTDIR"
#######################################
# Step 1: merge the yearly NetCDF files of every ensemble folder along the
# time (record) dimension with ncrcat.
# Globals:   OUTDIR (read), MERGED_TIME_DIR (read)
# Arguments: none
# Outputs:   progress on stdout, problems on stderr; writes
#            $MERGED_TIME_DIR/<folder>_merged_time.nc for each ENS-* folder
# Returns:   0 (folders that cannot be merged are reported and skipped)
#######################################
merge_time_slices() {
  local folder folder_basename target_time_file stale_file nc_file
  local -a inputs

  echo "### Step 1: Concatenating time slices for each ensemble folder ###"

  for folder in "${OUTDIR}"/ENS-*; do
    [[ -d "$folder" ]] || continue
    folder_basename=${folder##*/}
    echo "Processing folder: $folder_basename"

    # Earlier versions of this step wrote merged_time.nc inside the ensemble
    # folder; a leftover from an interrupted run must not be merged again.
    stale_file="$folder/merged_time.nc"

    inputs=()
    for nc_file in "$folder"/*.nc; do
      # An unmatched glob stays literal, so confirm each entry is a real file.
      [[ -f "$nc_file" ]] || continue
      if [[ "$nc_file" == "$stale_file" ]]; then
        continue
      fi
      inputs+=("$nc_file")
    done

    if (( ${#inputs[@]} == 0 )); then
      echo "No NetCDF files found in folder $folder_basename. Skipping." >&2
      continue
    fi

    # Write straight to the destination: no intermediate copy to lose or to
    # clean up, and the output can never be picked up by the input glob.
    target_time_file="${MERGED_TIME_DIR}/${folder_basename}_merged_time.nc"
    if ! ncrcat -O "${inputs[@]}" "$target_time_file"; then
      echo "Error merging time slices in folder $folder_basename. Skipping." >&2
      continue
    fi
    echo "Merged time slices for $folder_basename into $target_time_file."

    # Keep the ensemble folder free of intermediate files, as before.
    if [[ -e "$stale_file" ]]; then
      echo "Removing stale intermediate file $stale_file." >&2
      rm -f -- "$stale_file"
    fi
  done

  return 0
}

merge_time_slices
#######################################
# Step 2: for every site, stack the per-ensemble merged_time files along a new
# record dimension named "ensemble" with ncecat.
# Folder name format: ENS-<ensemble>-<site>. The site ID is everything after
# the second hyphen, so site IDs that themselves contain hyphens are kept
# intact (this assumes <ensemble> contains no hyphen).
# Globals:   OUTDIR (read), MERGED_TIME_DIR (read), MERGED_ENSEMBLE_DIR (read)
# Arguments: none
# Outputs:   progress on stdout, problems on stderr; writes
#            $MERGED_ENSEMBLE_DIR/merged_ensemble_<site>.nc per site
# Returns:   0 (sites that cannot be concatenated are reported and skipped)
#######################################
concatenate_ensembles_by_site() {
  local dir name rest site f target_ensemble_file
  local -a sites=() files

  echo "### Step 2: Concatenating ensembles for each site ###"

  # Extract unique site IDs from the ensemble folder names.
  while IFS= read -r site; do
    sites+=("$site")
  done < <(
    for dir in "${OUTDIR}"/ENS-*; do
      [[ -d "$dir" ]] || continue
      name=${dir##*/}
      rest=${name#ENS-}
      # Need "<ensemble>-<site>" with a non-empty site part.
      [[ "$rest" == *-?* ]] || continue
      printf '%s\n' "${rest#*-}"
    done | sort -u
  )

  for site in "${sites[@]}"; do
    echo "Processing site: $site"

    # Get list of merged_time files for this site.
    files=()
    for f in "${MERGED_TIME_DIR}"/ENS-*-"${site}"_merged_time.nc; do
      [[ -e "$f" ]] || continue
      # The glob alone would also match sites that merely end in "-<site>";
      # re-derive the site ID from the file name and require an exact match.
      name=${f##*/}
      name=${name%_merged_time.nc}
      rest=${name#ENS-}
      [[ "${rest#*-}" == "$site" ]] || continue
      files+=("$f")
    done

    if (( ${#files[@]} == 0 )); then
      echo "No merged time files found for site: $site. Skipping."
      continue
    fi

    echo "Concatenating ${#files[@]} ensemble files for site $site..."
    target_ensemble_file="${MERGED_ENSEMBLE_DIR}/merged_ensemble_${site}.nc"
    if ncecat -O -u ensemble "${files[@]}" "$target_ensemble_file"; then
      echo "Ensemble files concatenated to $target_ensemble_file."
    else
      echo "Error concatenating ensemble files for site $site." >&2
    fi
  done

  return 0
}

concatenate_ensembles_by_site
#######################################
# Step 3: stack the per-site ensemble files along a new record dimension named
# "site" into $OUTDIR/final_merged.nc, then remove the intermediate
# directories. Intermediates are kept whenever the final file is not created.
# Globals:   OUTDIR (read), MERGED_TIME_DIR (read), MERGED_ENSEMBLE_DIR (read)
# Arguments: none
# Outputs:   progress on stdout, problems on stderr
# Returns:   0 if the final file was created, 1 otherwise
#######################################
concatenate_all_sites() {
  local final_merged_file="${OUTDIR}/final_merged.nc"
  local f
  local -a files_all=()

  echo "### Step 3: Concatenating across all sites ###"

  for f in "${MERGED_ENSEMBLE_DIR}"/merged_ensemble_*.nc; do
    # An unmatched glob stays literal, so keep only entries that exist.
    [[ -e "$f" ]] || continue
    files_all+=("$f")
  done

  if (( ${#files_all[@]} == 0 )); then
    echo "No merged ensemble files found. Exiting without final concatenation." >&2
    return 1
  fi

  echo "Concatenating ${#files_all[@]} merged ensemble files across all sites..."
  if ! ncecat -O -u site "${files_all[@]}" "$final_merged_file"; then
    echo "Error creating final merged file. Intermediate files retained for debugging." >&2
    return 1
  fi

  echo "Final merged file created at: $final_merged_file"
  echo "Cleaning up intermediate directories."
  # ':?' aborts rather than running rm -rf with an empty or unset path.
  rm -rf -- "${MERGED_TIME_DIR:?}" "${MERGED_ENSEMBLE_DIR:?}"

  echo "=== EFI NetCDF Concatenation Process Completed ==="
  return 0
}

# Keep this call as the last command: its return value becomes the script's
# exit status, so schedulers and callers can detect a failed concatenation.
concatenate_all_sites
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment