Computes the maximum precipitation value for each dekad (10-day period) from the daily and multi-day accumulated IMERG datasets.
import xarray as xr
import numpy as np
import os
import sys
import glob
import pandas as pd
from tqdm import tqdm

# Define the input and output directories for each accumulation window
max_calculations = [
    {"days": 1, "input_dir": "/mnt/d/temp/imerg/data/fdd_1day", "output_dir": "/mnt/d/temp/imerg/data/fddmaxd_1day"},
    {"days": 2, "input_dir": "/mnt/d/temp/imerg/data/fdd_2day", "output_dir": "/mnt/d/temp/imerg/data/fddmaxd_2day"},
    {"days": 3, "input_dir": "/mnt/d/temp/imerg/data/fdd_3day", "output_dir": "/mnt/d/temp/imerg/data/fddmaxd_3day"},
    {"days": 4, "input_dir": "/mnt/d/temp/imerg/data/fdd_4day", "output_dir": "/mnt/d/temp/imerg/data/fddmaxd_4day"},
    {"days": 5, "input_dir": "/mnt/d/temp/imerg/data/fdd_5day", "output_dir": "/mnt/d/temp/imerg/data/fddmaxd_5day"},
]
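
# Input files are expected to match the glob used below, e.g.
# wld_cli_imerg_fdd_1day_20200101.nc4; the date suffix is assumed to start
# with the four-digit year, since that is how files are grouped by year.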

# Directory to store combined yearly datasets
combined_year_dir = "/mnt/d/temp/imerg/data/combined_year_ds"

# Global variable to store the user's choice
user_choice = None


def set_user_decision():
    """Prompt the user for a decision on existing files and store it globally."""
    global user_choice
    if user_choice is None:
        decision = input("An output file already exists. Choose an action - Replace (R), Skip (S), Abort (A): ").upper()
        while decision not in ['R', 'S', 'A']:
            print("Invalid choice. Please choose again.")
            decision = input("Choose an action - Replace (R), Skip (S), Abort (A): ").upper()
        user_choice = decision
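
# Note: the decision is remembered globally, so the user is prompted at most
# once and the same action (replace/skip/abort) applies to every later file.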

def create_combined_yearly_ds(input_dir, days):
    """Create combined yearly datasets and save them to disk."""
    # Check that the input folder exists
    if not os.path.exists(input_dir):
        print(f"Input folder does not exist: {input_dir}")
        sys.exit(1)
    os.makedirs(combined_year_dir, exist_ok=True)

    # Create a sorted list of input files
    file_list = sorted(glob.glob(os.path.join(input_dir, f"wld_cli_imerg_fdd_{days}day_*.nc4")))

    # Group files by year, parsed from the date suffix of each filename
    files_by_year = {}
    for file_path in file_list:
        date_str = file_path.split('_')[-1].split('.')[0]
        year = date_str[:4]
        files_by_year.setdefault(year, []).append(file_path)

    # Process each year separately and create a combined dataset for it
    with tqdm(total=len(files_by_year), desc=f"Processing {days}-day data by year") as year_pbar:
        for year, year_files in files_by_year.items():
            combined_ds_path = os.path.join(combined_year_dir, f"combined_{days}day_{year}.nc4")
            if not os.path.exists(combined_ds_path):
                with tqdm(total=len(year_files), desc=f"Combining datasets for {year}", leave=False) as combine_pbar:
                    datasets = [xr.open_dataset(f) for f in year_files]
                    combined_year_ds = xr.concat(datasets, dim='time')
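                    # Note: xr.open_mfdataset(year_files, combine='by_coords')
                    # would be a lazier alternative here; the explicit concat is
                    # kept because it matches the original approach.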
                    combine_pbar.update(len(year_files))
                combined_year_ds.to_netcdf(combined_ds_path)
                combined_year_ds.close()
                for ds in datasets:
                    ds.close()
                print(f"Combined dataset saved to {combined_ds_path}")
            year_pbar.update(1)
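
# Example result: /mnt/d/temp/imerg/data/combined_year_ds/combined_1day_2020.nc4
# would hold every 1-day accumulation file from 2020 along the time dimension
# (the year 2020 here is illustrative).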

def calculate_dekad_max(combined_ds):
    """Calculate the maximum precipitation for each dekad (10-day period) within each month."""
    dekad_max_list = []
    for year in np.unique(combined_ds.time.dt.year.values):
        for month in np.unique(combined_ds.time.dt.month.values):
            month_data = combined_ds.sel(time=f"{year}-{month:02d}")
            if month_data.time.size > 0:
                # Number of days in the month, accounting for leap years
                days_in_month = pd.Timestamp(f"{year}-{month:02d}-01").days_in_month
                dekads = [(1, 10), (11, 20), (21, days_in_month)]
                for start_day, end_day in dekads:
                    dekad_data = month_data.sel(time=slice(f"{year}-{month:02d}-{start_day:02d}",
                                                           f"{year}-{month:02d}-{end_day:02d}"))
                    if dekad_data.time.size > 0:
                        dekad_max = dekad_data.max(dim='time')
                        # Stamp the dekad with its start date so results can be concatenated along time
                        dekad_start_date = f"{year}-{month:02d}-{start_day:02d}"
                        dekad_max['time'] = pd.date_range(start=dekad_start_date, periods=1)
                        dekad_max_list.append(dekad_max)
    return xr.concat(dekad_max_list, dim='time')
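
# Example: for February 2024 (a leap year) the dekads resolve to (1, 10),
# (11, 20), and (21, 29); for a 31-day month the last dekad is (21, 31).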

for calc in max_calculations:
    days = calc["days"]
    input_dir = calc["input_dir"]
    output_dir = calc["output_dir"]
    os.makedirs(output_dir, exist_ok=True)

    # Create combined yearly datasets if they don't already exist
    create_combined_yearly_ds(input_dir, days)

    # Only count this window's combined files so the progress total is accurate
    combined_files = sorted(f for f in os.listdir(combined_year_dir)
                            if f"combined_{days}day_" in f)

    # Load and process each combined dataset by year
    with tqdm(total=len(combined_files), desc=f"Calculating dekad max for {days}-day data") as year_pbar:
        for combined_ds_file in combined_files:
            combined_ds_path = os.path.join(combined_year_dir, combined_ds_file)
            combined_ds = xr.open_dataset(combined_ds_path)

            # Calculate the dekad maxima for this year
            dekad_max = calculate_dekad_max(combined_ds)

            # Save each dekad's maximum as a separate file
            for date in tqdm(dekad_max.time.values, desc=f"Saving {days}-day dekad max files", leave=False):
                dekad_start_date = pd.Timestamp(date).strftime('%Y%m%d')
                output_filename = f"wld_cli_imerg_fddmax_{days}day_{dekad_start_date}.nc4"
                output_filepath = os.path.join(output_dir, output_filename)
                if os.path.exists(output_filepath):
                    if user_choice is None:
                        set_user_decision()
                    if user_choice == 'S':
                        print(f"Skipping existing file: {output_filepath}")
                        continue  # Skip to the next file
                    elif user_choice == 'A':
                        print("Aborting process.")
                        sys.exit(0)  # Exit the script
                    elif user_choice == 'R':
                        pass  # Replace the file
                try:
                    ds = dekad_max.sel(time=date)
                    ds = ds.expand_dims(time=[date])  # Re-add the time dimension with the correct date
                    # Ensure CF conventions and compression
                    encoding = {var: {'zlib': True, 'complevel': 5} for var in ds.data_vars}
                    ds.attrs['Conventions'] = 'CF-1.8'
                    ds.to_netcdf(output_filepath, encoding=encoding)
                    print(f"Saved: {output_filepath}")
                except Exception as e:
                    print(f"Error saving file {output_filepath}: {e}")
            combined_ds.close()  # Release the yearly dataset before moving on
            print(f"{days}-day dekad max processing complete for {combined_ds_file}.")
            year_pbar.update(1)

print("All dekad max calculations processed.")