Created
June 8, 2024 07:04
-
-
Save bennyistanto/b8dfbd0d0c700d22c82f48ec5b334006 to your computer and use it in GitHub Desktop.
Calculates the thresholds for extreme rainfall events using both percentile-based and Generalized Extreme Value (GEV) methods
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xarray as xr | |
import numpy as np | |
import os | |
import glob | |
import pandas as pd | |
from tqdm import tqdm | |
from scipy.stats import genextreme | |
# Define directories and filenames for threshold calculations | |
threshold_calculations = [ | |
{"days": 1, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_1day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_1day"}, | |
{"days": 2, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_2day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_2day"}, | |
{"days": 3, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_3day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_3day"}, | |
{"days": 4, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_4day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_4day"}, | |
{"days": 5, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_5day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_5day"}, | |
] | |
# Threshold definitions | |
threshold_definitions = [ | |
{"name": "moderate", "percentile": 50, "years": 2, "output_suffix": "02yr"}, | |
{"name": "heavy", "percentile": 80, "years": 5, "output_suffix": "05yr"}, | |
{"name": "intense", "percentile": None, "years": 10, "output_suffix": "10yr"}, | |
{"name": "extreme", "percentile": None, "years": 25, "output_suffix": "25yr"}, | |
] | |
# Global variable to store user's choice | |
user_choice = None | |
def set_user_decision(): | |
""" | |
Prompt user for decision on existing files and store it globally. | |
This function prompts the user to decide whether to replace, skip, or abort | |
if an output file already exists. The decision is stored in a global variable | |
to be used throughout the script. | |
""" | |
global user_choice | |
if user_choice is None: | |
decision = input("An output file already exists. Choose an action - Replace (R), Skip (S), Abort (A): ").upper() | |
while decision not in ['R', 'S', 'A']: | |
print("Invalid choice. Please choose again.") | |
decision = input("Choose an action - Replace (R), Skip (S), Abort (A): ").upper() | |
user_choice = decision | |
def calculate_percentile_threshold(data, percentile): | |
""" | |
Calculate percentile threshold for given data. | |
Parameters: | |
data (numpy.ndarray): The input precipitation data. | |
percentile (int): The percentile to calculate (e.g., 50 for P50). | |
Returns: | |
numpy.ndarray: The calculated percentile threshold. | |
""" | |
return np.nanpercentile(data, percentile, axis=0) | |
def calculate_gev_threshold(data, return_period): | |
""" | |
Calculate GEV threshold using Maximum Likelihood Estimation (MLE) for fitting the GEV distribution. | |
Parameters: | |
data (numpy.ndarray): The input precipitation data. | |
return_period (int): The return period for the threshold (e.g., 10 years). | |
Returns: | |
float: The calculated GEV threshold. | |
""" | |
shape, loc, scale = genextreme.fit(data) # MLE is used by default | |
return genextreme.ppf(1 - 1 / return_period, shape, loc=loc, scale=scale) | |
# Process each accumulation period | |
for calc in threshold_calculations: | |
days = calc["days"] | |
input_dir = calc["input_dir"] | |
output_dir = calc["output_dir"] | |
# Ensure output directory exists | |
os.makedirs(output_dir, exist_ok=True) | |
# Create a list of files | |
file_list = sorted(glob.glob(os.path.join(input_dir, f"wld_cli_imerg_fddmax_{days}day_*.nc4"))) | |
# Group files by month | |
files_by_month = {} | |
for file_path in file_list: | |
date_str = file_path.split('_')[-1].split('.')[0] | |
month = date_str[4:6] | |
if month not in files_by_month: | |
files_by_month[month] = [] | |
files_by_month[month].append(file_path) | |
# Process each month | |
with tqdm(total=len(files_by_month), desc=f"Processing {days}-day data by month") as month_pbar: | |
for month, month_files in files_by_month.items(): | |
with tqdm(total=len(threshold_definitions), desc=f"Calculating thresholds for month {month}", leave=False) as threshold_pbar: | |
for threshold_def in threshold_definitions: | |
name = threshold_def["name"] | |
percentile = threshold_def["percentile"] | |
years = threshold_def["years"] | |
output_suffix = threshold_def["output_suffix"] | |
output_filename = f"wld_cli_imerg_{days}day_extreme_threshold_{output_suffix}_{month}.nc4" | |
output_filepath = os.path.join(output_dir, output_filename) | |
if os.path.exists(output_filepath): | |
if user_choice is None: | |
set_user_decision() | |
if user_choice == 'S': | |
print(f"Skipping existing file: {output_filepath}") | |
continue # Skip to the next file | |
elif user_choice == 'A': | |
print("Aborting process.") | |
exit(0) # Exit the script | |
elif user_choice == 'R': | |
pass # Replace the file | |
try: | |
# Load data for the current month | |
datasets = [xr.open_dataset(f) for f in month_files] | |
combined_ds = xr.concat(datasets, dim='time') | |
data = combined_ds['precipitation'].values | |
# Calculate threshold | |
if percentile is not None: | |
threshold = calculate_percentile_threshold(data, percentile) | |
else: | |
threshold = calculate_gev_threshold(data.flatten(), years) | |
# Create an xarray DataArray for the threshold | |
threshold_da = xr.DataArray(threshold, dims=["lat", "lon"], coords={"lat": combined_ds["lat"], "lon": combined_ds["lon"]}) | |
threshold_ds = threshold_da.to_dataset(name="threshold") | |
threshold_ds.attrs['Conventions'] = 'CF-1.8' | |
# Ensure CF conventions and compression | |
encoding = {var: {'zlib': True, 'complevel': 5} for var in threshold_ds.data_vars} | |
threshold_ds.to_netcdf(output_filepath, encoding=encoding) | |
print(f"Saved: {output_filepath}") | |
except Exception as e: | |
print(f"Error processing month {month} for {name} threshold: {e}") | |
finally: | |
for ds in datasets: | |
ds.close() | |
threshold_pbar.update(1) | |
month_pbar.update(1) | |
print("All thresholds calculated and saved.") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment