Created
March 17, 2023 11:42
-
-
Save bennyistanto/a504b834f759dca4db99edfc289d50a4 to your computer and use it in GitHub Desktop.
Generate plots on improvements daily rainfall after the bias correction
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
NAME | |
precip_improvement_plots.py | |
Generate plots on improvements daily rainfall after the bias correction | |
DESCRIPTION | |
Input data for this script will use the gauge and satellite-based precipitation | |
estimates data, and corrected precipitation from bias-correction process. | |
REQUIREMENT | |
It required numpy, pandas, scipy, matplotlib, seaborn and xarray module. | |
So it will work on any machine environment | |
EXAMPLES | |
python precip_improvement_plots.py | |
NOTES | |
The code imports the necessary libraries, loads the reference data (CPC) and the | |
original satellite data (IMERG), and calculates the daily bias and RMSE. The | |
performance_metrics() function computes bias, MAE, and RMSE before and after the | |
correction. The script then defines plotting functions for scatter plots, PDFs, CDFs, | |
Bland-Altman plots, and performance metrics. Finally, for each method and year, | |
the script reads the corrected precipitation data, creates and saves the plots, and | |
prints the performance metrics. Make sure the paths and filenames for your input and | |
output files match your file structure. | |
CONTACT | |
Benny Istanto | |
Climate Geographer | |
GOST, The World Bank | |
LICENSE | |
This script is in the public domain, free from copyrights or restrictions. | |
VERSION | |
$Id$ | |
TODO | |
xx | |
""" | |
import os | |
import pandas as pd | |
import xarray as xr | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from scipy.stats import gaussian_kde | |
# Define the list of methods | |
methods = ['scale', 'distribution', 'delta', 'lsc', 'lscdf', 'rcdfm', 'mlr', | |
'ann', 'kalman', 'bcsd', 'bcsdm', 'qq_mapping', 'eQM', 'aQM', 'gQM', 'gpdQM'] # same as before | |
# Define the input and output directory paths | |
input_dir = f'input' | |
imerg_path = f'{input_dir}/imerg' | |
cpc_path = f'{input_dir}/cpc' | |
output_dir = f'output' | |
plots_dir = f'{output_dir}/plots' | |
# Create the output directories if they don't already exist | |
os.makedirs(output_dir, exist_ok=True) | |
os.makedirs(plots_dir, exist_ok=True) | |
# Load reference gauge data (assumed to be a NetCDF file) | |
cpc_ds = xr.open_dataset(f'{cpc_path}/*.nc')['precip'] | |
# Load original satellite data (assumed to be a NetCDF file) | |
imerg_ds = xr.open_dataset(f'{imerg_path}/*.nc')['precipitationCal'] | |
def calculate_daily_bias(cpc_ds, data): | |
""" | |
This function calculates the daily bias between the corrected data and the reference data (CPC) | |
by taking the difference and then finding the mean over the latitude and longitude dimensions. | |
Parameters: | |
- cpc_ds: Reference gauge dataset (CPC) | |
- data: Corrected dataset | |
Returns: Daily bias value | |
""" | |
return (data - cpc_ds).mean(dim=['lat', 'lon']) | |
def calculate_daily_rmse(cpc_ds, data): | |
""" | |
This function calculates the daily root mean square error (RMSE) between the corrected data and | |
the reference data (CPC) by taking the squared difference, finding the mean over the latitude and | |
longitude dimensions, and then taking the square root. | |
Parameters: | |
- cpc_ds: Reference gauge dataset (CPC) | |
- data: Corrected dataset | |
Returns: Daily RMSE value | |
""" | |
return np.sqrt(((data - cpc_ds) ** 2).mean(dim=['lat', 'lon'])) | |
def performance_metrics(cpc_ds, imerg_ds, corrected_ds): | |
""" | |
This function calculates the bias, mean absolute error (MAE), and RMSE before and after the bias | |
correction. It then returns a DataFrame containing these metrics. | |
Parameters: | |
- cpc_ds: Reference gauge dataset (CPC) | |
- imerg_ds: Original satellite dataset (IMERG) | |
- corrected_ds: Corrected dataset | |
Returns: DataFrame with the performance metrics before and after the correction | |
""" | |
bias_before = np.mean(imerg_ds - cpc_ds) | |
bias_after = np.mean(corrected_ds - cpc_ds) | |
mae_before = np.mean(np.abs(imerg_ds - cpc_ds)) | |
mae_after = np.mean(np.abs(corrected_ds - cpc_ds)) | |
rmse_before = np.sqrt(np.mean((imerg_ds - cpc_ds) ** 2)) | |
rmse_after = np.sqrt(np.mean((corrected_ds - cpc_ds) ** 2)) | |
return pd.DataFrame({ | |
'Metric': ['Bias', 'MAE', 'RMSE'], | |
'Before Correction': [bias_before, mae_before, rmse_before], | |
'After Correction': [bias_after, mae_after, rmse_after] | |
}) | |
def plot_scatter(ax, cpc_ds, data, label, color): | |
""" | |
This function creates a scatter plot comparing the reference data to the original or corrected data, | |
with the reference data on the x-axis and the data on the y-axis. | |
Parameters: | |
- ax: Matplotlib axis object for plotting | |
- cpc_ds: Reference gauge dataset (CPC) | |
- data: Dataset to be compared (either original or corrected) | |
- label: Label for the plotted data | |
- color: Color for the plotted data points | |
""" | |
ax.scatter(cpc_ds, data, alpha=0.5, color=color, label=label) | |
ax.set_xlabel('Reference Data') | |
ax.set_ylabel('Data') | |
ax.set_title('Scatter Plot') | |
def plot_pdf(ax, data, label, color): | |
""" | |
This function creates a probability density function (PDF) plot for the reference data, | |
the original satellite data, or the corrected data. It estimates the PDF using Gaussian | |
kernel density estimation and then plots the estimated PDF. | |
Parameters: | |
- ax: Matplotlib axis object for plotting | |
- data: Dataset for which the PDF will be plotted | |
- label: Label for the plotted data | |
- color: Color for the plotted data points | |
""" | |
kernel = gaussian_kde(data) | |
x_vals = np.linspace(data.min(), data.max(), 1000) | |
y_vals = kernel(x_vals) | |
ax.plot(x_vals, y_vals, color=color, label=label) | |
ax.set_xlabel('Value') | |
ax.set_ylabel('Probability Density') | |
ax.set_title('PDF') | |
def plot_cdf(ax, data, label, color): | |
""" | |
This function creates a cumulative distribution function (CDF) plot for the reference data, | |
the original satellite data, or the corrected data. | |
Parameters: | |
- ax: Matplotlib axis object for plotting | |
- data: Dataset for which the CDF will be plotted | |
- label: Label for the plotted data | |
- color: Color for the plotted data points | |
""" | |
sorted_data = np.sort(data) | |
cdf = np.arange(1, len(sorted_data) + 1) / len(sorted_data) | |
ax.plot(sorted_data, cdf, color=color, label=label) | |
ax.set_xlabel('Value') | |
ax.set_ylabel('Cumulative Probability') | |
ax.set_title('CDF') | |
def plot_bland_altman(ax, cpc_ds, data, label, color): | |
""" | |
This function creates a Bland-Altman plot, which is a scatter plot of the differences between | |
the reference data and the original or corrected data against the average of the reference data | |
and the data. It also plots the mean difference and the 95% limits of agreement. | |
Parameters: | |
- ax: Matplotlib axis object for plotting | |
- cpc_ds: Reference gauge dataset (CPC) | |
- data: Dataset to be compared (either original or corrected) | |
- label: Label for the plotted data | |
- color: Color for the plotted data points | |
""" | |
mean_difference = np.mean(cpc_ds - data) | |
std_difference = np.std(cpc_ds - data, ddof=1) | |
ax.scatter((cpc_ds + data) / 2, cpc_ds - data, alpha=0.5, color=color, label=label) | |
ax.axhline(mean_difference, color='k', linestyle='--') | |
ax.axhline(mean_difference + 1.96 * std_difference, color='k', linestyle='--') | |
ax.axhline(mean_difference - 1.96 * std_difference, color='k', linestyle='--') | |
ax.set_xlabel('Mean of Reference Data and Data') | |
ax.set_ylabel('Difference between Reference Data and Data') | |
ax.set_title('Bland-Altman Plot') | |
def plot_performance_metrics(ax, metrics_df): | |
""" | |
This function creates a bar plot of the performance metrics (bias, MAE, and RMSE) | |
before and after the correction. | |
Parameters: | |
- ax: Matplotlib axis object for plotting | |
- metrics_df: DataFrame with the performance metrics before and after the correction | |
""" | |
metrics_df.plot.bar(x='Metric', y=['Before Correction', 'After Correction'], ax=ax) | |
ax.set_ylabel('Value') | |
ax.set_title('Performance Metrics') | |
ax.grid(True) | |
for method in methods: | |
for year in range(2001, 2023): | |
# Load the corrected precipitation data for the method (assumed to be a NetCDF file) | |
corrected_ds = xr.open_dataset(f'output/{method}/corrected/corrected_{method}_{year}.nc')['precipitation'] | |
# Get the data for this year | |
imerg_year_ds = imerg_ds.sel(time=imerg_ds['time.year'] == year) | |
cpc_year_ds = cpc_ds.sel(time=cpc_ds['time.year'] == year) | |
# Scatter plot | |
fig, scatter_ax = plt.subplots(figsize=(6, 6)) | |
plot_scatter(scatter_ax, cpc_ds, imerg_ds, label='Original Satellite', color='r') | |
plot_scatter(scatter_ax, cpc_ds, corrected_ds, label=f'{method} Corrected', color='b') | |
scatter_ax.legend(loc='upper left') | |
scatter_ax.grid(True) | |
fig.savefig(f'{plots_dir}/{method}_scatter_plot.png', dpi=300, bbox_inches='tight') | |
plt.close(fig) | |
# PDF plot | |
fig, pdf_ax = plt.subplots(figsize=(6, 6)) | |
plot_pdf(pdf_ax, cpc_ds, label='Reference', color='k') | |
plot_pdf(pdf_ax, imerg_ds, label='Original Satellite', color='r') | |
plot_pdf(pdf_ax, corrected_ds, label=f'{method} Corrected', color='b') | |
pdf_ax.legend(loc='upper right') | |
pdf_ax.grid(True) | |
fig.savefig(f'{plots_dir}/{method}_pdf_plot.png', dpi=300, bbox_inches='tight') | |
plt.close(fig) | |
# CDF plot | |
fig, cdf_ax = plt.subplots(figsize=(6, 6)) | |
plot_cdf(cdf_ax, cpc_ds, label='Reference', color='k') | |
plot_cdf(cdf_ax, imerg_ds, label='Original Satellite', color='r') | |
plot_cdf(cdf_ax, corrected_ds, label=f'{method} Corrected', color='b') | |
cdf_ax.legend(loc='upper left') | |
cdf_ax.grid(True) | |
fig.savefig(f'{plots_dir}/{method}_cdf_plot.png', dpi=300, bbox_inches='tight') | |
plt.close(fig) | |
# Bland-Altman plot | |
fig, bland_altman_ax = plt.subplots(figsize=(6, 6)) | |
plot_bland_altman(bland_altman_ax, cpc_ds, imerg_ds, label='Original Satellite', color='r') | |
plot_bland_altman(bland_altman_ax, cpc_ds, corrected_ds, label=f'{method} Corrected', color='b') | |
bland_altman_ax.legend(loc='upper left') | |
bland_altman_ax.grid(True) | |
fig.savefig(f'{plots_dir}/{method}_bland_altman_plot.png', dpi=300, bbox_inches='tight') | |
plt.close(fig) | |
# Performance metrics | |
metrics_df = performance_metrics(cpc_ds, imerg_ds, corrected_ds) | |
print(f"Performance Metrics for {method}:") | |
print(metrics_df.to_string(index=False)) | |
print("\n") | |
fig, metrics_ax = plt.subplots(figsize=(6, 6)) | |
plot_performance_metrics(metrics_ax, metrics_df) | |
fig.savefig(f'{plots_dir}/{method}_performance_metrics_plot.png', dpi=300, bbox_inches='tight') | |
plt.close(fig) | |
cpc_ds.close() | |
imerg_ds.close() | |
corrected_ds.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment