bennyistanto · September 30, 2024 16:43
diff --git a/0_rainfall_distribution.md b/0_rainfall_distribution.md
diff --git a/cdf_comparison.py b/cdf_comparison.py
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy.stats import gamma, genpareto
 from sklearn.model_selection import KFold

 # Set the figure DPI to 300
 plt.rcParams['figure.dpi'] = 300

 def calculate_l_moments(data):
    """
    Calculate L-moments for the given data.
    
    L-moments are analogous to conventional moments but can be more robust to outliers.
    
    Args:
    data (np.array): Input data array
    
    Returns:
    tuple: L-moments (l1, l2, l3, l4) and L-moment ratios (t2, t3, t4)
    """
    sorted_data = np.sort(data)
    n = len(data)
    
    # Calculate probability weighted moments
    b0 = np.mean(sorted_data)
    b1 = np.sum((np.arange(1, n) * sorted_data[1:]) / (n * (n - 1)))
    b2 = np.sum((np.arange(1, n-1) * (np.arange(2, n) * sorted_data[2:])) / (n * (n - 1) * (n - 2)))
    b3 = np.sum((np.arange(1, n-2) * (np.arange(2, n-1) * (np.arange(3, n) * sorted_data[3:]))) / (n * (n - 1) * (n - 2) * (n - 3)))
    
    # Calculate L-moments
    l1 = b0  # L1 is the mean (measure of location)
    l2 = 2 * b1 - b0  # L2 is a measure of scale (analogous to standard deviation)
    l3 = 6 * b2 - 6 * b1 + b0  # L3 is a measure of skewness
    l4 = 20 * b3 - 30 * b2 + 12 * b1 - b0  # L4 is a measure of kurtosis
    
    # Calculate L-moment ratios
    t2 = l2 / l1 if l1 != 0 else np.nan  # L-CV (coefficient of L-variation)
    t3 = l3 / l2 if l2 != 0 else np.nan  # L-skewness
    t4 = l4 / l2 if l2 != 0 else np.nan  # L-kurtosis
    
    return l1, l2, l3, l4, t2, t3, t4

 def fit_gamma_with_l_moments(data):
    """
    Fit a Gamma distribution to the data using L-moments method.
    
    Args:
    data (np.array): Input data array
    
    Returns:
    tuple: Fitted Gamma distribution parameters (shape, loc, scale)
    """
    data = data[~np.isnan(data)]
    if len(data) == 0:
        return 1, 0, 1  # Default values: shape=1, loc=0, scale=1
    
    l1, l2, _, _, t2, _, _ = calculate_l_moments(data)
    shape = (2 / t2) if t2 != 0 else 0.001
    scale = l2 / shape if shape != 0 else l2
    loc = 0
    return shape, loc, scale

 def fit_generalized_pareto_distribution(data, threshold):
    """
    Fit a Generalized Pareto Distribution (GPD) to the excesses above the threshold.
    
    Args:
    data (np.array): Input data array
    threshold (float): Threshold for defining excesses
    
    Returns:
    tuple: Fitted GPD parameters (shape, loc, scale)
    """
    excesses = data[data > threshold] - threshold
    if len(excesses) < 10:  # Arbitrary minimum number of points for GPD fitting
        return (0, 0, 1)  # Return a default GPD with zero shape, zero location, and unit scale
    params = genpareto.fit(excesses)
    return params

 def cross_validate_gpd(data, threshold, n_splits=5):
    """
    Perform cross-validation for GPD fitting.
    
    Args:
    data (np.array): Input data array
    threshold (float): Threshold for defining excesses
    n_splits (int): Number of splits for cross-validation
    
    Returns:
    tuple: Average GPD parameters from cross-validation (shape, loc, scale)
    """
    excesses = data[data > threshold] - threshold
    if len(excesses) < n_splits:
        return fit_generalized_pareto_distribution(data, threshold)
    
    kf = KFold(n_splits=n_splits)
    params_list = []
    for train_index, test_index in kf.split(excesses):
        train_data, test_data = excesses[train_index], excesses[test_index]
        params = genpareto.fit(train_data)
        params_list.append(params)
    
    shape_avg = np.mean([params[0] for params in params_list])
    loc_avg = np.mean([params[1] for params in params_list])
    scale_avg = np.mean([params[2] for params in params_list])
    return shape_avg, loc_avg, scale_avg

 # Data generation
 np.random.seed(42)
 days = 30

 # Generate base rainfall data
 base_rainfall = np.random.gamma(2, 10, days)

 # Create observed rainfall by adding random fluctuations
 observed_rainfall = base_rainfall + np.random.normal(0, 5, days)

 # Create satellite data with its own set of errors and biases
 satellite_rainfall = base_rainfall * np.random.uniform(0.8, 1.2, days) + np.random.normal(0, 7, days)

 # Add extreme values for 7 random days
 extreme_days = np.random.choice(days, 7, replace=False)
 extreme_values = np.random.uniform(50, 100, 7)
 observed_rainfall[extreme_days] += extreme_values
 satellite_rainfall[extreme_days] += extreme_values * np.random.uniform(0.9, 1.1, 7)

 # Ensure non-negative values
 observed_rainfall = np.maximum(observed_rainfall, 0)
 satellite_rainfall = np.maximum(satellite_rainfall, 0)

 # Fit Gamma distributions
 obs_gamma_params = fit_gamma_with_l_moments(observed_rainfall)
 sat_gamma_params = fit_gamma_with_l_moments(satellite_rainfall)

 # Plotting in XKCD style
 plt.xkcd()

 # Create figure and adjust subplots
 fig, axs = plt.subplots(2, 2, figsize=(16, 16))
 fig.suptitle("CDF of Data and Fitted Gamma Distributions", fontsize=24, y=0.98)

 # Percentiles to use for x-axis limits
 percentiles = [80, 90, 96, 98]

 for i, percentile in enumerate(percentiles):
    row = i // 2
    col = i % 2
    
    max_rainfall = np.percentile(np.concatenate([observed_rainfall, satellite_rainfall]), percentile)
    
    x = np.linspace(0, max_rainfall, 100)
    
    # Plot empirical CDF
    axs[row, col].step(np.sort(observed_rainfall), np.linspace(0, 1, len(observed_rainfall)), 
                       'r:', label='Observed Empirical CDF')
    axs[row, col].step(np.sort(satellite_rainfall), np.linspace(0, 1, len(satellite_rainfall)), 
                       'g:', label='Satellite Empirical CDF')
    
    # Plot fitted Gamma CDFs
    axs[row, col].plot(x, gamma.cdf(x, *obs_gamma_params), 'r-', lw=2, label='Observed Gamma CDF')
    axs[row, col].plot(x, gamma.cdf(x, *sat_gamma_params), 'g-', lw=2, label='Satellite Gamma CDF')
    
    axs[row, col].set_title(f'CDF Comparison (up to {percentile}th percentile)')
    axs[row, col].set_xlabel('Rainfall (mm)')
    axs[row, col].set_ylabel('Cumulative Probability')
    axs[row, col].legend(fontsize='x-small')
    axs[row, col].set_xlim(0, max_rainfall)
    axs[row, col].set_ylim(0, 1)

 plt.tight_layout()
 plt.subplots_adjust(top=0.93)  # Adjust the top of the subplots to make room for the title
 plt.show()
diff --git a/gpd_comparison.py b/gpd_comparison.py
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy.stats import gamma, genpareto
 from sklearn.model_selection import KFold

 # Set the figure DPI to 300
 plt.rcParams['figure.dpi'] = 300

 def calculate_l_moments(data):
    """
    Calculate L-moments for the given data.
    
    L-moments are analogous to conventional moments but can be more robust to outliers.
    
    Args:
    data (np.array): Input data array
    
    Returns:
    tuple: L-moments (l1, l2, l3, l4) and L-moment ratios (t2, t3, t4)
    """
    sorted_data = np.sort(data)
    n = len(data)
    
    # Calculate probability weighted moments
    b0 = np.mean(sorted_data)
    b1 = np.sum((np.arange(1, n) * sorted_data[1:]) / (n * (n - 1)))
    b2 = np.sum((np.arange(1, n-1) * (np.arange(2, n) * sorted_data[2:])) / (n * (n - 1) * (n - 2)))
    b3 = np.sum((np.arange(1, n-2) * (np.arange(2, n-1) * (np.arange(3, n) * sorted_data[3:]))) / (n * (n - 1) * (n - 2) * (n - 3)))
    
    # Calculate L-moments
    l1 = b0  # L1 is the mean (measure of location)
    l2 = 2 * b1 - b0  # L2 is a measure of scale (analogous to standard deviation)
    l3 = 6 * b2 - 6 * b1 + b0  # L3 is a measure of skewness
    l4 = 20 * b3 - 30 * b2 + 12 * b1 - b0  # L4 is a measure of kurtosis
    
    # Calculate L-moment ratios
    t2 = l2 / l1 if l1 != 0 else np.nan  # L-CV (coefficient of L-variation)
    t3 = l3 / l2 if l2 != 0 else np.nan  # L-skewness
    t4 = l4 / l2 if l2 != 0 else np.nan  # L-kurtosis
    
    return l1, l2, l3, l4, t2, t3, t4

 def fit_gamma_with_l_moments(data):
    """
    Fit a Gamma distribution to the data using L-moments method.
    
    Args:
    data (np.array): Input data array
    
    Returns:
    tuple: Fitted Gamma distribution parameters (shape, loc, scale)
    """
    data = data[~np.isnan(data)]
    if len(data) == 0:
        return 1, 0, 1  # Default values: shape=1, loc=0, scale=1
    
    l1, l2, _, _, t2, _, _ = calculate_l_moments(data)
    shape = (2 / t2) if t2 != 0 else 0.001
    scale = l2 / shape if shape != 0 else l2
    loc = 0
    return shape, loc, scale

 def fit_generalized_pareto_distribution(data, threshold):
    """
    Fit a Generalized Pareto Distribution (GPD) to the excesses above the threshold.
    
    Args:
    data (np.array): Input data array
    threshold (float): Threshold for defining excesses
    
    Returns:
    tuple: Fitted GPD parameters (shape, loc, scale)
    """
    excesses = data[data > threshold] - threshold
    if len(excesses) < 10:  # Arbitrary minimum number of points for GPD fitting
        return (0, 0, 1)  # Return a default GPD with zero shape, zero location, and unit scale
    params = genpareto.fit(excesses)
    return params

 def cross_validate_gpd(data, threshold, n_splits=5):
    """
    Perform cross-validation for GPD fitting.
    
    Args:
    data (np.array): Input data array
    threshold (float): Threshold for defining excesses
    n_splits (int): Number of splits for cross-validation
    
    Returns:
    tuple: Average GPD parameters from cross-validation (shape, loc, scale)
    """
    excesses = data[data > threshold] - threshold
    if len(excesses) < n_splits:
        return fit_generalized_pareto_distribution(data, threshold)
    
    kf = KFold(n_splits=n_splits)
    params_list = []
    for train_index, test_index in kf.split(excesses):
        train_data, test_data = excesses[train_index], excesses[test_index]
        params = genpareto.fit(train_data)
        params_list.append(params)
    
    shape_avg = np.mean([params[0] for params in params_list])
    loc_avg = np.mean([params[1] for params in params_list])
    scale_avg = np.mean([params[2] for params in params_list])
    return shape_avg, loc_avg, scale_avg

 # Data generation
 np.random.seed(42)
 days = 30

 # Generate base rainfall data
 base_rainfall = np.random.gamma(2, 10, days)

 # Create observed rainfall by adding random fluctuations
 observed_rainfall = base_rainfall + np.random.normal(0, 5, days)

 # Create satellite data with its own set of errors and biases
 satellite_rainfall = base_rainfall * np.random.uniform(0.8, 1.2, days) + np.random.normal(0, 7, days)

 # Add extreme values for 7 random days
 extreme_days = np.random.choice(days, 7, replace=False)
 extreme_values = np.random.uniform(50, 100, 7)
 observed_rainfall[extreme_days] += extreme_values
 satellite_rainfall[extreme_days] += extreme_values * np.random.uniform(0.9, 1.1, 7)

 # Ensure non-negative values
 observed_rainfall = np.maximum(observed_rainfall, 0)
 satellite_rainfall = np.maximum(satellite_rainfall, 0)

 # Plotting in XKCD style
 plt.xkcd()

 # Create figure and adjust subplots
 fig, axs = plt.subplots(2, 2, figsize=(16, 16))
 fig.suptitle("Comparison of GPD Fits for Different Thresholds", fontsize=24, y=0.98)

 # Percentiles to use for thresholds
 percentiles = [80, 90, 96, 98]

 for i, percentile in enumerate(percentiles):
    row = i // 2
    col = i % 2
    
    threshold = np.percentile(observed_rainfall, percentile)
    
    obs_gpd_params = fit_generalized_pareto_distribution(observed_rainfall, threshold)
    sat_gpd_params = fit_generalized_pareto_distribution(satellite_rainfall, threshold)
    obs_gpd_cv_params = cross_validate_gpd(observed_rainfall, threshold)
    sat_gpd_cv_params = cross_validate_gpd(satellite_rainfall, threshold)
    
    obs_excesses = observed_rainfall[observed_rainfall > threshold] - threshold
    sat_excesses = satellite_rainfall[satellite_rainfall > threshold] - threshold
    
    x = np.linspace(0, max(obs_excesses.max(), sat_excesses.max()), 100)
    
    axs[row, col].plot(x, genpareto.pdf(x, *obs_gpd_params), 'r-', lw=2, label='Observed GPD')
    axs[row, col].plot(x, genpareto.pdf(x, *sat_gpd_params), 'g-', lw=2, label='Satellite GPD')
    axs[row, col].plot(x, genpareto.pdf(x, *obs_gpd_cv_params), 'r--', lw=2, label='Observed GPD (CV)')
    axs[row, col].plot(x, genpareto.pdf(x, *sat_gpd_cv_params), 'g--', lw=2, label='Satellite GPD (CV)')
    
    axs[row, col].set_title(f'GPD Fits Comparison ({percentile}th percentile)')
    axs[row, col].set_xlabel('Excess Rainfall (mm)')
    axs[row, col].set_ylabel('Density')
    axs[row, col].legend()

 plt.tight_layout()
 plt.subplots_adjust(top=0.93)  # Adjust the top of the subplots to make room for the title
 plt.show()
diff --git a/rainfall_distribution.py b/rainfall_distribution.py
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy.stats import gamma, genpareto
 from sklearn.model_selection import KFold

 # Set the figure DPI to 300
 plt.rcParams['figure.dpi'] = 300

 def calculate_l_moments(data):
    """
    Calculate L-moments for the given data.
    
    L-moments are analogous to conventional moments but can be more robust to outliers.
    
    Args:
    data (np.array): Input data array
    
    Returns:
    tuple: L-moments (l1, l2, l3, l4) and L-moment ratios (t2, t3, t4)
    """
    sorted_data = np.sort(data)
    n = len(data)
    
    # Calculate probability weighted moments
    b0 = np.mean(sorted_data)
    b1 = np.sum((np.arange(1, n) * sorted_data[1:]) / (n * (n - 1)))
    b2 = np.sum((np.arange(1, n-1) * (np.arange(2, n) * sorted_data[2:])) / (n * (n - 1) * (n - 2)))
    b3 = np.sum((np.arange(1, n-2) * (np.arange(2, n-1) * (np.arange(3, n) * sorted_data[3:]))) / (n * (n - 1) * (n - 2) * (n - 3)))
    
    # Calculate L-moments
    l1 = b0  # L1 is the mean (measure of location)
    l2 = 2 * b1 - b0  # L2 is a measure of scale (analogous to standard deviation)
    l3 = 6 * b2 - 6 * b1 + b0  # L3 is a measure of skewness
    l4 = 20 * b3 - 30 * b2 + 12 * b1 - b0  # L4 is a measure of kurtosis
    
    # Calculate L-moment ratios
    t2 = l2 / l1 if l1 != 0 else np.nan  # L-CV (coefficient of L-variation)
    t3 = l3 / l2 if l2 != 0 else np.nan  # L-skewness
    t4 = l4 / l2 if l2 != 0 else np.nan  # L-kurtosis
    
    return l1, l2, l3, l4, t2, t3, t4

 def fit_gamma_with_l_moments(data):
    """
    Fit a Gamma distribution to the data using L-moments method.
    
    Args:
    data (np.array): Input data array
    
    Returns:
    tuple: Fitted Gamma distribution parameters (shape, loc, scale)
    """
    data = data[~np.isnan(data)]
    if len(data) == 0:
        return 1, 0, 1  # Default values: shape=1, loc=0, scale=1
    
    l1, l2, _, _, t2, _, _ = calculate_l_moments(data)
    shape = (2 / t2) if t2 != 0 else 0.001
    scale = l2 / shape if shape != 0 else l2
    loc = 0
    return shape, loc, scale

 def fit_generalized_pareto_distribution(data, threshold):
    """
    Fit a Generalized Pareto Distribution (GPD) to the excesses above the threshold.
    
    Args:
    data (np.array): Input data array
    threshold (float): Threshold for defining excesses
    
    Returns:
    tuple: Fitted GPD parameters (shape, loc, scale)
    """
    excesses = data[data > threshold] - threshold
    if len(excesses) < 10:  # Arbitrary minimum number of points for GPD fitting
        return (0, 0, 1)  # Return a default GPD with zero shape, zero location, and unit scale
    params = genpareto.fit(excesses)
    return params

 def cross_validate_gpd(data, threshold, n_splits=5):
    """
    Perform cross-validation for GPD fitting.
    
    Args:
    data (np.array): Input data array
    threshold (float): Threshold for defining excesses
    n_splits (int): Number of splits for cross-validation
    
    Returns:
    tuple: Average GPD parameters from cross-validation (shape, loc, scale)
    """
    excesses = data[data > threshold] - threshold
    if len(excesses) < n_splits:
        return fit_generalized_pareto_distribution(data, threshold)
    
    kf = KFold(n_splits=n_splits)
    params_list = []
    for train_index, test_index in kf.split(excesses):
        train_data, test_data = excesses[train_index], excesses[test_index]
        params = genpareto.fit(train_data)
        params_list.append(params)
    
    shape_avg = np.mean([params[0] for params in params_list])
    loc_avg = np.mean([params[1] for params in params_list])
    scale_avg = np.mean([params[2] for params in params_list])
    return shape_avg, loc_avg, scale_avg

 # Data generation
 np.random.seed(42)
 days = 30

 # Generate base rainfall data
 base_rainfall = np.random.gamma(2, 10, days)

 # Create observed rainfall by adding random fluctuations
 observed_rainfall = base_rainfall + np.random.normal(0, 5, days)

 # Create satellite data with its own set of errors and biases
 satellite_rainfall = base_rainfall * np.random.uniform(0.8, 1.2, days) + np.random.normal(0, 7, days)

 # Add extreme values for 7 random days
 extreme_days = np.random.choice(days, 7, replace=False)
 extreme_values = np.random.uniform(50, 100, 7)
 observed_rainfall[extreme_days] += extreme_values
 satellite_rainfall[extreme_days] += extreme_values * np.random.uniform(0.9, 1.1, 7)

 # Ensure non-negative values
 observed_rainfall = np.maximum(observed_rainfall, 0)
 satellite_rainfall = np.maximum(satellite_rainfall, 0)

 # Fit distributions
 obs_gamma_params = fit_gamma_with_l_moments(observed_rainfall)
 sat_gamma_params = fit_gamma_with_l_moments(satellite_rainfall)

 threshold = np.percentile(observed_rainfall, 90)  # Set threshold at 90th percentile
 obs_gpd_params = fit_generalized_pareto_distribution(observed_rainfall, threshold)
 sat_gpd_params = fit_generalized_pareto_distribution(satellite_rainfall, threshold)

 obs_gpd_cv_params = cross_validate_gpd(observed_rainfall, threshold)
 sat_gpd_cv_params = cross_validate_gpd(satellite_rainfall, threshold)

 # Print basic statistics and fitted parameters
 print("Observed rainfall stats:")
 print(f"Mean: {np.mean(observed_rainfall):.2f}, Std: {np.std(observed_rainfall):.2f}")
 print("Satellite rainfall stats:")
 print(f"Mean: {np.mean(satellite_rainfall):.2f}, Std: {np.std(satellite_rainfall):.2f}")

 print("\nFitted parameters:")
 print("Observed Gamma params:", obs_gamma_params)
 print("Satellite Gamma params:", sat_gamma_params)
 print("Observed GPD params:", obs_gpd_params)
 print("Satellite GPD params:", sat_gpd_params)
 print("Observed GPD CV params:", obs_gpd_cv_params)
 print("Satellite GPD CV params:", sat_gpd_cv_params)

 # Plotting in XKCD style
 plt.xkcd()
 # Create figure and adjust subplots
 fig, axs = plt.subplots(3, 2, figsize=(16, 20))  # Increased height to accommodate title
 fig.suptitle("Rainfall Distribution Analysis: Observed vs Satellite", fontsize=24, y=0.99)

 # Plot 1: Time series of rainfall
 plt.subplot(3, 2, 1)
 plt.plot(observed_rainfall, label='Observed', marker='o')
 plt.plot(satellite_rainfall, label='Satellite', marker='s')
 plt.title('Daily Rainfall Time Series')
 plt.xlabel('Day')
 plt.ylabel('Rainfall (mm)')
 plt.legend()

 # Plot 2: Histogram and fitted Gamma distribution
 plt.subplot(3, 2, 2)
 bins = np.linspace(0, max(observed_rainfall.max(), satellite_rainfall.max()), 30)
 plt.hist(observed_rainfall, bins=bins, alpha=0.5, density=True, label='Observed')
 plt.hist(satellite_rainfall, bins=bins, alpha=0.5, density=True, label='Satellite')

 x = np.linspace(0, max(observed_rainfall.max(), satellite_rainfall.max()), 100)
 plt.plot(x, gamma.pdf(x, *obs_gamma_params), 'r-', lw=2, label='Observed Gamma Fit')
 plt.plot(x, gamma.pdf(x, *sat_gamma_params), 'g-', lw=2, label='Satellite Gamma Fit')

 plt.title('Histogram and Gamma Distribution Fit')
 plt.xlabel('Rainfall (mm)')
 plt.ylabel('Density')
 plt.legend()

 # Plot 3: Q-Q plot for GPD
 plt.subplot(3, 2, 3)
 obs_excesses = observed_rainfall[observed_rainfall > threshold] - threshold
 sat_excesses = satellite_rainfall[satellite_rainfall > threshold] - threshold

 obs_theoretical_quantiles = genpareto.ppf(np.linspace(0.01, 0.99, len(obs_excesses)), *obs_gpd_params)
 sat_theoretical_quantiles = genpareto.ppf(np.linspace(0.01, 0.99, len(sat_excesses)), *sat_gpd_params)

 plt.scatter(np.sort(obs_excesses), obs_theoretical_quantiles, label='Observed')
 plt.scatter(np.sort(sat_excesses), sat_theoretical_quantiles, label='Satellite')
 plt.plot([0, max(obs_excesses.max(), sat_excesses.max())], [0, max(obs_excesses.max(), sat_excesses.max())], 'r--')
 plt.title('Q-Q Plot for GPD Fit')
 plt.xlabel('Empirical Quantiles')
 plt.ylabel('Theoretical Quantiles')
 plt.legend()

 # Plot 4: Comparison of GPD fits
 plt.subplot(3, 2, 4)
 x = np.linspace(0, max(obs_excesses.max(), sat_excesses.max()), 100)
 plt.plot(x, genpareto.pdf(x, *obs_gpd_params), 'r-', lw=2, label='Observed GPD')
 plt.plot(x, genpareto.pdf(x, *sat_gpd_params), 'g-', lw=2, label='Satellite GPD')
 plt.plot(x, genpareto.pdf(x, *obs_gpd_cv_params), 'r--', lw=2, label='Observed GPD (CV)')
 plt.plot(x, genpareto.pdf(x, *sat_gpd_cv_params), 'g--', lw=2, label='Satellite GPD (CV)')
 plt.title('GPD Fits Comparison')
 plt.xlabel('Excess Rainfall (mm)')
 plt.ylabel('Density')
 plt.legend()

 # Plot 5: Observed vs Satellite Rainfall
 plt.subplot(3, 2, 5)
 plt.scatter(observed_rainfall, satellite_rainfall)
 plt.plot([0, max(observed_rainfall.max(), satellite_rainfall.max())], 
         [0, max(observed_rainfall.max(), satellite_rainfall.max())], 'r--')
 plt.title('Observed vs Satellite Rainfall')
 plt.xlabel('Observed Rainfall (mm)')
 plt.ylabel('Satellite Rainfall (mm)')

 # Plot 6: CDF of data and fitted Gamma distributions
 plt.subplot(3, 2, 6)
 x = np.linspace(0, max(observed_rainfall.max(), satellite_rainfall.max()), 100)
 plt.plot(x, gamma.cdf(x, *obs_gamma_params), 'r-', lw=2, label='Observed Gamma CDF')
 plt.plot(x, gamma.cdf(x, *sat_gamma_params), 'g-', lw=2, label='Satellite Gamma CDF')
 plt.step(np.sort(observed_rainfall), np.linspace(0, 1, len(observed_rainfall)), 'r:', label='Observed Empirical CDF')
 plt.step(np.sort(satellite_rainfall), np.linspace(0, 1, len(satellite_rainfall)), 'g:', label='Satellite Empirical CDF')
 plt.title('CDF Comparison')
 plt.xlabel('Rainfall (mm)')
 plt.ylabel('Cumulative Probability')
 plt.legend()

 plt.tight_layout()
 plt.subplots_adjust(top=0.93)  # Adjust the top of the subplots to make room for the title
 plt.show()
	import numpy as np
	import matplotlib.pyplot as plt
	from scipy.stats import gamma, genpareto
	from sklearn.model_selection import KFold

	# Set the figure DPI to 300
	plt.rcParams['figure.dpi'] = 300

	def calculate_l_moments(data):
	"""
	Calculate L-moments for the given data.

	L-moments are analogous to conventional moments but can be more robust to outliers.

	Args:
	data (np.array): Input data array

	Returns:
	tuple: L-moments (l1, l2, l3, l4) and L-moment ratios (t2, t3, t4)
	"""
	sorted_data = np.sort(data)
	n = len(data)

	# Calculate probability weighted moments
	b0 = np.mean(sorted_data)
	b1 = np.sum((np.arange(1, n) * sorted_data[1:]) / (n * (n - 1)))
	b2 = np.sum((np.arange(1, n-1) * (np.arange(2, n) * sorted_data[2:])) / (n * (n - 1) * (n - 2)))
	b3 = np.sum((np.arange(1, n-2) * (np.arange(2, n-1) * (np.arange(3, n) * sorted_data[3:]))) / (n * (n - 1) * (n - 2) * (n - 3)))

	# Calculate L-moments
	l1 = b0 # L1 is the mean (measure of location)
	l2 = 2 * b1 - b0 # L2 is a measure of scale (analogous to standard deviation)
	l3 = 6 * b2 - 6 * b1 + b0 # L3 is a measure of skewness
	l4 = 20 * b3 - 30 * b2 + 12 * b1 - b0 # L4 is a measure of kurtosis

	# Calculate L-moment ratios
	t2 = l2 / l1 if l1 != 0 else np.nan # L-CV (coefficient of L-variation)
	t3 = l3 / l2 if l2 != 0 else np.nan # L-skewness
	t4 = l4 / l2 if l2 != 0 else np.nan # L-kurtosis

	return l1, l2, l3, l4, t2, t3, t4

	def fit_gamma_with_l_moments(data):
	"""
	Fit a Gamma distribution to the data using L-moments method.

	Args:
	data (np.array): Input data array

	Returns:
	tuple: Fitted Gamma distribution parameters (shape, loc, scale)
	"""
	data = data[~np.isnan(data)]
	if len(data) == 0:
	return 1, 0, 1 # Default values: shape=1, loc=0, scale=1

	l1, l2, _, _, t2, _, _ = calculate_l_moments(data)
	shape = (2 / t2) if t2 != 0 else 0.001
	scale = l2 / shape if shape != 0 else l2
	loc = 0
	return shape, loc, scale

	def fit_generalized_pareto_distribution(data, threshold):
	"""
	Fit a Generalized Pareto Distribution (GPD) to the excesses above the threshold.

	Args:
	data (np.array): Input data array
	threshold (float): Threshold for defining excesses

	Returns:
	tuple: Fitted GPD parameters (shape, loc, scale)
	"""
	excesses = data[data > threshold] - threshold
	if len(excesses) < 10: # Arbitrary minimum number of points for GPD fitting
	return (0, 0, 1) # Return a default GPD with zero shape, zero location, and unit scale
	params = genpareto.fit(excesses)
	return params

	def cross_validate_gpd(data, threshold, n_splits=5):
	"""
	Perform cross-validation for GPD fitting.

	Args:
	data (np.array): Input data array
	threshold (float): Threshold for defining excesses
	n_splits (int): Number of splits for cross-validation

	Returns:
	tuple: Average GPD parameters from cross-validation (shape, loc, scale)
	"""
	excesses = data[data > threshold] - threshold
	if len(excesses) < n_splits:
	return fit_generalized_pareto_distribution(data, threshold)

	kf = KFold(n_splits=n_splits)
	params_list = []
	for train_index, test_index in kf.split(excesses):
	train_data, test_data = excesses[train_index], excesses[test_index]
	params = genpareto.fit(train_data)
	params_list.append(params)

	shape_avg = np.mean([params[0] for params in params_list])
	loc_avg = np.mean([params[1] for params in params_list])
	scale_avg = np.mean([params[2] for params in params_list])
	return shape_avg, loc_avg, scale_avg

	# Data generation
	np.random.seed(42)
	days = 30

	# Generate base rainfall data
	base_rainfall = np.random.gamma(2, 10, days)

	# Create observed rainfall by adding random fluctuations
	observed_rainfall = base_rainfall + np.random.normal(0, 5, days)

	# Create satellite data with its own set of errors and biases
	satellite_rainfall = base_rainfall * np.random.uniform(0.8, 1.2, days) + np.random.normal(0, 7, days)

	# Add extreme values for 7 random days
	extreme_days = np.random.choice(days, 7, replace=False)
	extreme_values = np.random.uniform(50, 100, 7)
	observed_rainfall[extreme_days] += extreme_values
	satellite_rainfall[extreme_days] += extreme_values * np.random.uniform(0.9, 1.1, 7)

	# Ensure non-negative values
	observed_rainfall = np.maximum(observed_rainfall, 0)
	satellite_rainfall = np.maximum(satellite_rainfall, 0)

	# Fit Gamma distributions
	obs_gamma_params = fit_gamma_with_l_moments(observed_rainfall)
	sat_gamma_params = fit_gamma_with_l_moments(satellite_rainfall)

	# Plotting in XKCD style
	plt.xkcd()

	# Create figure and adjust subplots
	fig, axs = plt.subplots(2, 2, figsize=(16, 16))
	fig.suptitle("CDF of Data and Fitted Gamma Distributions", fontsize=24, y=0.98)

	# Percentiles to use for x-axis limits
	percentiles = [80, 90, 96, 98]

	for i, percentile in enumerate(percentiles):
	row = i // 2
	col = i % 2

	max_rainfall = np.percentile(np.concatenate([observed_rainfall, satellite_rainfall]), percentile)

	x = np.linspace(0, max_rainfall, 100)

	# Plot empirical CDF
	axs[row, col].step(np.sort(observed_rainfall), np.linspace(0, 1, len(observed_rainfall)),
	'r:', label='Observed Empirical CDF')
	axs[row, col].step(np.sort(satellite_rainfall), np.linspace(0, 1, len(satellite_rainfall)),
	'g:', label='Satellite Empirical CDF')

	# Plot fitted Gamma CDFs
	axs[row, col].plot(x, gamma.cdf(x, *obs_gamma_params), 'r-', lw=2, label='Observed Gamma CDF')
	axs[row, col].plot(x, gamma.cdf(x, *sat_gamma_params), 'g-', lw=2, label='Satellite Gamma CDF')

	axs[row, col].set_title(f'CDF Comparison (up to {percentile}th percentile)')
	axs[row, col].set_xlabel('Rainfall (mm)')
	axs[row, col].set_ylabel('Cumulative Probability')
	axs[row, col].legend(fontsize='x-small')
	axs[row, col].set_xlim(0, max_rainfall)
	axs[row, col].set_ylim(0, 1)

	plt.tight_layout()
	plt.subplots_adjust(top=0.93) # Adjust the top of the subplots to make room for the title
	plt.show()