Last active
October 11, 2025 06:36
-
-
Save tonyreina/f3181e598414f26bd2392c1e416bfc02 to your computer and use it in GitHub Desktop.
Epidemic curve with Bayesian modeling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # π¬ ANIMATED BAYESIAN LEARNING DEMONSTRATION | |
| # Create a GIF showing how the model evolves as each data point is added | |
| # π¦ BAYESIAN EPIDEMIC MODELING FOR STUDENTS | |
| # Complete demonstration in a single cell | |
| # Shows uncertainty quantification for public health decision-making | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from scipy.linalg import cho_factor, cho_solve | |
| from scipy.stats import norm | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| # Set up plotting | |
| plt.style.use('default') | |
| plt.rcParams['figure.figsize'] = (15, 10) | |
| print("π¦ Bayesian Epidemic Modeling Demo") | |
| print("=" * 50) | |
| # ============================================================================= | |
| # STEP 1: CREATE REALISTIC EPIDEMIC SCENARIO | |
| # ============================================================================= | |
| def true_epidemic_curve(t): | |
| """Realistic epidemic with two distinct waves optimized for polynomial fitting""" | |
| # First wave: smaller, earlier peak | |
| wave1_center, wave1_height, wave1_width = 35, 22, 12 | |
| wave1 = wave1_height * np.exp(-0.5 * ((t - wave1_center) / wave1_width) ** 2) | |
| # Second wave: larger, later peak with asymmetric shape | |
| wave2_center, wave2_height = 85, 32 | |
| wave2_rise_width, wave2_fall_width = 15, 25 # Asymmetric: faster rise, slower fall | |
| wave2 = np.where(t <= wave2_center, | |
| wave2_height * np.exp(-0.5 * ((t - wave2_center) / wave2_rise_width) ** 2), | |
| wave2_height * np.exp(-0.5 * ((t - wave2_center) / wave2_fall_width) ** 2)) | |
| # Baseline level with slight trend | |
| baseline = 4 + 0.02 * (t - 60) # Slight increase over time | |
| baseline = np.maximum(baseline, 2) # Minimum baseline of 2 | |
| total = baseline + wave1 + wave2 | |
| return np.maximum(total, 0) # Ensure non-negative | |
| # Generate true epidemic curve | |
| days_true = np.linspace(0, 120, 300) | |
| infections_true = true_epidemic_curve(days_true) | |
| # ============================================================================= | |
| # STEP 2: SIMULATE REALISTIC SURVEILLANCE DATA | |
| # ============================================================================= | |
| np.random.seed(42) # Reproducible results | |
| # Sparse observation times (realistic reporting) | |
| observed_days = np.array([5, 12, 18, 25, 32, 38, 45, 52, 58, 65, 72, 79, 86, 93, 100, 107]) | |
| # Add measurement noise | |
| noise_level = 3.0 | |
| true_values = true_epidemic_curve(observed_days) | |
| observed_cases = true_values + np.random.normal(0, noise_level, len(observed_days)) | |
| observed_cases = np.maximum(observed_cases, 0) # No negative cases | |
| print(f"π Surveillance data: {len(observed_days)} observations with noise level {noise_level}") | |
| # ============================================================================= | |
| # STEP 3: BAYESIAN POLYNOMIAL REGRESSION | |
| # ============================================================================= | |
| def create_epidemic_features(X, n_features=12): | |
| """Create advanced epidemic-specific basis functions optimized for two-wave patterns""" | |
| X = np.array(X).flatten() | |
| if len(X) == 0: | |
| return np.array([]).reshape(0, n_features) | |
| features = [] | |
| # 1. Constant baseline (background infection level) | |
| features.append(np.ones(len(X))) | |
| # 2-3. Linear and quadratic trends for overall epidemic trajectory | |
| X_norm = X / 120.0 | |
| features.append(X_norm) | |
| features.append(X_norm ** 2) | |
| # 4-7. Multiple Gaussian bells for epidemic waves with different widths | |
| wave_centers = [30, 45, 75, 90] # Optimized for two-wave pattern | |
| wave_widths = [15, 12, 18, 15] # Different widths for different wave types | |
| for center, width in zip(wave_centers, wave_widths): | |
| features.append(np.exp(-0.5 * ((X - center) / width) ** 2)) | |
| # 8-9. Asymmetric wave functions (log-normal style for realistic epidemic curves) | |
| for center in [35, 85]: | |
| # Skewed Gaussian (faster rise, slower decline - typical epidemic pattern) | |
| left_wing = np.where(X <= center, | |
| np.exp(-0.5 * ((X - center) / 10) ** 2), | |
| np.exp(-0.5 * ((X - center) / 20) ** 2)) | |
| features.append(left_wing) | |
| # 10-11. Logistic growth curves for epidemic phases | |
| for inflection in [25, 80]: | |
| steepness = 0.15 | |
| features.append(1 / (1 + np.exp(-steepness * (X - inflection)))) | |
| # 12. Interaction term for wave interference | |
| if n_features >= 12: | |
| wave1_component = np.exp(-0.5 * ((X - 35) / 15) ** 2) | |
| wave2_component = np.exp(-0.5 * ((X - 85) / 20) ** 2) | |
| features.append(wave1_component * wave2_component) | |
| return np.column_stack(features[:n_features]) | |
| def bayesian_regression(X, y, n_features=12, alpha=1.0, beta=1.0): | |
| """ | |
| Enhanced Bayesian regression with adaptive regularization | |
| Parameters: | |
| - n_features: Number of basis functions to use | |
| - alpha: Prior precision (higher = more regularization) | |
| - beta: Noise precision (higher = assume less noise) | |
| """ | |
| Phi = create_epidemic_features(X, n_features) | |
| # Adaptive regularization: stronger for higher-order features | |
| alpha_vec = np.ones(Phi.shape[1]) * alpha | |
| # Less regularization for basic functions (baseline, trends, main waves) | |
| alpha_vec[:5] *= 0.5 | |
| # More regularization for complex interaction terms | |
| if len(alpha_vec) > 8: | |
| alpha_vec[8:] *= 2.0 | |
| # Bayesian update with feature-specific regularization | |
| S_inv = np.diag(alpha_vec) + beta * (Phi.T @ Phi) | |
| # Numerical stability with Cholesky | |
| try: | |
| L, lower = cho_factor(S_inv, lower=True) | |
| except: | |
| # Fallback for numerical issues | |
| S_inv += 1e-6 * np.eye(S_inv.shape[0]) | |
| L, lower = cho_factor(S_inv, lower=True) | |
| # Posterior mean and covariance | |
| m = cho_solve((L, lower), beta * Phi.T @ y) | |
| S = cho_solve((L, lower), np.eye(len(m))) | |
| return m, S | |
| def predict_with_uncertainty(X_test, m, S, n_features=12, beta=1.0): | |
| """Make predictions with uncertainty bands using epidemic features""" | |
| Phi_test = create_epidemic_features(X_test, n_features) | |
| y_mean = Phi_test @ m | |
| y_var = 1/beta + np.sum((Phi_test @ S) * Phi_test, axis=1) | |
| y_std = np.sqrt(y_var) | |
| return y_mean, y_std | |
| # ============================================================================= | |
| # STEP 4: DEMONSTRATE BAYESIAN LEARNING | |
| # ============================================================================= | |
| # Model parameters - optimized for epidemic modeling | |
| n_features = 12 # More features for complex epidemic patterns | |
| alpha = 0.05 # Even lighter regularization for flexibility | |
| beta = 1.0 / (noise_level**2) | |
| X_pred = np.linspace(0, 120, 300) | |
| # Show learning progression | |
| fig = plt.figure(figsize=(20, 12)) | |
| data_sizes = [4, 8, 12, len(observed_days)] | |
| stage_names = ['Early Outbreak', 'Growing Evidence', 'Pattern Emerging', 'Full Dataset'] | |
| for i, (n_data, stage) in enumerate(zip(data_sizes, stage_names)): | |
| plt.subplot(2, 3, i+1) | |
| # Use subset of data | |
| X_train = observed_days[:n_data] | |
| y_train = observed_cases[:n_data] | |
| # Fit model | |
| m, S = bayesian_regression(X_train, y_train, n_features, alpha, beta) | |
| y_pred_mean, y_pred_std = predict_with_uncertainty(X_pred, m, S, n_features, beta) | |
| # Plot results | |
| plt.plot(days_true, infections_true, 'r-', linewidth=3, alpha=0.7, label='True curve') | |
| plt.scatter(X_train, y_train, color='black', s=80, zorder=5, label=f'Data (n={n_data})') | |
| plt.plot(X_pred, y_pred_mean, 'b-', linewidth=2, label='Bayesian prediction') | |
| plt.fill_between(X_pred, y_pred_mean - 2*y_pred_std, y_pred_mean + 2*y_pred_std, | |
| alpha=0.2, color='blue', label='95% confidence') | |
| plt.xlim(0, 120) | |
| plt.ylim(0, 70) | |
| plt.title(f'{stage} (Day {X_train[-1]:.0f})', fontsize=12) | |
| plt.xlabel('Days since outbreak') | |
| plt.ylabel('Cases per 100K') | |
| plt.grid(True, alpha=0.3) | |
| plt.legend(fontsize=9) | |
| # ============================================================================= | |
| # STEP 5: COMPARE DIFFERENT PRIORS | |
| # ============================================================================= | |
| plt.subplot(2, 3, 5) | |
| # Early data to show prior influence | |
| early_days = observed_days[:6] | |
| early_cases = observed_cases[:6] | |
| priors = {'Skeptical (Ξ±=5)': 5.0, 'Balanced (Ξ±=1)': 1.0, 'Flexible (Ξ±=0.1)': 0.1} | |
| colors = ['green', 'blue', 'purple'] | |
| plt.plot(days_true, infections_true, 'r-', linewidth=3, alpha=0.7, label='True curve') | |
| plt.scatter(early_days, early_cases, color='black', s=80, zorder=5, label='Early data') | |
| for (name, alpha_prior), color in zip(priors.items(), colors): | |
| m, S = bayesian_regression(early_days, early_cases, n_features, alpha_prior, beta) | |
| y_pred_mean, y_pred_std = predict_with_uncertainty(X_pred, m, S, n_features, beta) | |
| plt.plot(X_pred, y_pred_mean, color=color, linewidth=2, label=name) | |
| plt.xlim(0, 120) | |
| plt.ylim(0, 70) | |
| plt.title('Prior Belief Comparison', fontsize=12) | |
| plt.xlabel('Days since outbreak') | |
| plt.ylabel('Cases per 100K') | |
| plt.grid(True, alpha=0.3) | |
| plt.legend(fontsize=9) | |
| # ============================================================================= | |
| # STEP 6: PUBLIC HEALTH DECISION MAKING | |
| # ============================================================================= | |
| plt.subplot(2, 3, 6) | |
| # Final model with all data | |
| m_final, S_final = bayesian_regression(observed_days, observed_cases, n_features, alpha, beta) | |
| y_final_mean, y_final_std = predict_with_uncertainty(X_pred, m_final, S_final, n_features, beta) | |
| # Policy parameters | |
| hospital_capacity = 35 | |
| intervention_day = 50 | |
| # Plot forecast | |
| plt.plot(days_true, infections_true, 'r-', linewidth=3, alpha=0.8, label='True curve') | |
| plt.scatter(observed_days, observed_cases, color='black', s=80, zorder=5, label='Data') | |
| plt.plot(X_pred, y_final_mean, 'b-', linewidth=3, label='Forecast') | |
| plt.fill_between(X_pred, y_final_mean - 2*y_final_std, y_final_mean + 2*y_final_std, | |
| alpha=0.3, color='blue', label='95% interval') | |
| plt.axhline(hospital_capacity, color='orange', linestyle='--', linewidth=2, | |
| label='Hospital capacity') | |
| plt.axvline(intervention_day, color='purple', linestyle='--', linewidth=2, | |
| label='Intervention option') | |
| plt.xlim(0, 120) | |
| plt.ylim(0, 70) | |
| plt.title('Policy Decision Support', fontsize=12) | |
| plt.xlabel('Days since outbreak') | |
| plt.ylabel('Cases per 100K') | |
| plt.grid(True, alpha=0.3) | |
| plt.legend(fontsize=9) | |
| plt.tight_layout() | |
| plt.show() | |
| # ============================================================================= | |
| # STEP 7: QUANTITATIVE RISK ASSESSMENT | |
| # ============================================================================= | |
| # Calculate key metrics for decision making | |
| future_days = X_pred[X_pred > observed_days[-1]] | |
| future_mean = y_final_mean[X_pred > observed_days[-1]] | |
| future_std = y_final_std[X_pred > observed_days[-1]] | |
| peak_idx = np.argmax(future_mean) | |
| peak_day = future_days[peak_idx] | |
| peak_mean = future_mean[peak_idx] | |
| peak_std = future_std[peak_idx] | |
| # Probability of exceeding hospital capacity | |
| prob_exceed = 1 - norm.cdf(hospital_capacity, peak_mean, peak_std) | |
| print("\nπ₯ PUBLIC HEALTH DECISION SUPPORT") | |
| print("=" * 50) | |
| print(f"π Predicted peak: Day {peak_day:.0f}") | |
| print(f"π Peak cases: {peak_mean:.1f} Β± {peak_std:.1f} per 100K") | |
| print(f"β οΈ Probability of exceeding hospital capacity: {prob_exceed:.1%}") | |
| print(f"π‘ Recommendation: {'π¨ IMPLEMENT INTERVENTION' if prob_exceed > 0.3 else 'π MONITOR CLOSELY'}") | |
| print("\nπ KEY LEARNING POINTS") | |
| print("=" * 50) | |
| print("β Uncertainty decreases as more data arrives") | |
| print("β Prior beliefs matter most with limited data") | |
| print("β Bayesian methods provide probabilistic forecasts") | |
| print("β Uncertainty quantification enables risk-based decisions") | |
| print("β Perfect for early outbreak response when stakes are high") | |
| print(f"\nπ Model Summary: {len(observed_days)} observations, {n_features} epidemic basis functions") | |
| print(f"π― Final prediction accuracy: Mean absolute error = {np.mean(np.abs(true_epidemic_curve(observed_days) - observed_cases)):.1f} cases/100K") | |
| import matplotlib.animation as animation | |
| from matplotlib.patches import Rectangle | |
| import tempfile | |
| import os | |
| def create_bayesian_learning_animation( | |
| save_path="bayesian_epidemic_learning.gif", | |
| fps=1.5, # Slower for educational viewing | |
| dpi=100 | |
| ): | |
| """ | |
| Create an animated GIF showing Bayesian learning in action | |
| Each frame shows the model after adding one more data point | |
| """ | |
| print("π¬ Creating Bayesian Learning Animation...") | |
| print("=" * 50) | |
| # Animation parameters | |
| n_data_points = len(observed_days) | |
| frames = [] | |
| # Create figure for animation - single plot only | |
| fig, ax1 = plt.subplots(1, 1, figsize=(12, 8)) | |
| for frame_idx in range(n_data_points + 1): | |
| print(f"πΉ Generating frame {frame_idx + 1}/{n_data_points + 1}") | |
| # Clear axes | |
| ax1.clear() | |
| # Determine how much data to use | |
| if frame_idx == 0: | |
| # Prior only (no data) | |
| X_train = np.array([]) | |
| y_train = np.array([]) | |
| title_suffix = "Prior Beliefs Only" | |
| data_description = "No surveillance data yet" | |
| else: | |
| # Use first frame_idx data points | |
| X_train = observed_days[:frame_idx] | |
| y_train = observed_cases[:frame_idx] | |
| title_suffix = f"After {frame_idx} observations" | |
| data_description = f"Using {frame_idx} surveillance reports" | |
| # Model predictions plot | |
| ax1.set_xlim(0, 120) | |
| ax1.set_ylim(0, 60) | |
| ax1.grid(True, alpha=0.3) | |
| ax1.set_xlabel('Days since outbreak start', fontsize=14) | |
| ax1.set_ylabel('Daily cases per 100K population', fontsize=14) | |
| ax1.set_title(f'Epidemic Model - {title_suffix}', fontsize=16, fontweight='bold') | |
| # Always show the true curve | |
| ax1.plot(days_true, infections_true, 'r-', linewidth=3, alpha=0.8, | |
| label='True epidemic curve (unknown)', zorder=1) | |
| if len(X_train) == 0: | |
| # Show prior predictive distribution | |
| m_prior = np.zeros(n_features) | |
| # Prior covariance with weak prior precision | |
| S_prior = np.eye(n_features) / 0.05 | |
| y_pred_mean, y_pred_std = predict_with_uncertainty(X_pred, m_prior, S_prior, n_features, beta) | |
| uncertainty_text = "Very High Uncertainty" | |
| uncertainty_color = 'red' | |
| else: | |
| # Fit model with current data | |
| m, S = bayesian_regression(X_train, y_train, n_features, alpha, beta) | |
| y_pred_mean, y_pred_std = predict_with_uncertainty(X_pred, m, S, n_features, beta) | |
| # Calculate uncertainty level | |
| avg_uncertainty = np.mean(y_pred_std) | |
| if avg_uncertainty > 8: | |
| uncertainty_text = "High Uncertainty" | |
| uncertainty_color = 'orange' | |
| elif avg_uncertainty > 5: | |
| uncertainty_text = "Moderate Uncertainty" | |
| uncertainty_color = 'yellow' | |
| else: | |
| uncertainty_text = "Lower Uncertainty" | |
| uncertainty_color = 'lightgreen' | |
| # Plot model predictions | |
| ax1.plot(X_pred, y_pred_mean, 'b-', linewidth=3, label='Bayesian prediction', zorder=3) | |
| ax1.fill_between(X_pred, y_pred_mean - 2*y_pred_std, y_pred_mean + 2*y_pred_std, | |
| alpha=0.2, color='blue', label='95% prediction interval', zorder=2) | |
| ax1.fill_between(X_pred, y_pred_mean - y_pred_std, y_pred_mean + y_pred_std, | |
| alpha=0.3, color='blue', label='68% prediction interval', zorder=2) | |
| # Show observed data points | |
| if len(X_train) > 0: | |
| ax1.scatter(X_train, y_train, color='black', s=100, zorder=5, | |
| label=f'Observed data (n={len(X_train)})', edgecolors='white', linewidth=2) | |
| # Highlight the newest point | |
| if len(X_train) > 1: | |
| ax1.scatter(X_train[-1], y_train[-1], color='yellow', s=150, zorder=6, | |
| marker='*', edgecolors='black', linewidth=2, label='Latest observation') | |
| # Show future data points as grayed out | |
| if frame_idx < n_data_points: | |
| future_X = observed_days[frame_idx:] | |
| future_y = observed_cases[frame_idx:] | |
| ax1.scatter(future_X, future_y, color='lightgray', s=60, alpha=0.5, zorder=1, | |
| label='Future data (unknown)') | |
| # Add uncertainty indicator | |
| ax1.text(0.02, 0.98, f'Uncertainty Level: {uncertainty_text}', | |
| transform=ax1.transAxes, fontsize=11, fontweight='bold', | |
| bbox=dict(boxstyle='round,pad=0.5', facecolor=uncertainty_color, alpha=0.7), | |
| verticalalignment='top') | |
| # Add data info | |
| ax1.text(0.02, 0.02, data_description, | |
| transform=ax1.transAxes, fontsize=10, | |
| bbox=dict(boxstyle='round,pad=0.3', facecolor='lightblue', alpha=0.7), | |
| verticalalignment='bottom') | |
| ax1.legend(loc='upper right', fontsize=11) | |
| # Add frame number | |
| fig.suptitle(f'Bayesian Learning: Every observation updates the model', | |
| fontsize=18, fontweight='bold') | |
| plt.tight_layout() | |
| # Save frame to temporary file | |
| temp_file = f'temp_frame_{frame_idx:03d}.png' | |
| plt.savefig(temp_file, dpi=dpi, bbox_inches='tight', facecolor='white') | |
| frames.append(temp_file) | |
| # Create GIF from frames | |
| print(f"\nπ₯ Assembling {len(frames)} frames into GIF...") | |
| # Import imageio for GIF creation | |
| try: | |
| import imageio.v2 as imageio | |
| # Read all frames and ensure consistent dimensions | |
| images = [] | |
| for frame_file in frames: | |
| img = imageio.imread(frame_file) | |
| images.append(img) | |
| # Ensure all images have the same shape by cropping/padding to the minimum common size | |
| if len(images) > 1: | |
| # Find minimum dimensions | |
| min_height = min(img.shape[0] for img in images) | |
| min_width = min(img.shape[1] for img in images) | |
| # Crop all images to the same size | |
| processed_images = [] | |
| for img in images: | |
| if len(img.shape) == 3: # RGB image | |
| cropped = img[:min_height, :min_width, :] | |
| else: # Grayscale | |
| cropped = img[:min_height, :min_width] | |
| processed_images.append(cropped) | |
| images = processed_images | |
| # Create custom frame durations as requested | |
| durations = [] | |
| for i in range(len(images)): | |
| if i < 7: # First 7 frames: 10 seconds each | |
| durations.append(3000.0) | |
| elif i < 12: # Next 5 frames (frames 7-11): 5 seconds each | |
| durations.append(2000.0) | |
| else: # Remaining frames: 1 second each | |
| durations.append(1000.0) | |
| # Save GIF with consistent frame sizes | |
| imageio.mimsave(save_path, images, duration=durations, loop=0) | |
| # Clean up temporary files | |
| for frame_file in frames: | |
| try: | |
| os.remove(frame_file) | |
| except: | |
| pass | |
| print(f"β Animation saved to: {save_path}") | |
| print(f"π Animation details:") | |
| print(f" - {len(images)} frames") | |
| print(f" - Dimensions: {images[0].shape}") | |
| print(f" - Custom timing: 10s (first 7), 5s (next 5), 1s (remaining)") | |
| print(f" - Single plot focus for clarity") | |
| print(f" - Shows uncertainty reduction over time") | |
| print(f" - Perfect for educational presentations") | |
| except ImportError: | |
| print("β Error: imageio not available for GIF creation") | |
| print("π‘ Install with: pip install imageio") | |
| return None | |
| except Exception as e: | |
| print(f"β Error creating GIF: {e}") | |
| print("π‘ Frames were created successfully, GIF assembly failed") | |
| return None | |
| plt.close(fig) | |
| return save_path | |
| # Create the animation | |
| print("π¬ CREATING BAYESIAN LEARNING ANIMATION") | |
| print("=" * 50) | |
| print("This will show how the model evolves as each data point arrives...") | |
| animation_path = create_bayesian_learning_animation( | |
| save_path="bayesian_epidemic.gif", | |
| fps=1, # Base fps (overridden by custom durations) | |
| dpi=120 # Higher quality for single plot | |
| ) | |
| if animation_path: | |
| print(f"\nπ SUCCESS! Animation created: {animation_path}") | |
| print("\nπ Educational Value:") | |
| print(" β Shows prior β posterior progression") | |
| print(" β Demonstrates uncertainty quantification") | |
| print(" β Visualizes Bayesian learning process") | |
| print(" β Perfect for teaching epidemic modeling") | |
| print(" β Great for presentations and gists!") | |
| else: | |
| print("\nβ Animation creation failed - check dependencies") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment