Skip to content

Instantly share code, notes, and snippets.

@tonyreina
Last active October 11, 2025 06:36
Show Gist options
  • Save tonyreina/f3181e598414f26bd2392c1e416bfc02 to your computer and use it in GitHub Desktop.
Save tonyreina/f3181e598414f26bd2392c1e416bfc02 to your computer and use it in GitHub Desktop.
Epidemic curve with Bayesian modeling
# 🎬 ANIMATED BAYESIAN LEARNING DEMONSTRATION
# Create a GIF showing how the model evolves as each data point is added
# 🦠 BAYESIAN EPIDEMIC MODELING FOR STUDENTS
# Complete demonstration in a single cell
# Shows uncertainty quantification for public health decision-making
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import cho_factor, cho_solve
from scipy.stats import norm
import warnings
warnings.filterwarnings('ignore')
# Set up plotting
plt.style.use('default')
plt.rcParams['figure.figsize'] = (15, 10)
print("🦠 Bayesian Epidemic Modeling Demo")
print("=" * 50)
# =============================================================================
# STEP 1: CREATE REALISTIC EPIDEMIC SCENARIO
# =============================================================================
def true_epidemic_curve(t):
"""Realistic epidemic with two distinct waves optimized for polynomial fitting"""
# First wave: smaller, earlier peak
wave1_center, wave1_height, wave1_width = 35, 22, 12
wave1 = wave1_height * np.exp(-0.5 * ((t - wave1_center) / wave1_width) ** 2)
# Second wave: larger, later peak with asymmetric shape
wave2_center, wave2_height = 85, 32
wave2_rise_width, wave2_fall_width = 15, 25 # Asymmetric: faster rise, slower fall
wave2 = np.where(t <= wave2_center,
wave2_height * np.exp(-0.5 * ((t - wave2_center) / wave2_rise_width) ** 2),
wave2_height * np.exp(-0.5 * ((t - wave2_center) / wave2_fall_width) ** 2))
# Baseline level with slight trend
baseline = 4 + 0.02 * (t - 60) # Slight increase over time
baseline = np.maximum(baseline, 2) # Minimum baseline of 2
total = baseline + wave1 + wave2
return np.maximum(total, 0) # Ensure non-negative
# Generate true epidemic curve
days_true = np.linspace(0, 120, 300)
infections_true = true_epidemic_curve(days_true)
# =============================================================================
# STEP 2: SIMULATE REALISTIC SURVEILLANCE DATA
# =============================================================================
np.random.seed(42) # Reproducible results
# Sparse observation times (realistic reporting)
observed_days = np.array([5, 12, 18, 25, 32, 38, 45, 52, 58, 65, 72, 79, 86, 93, 100, 107])
# Add measurement noise
noise_level = 3.0
true_values = true_epidemic_curve(observed_days)
observed_cases = true_values + np.random.normal(0, noise_level, len(observed_days))
observed_cases = np.maximum(observed_cases, 0) # No negative cases
print(f"πŸ“Š Surveillance data: {len(observed_days)} observations with noise level {noise_level}")
# =============================================================================
# STEP 3: BAYESIAN POLYNOMIAL REGRESSION
# =============================================================================
def create_epidemic_features(X, n_features=12):
"""Create advanced epidemic-specific basis functions optimized for two-wave patterns"""
X = np.array(X).flatten()
if len(X) == 0:
return np.array([]).reshape(0, n_features)
features = []
# 1. Constant baseline (background infection level)
features.append(np.ones(len(X)))
# 2-3. Linear and quadratic trends for overall epidemic trajectory
X_norm = X / 120.0
features.append(X_norm)
features.append(X_norm ** 2)
# 4-7. Multiple Gaussian bells for epidemic waves with different widths
wave_centers = [30, 45, 75, 90] # Optimized for two-wave pattern
wave_widths = [15, 12, 18, 15] # Different widths for different wave types
for center, width in zip(wave_centers, wave_widths):
features.append(np.exp(-0.5 * ((X - center) / width) ** 2))
# 8-9. Asymmetric wave functions (log-normal style for realistic epidemic curves)
for center in [35, 85]:
# Skewed Gaussian (faster rise, slower decline - typical epidemic pattern)
left_wing = np.where(X <= center,
np.exp(-0.5 * ((X - center) / 10) ** 2),
np.exp(-0.5 * ((X - center) / 20) ** 2))
features.append(left_wing)
# 10-11. Logistic growth curves for epidemic phases
for inflection in [25, 80]:
steepness = 0.15
features.append(1 / (1 + np.exp(-steepness * (X - inflection))))
# 12. Interaction term for wave interference
if n_features >= 12:
wave1_component = np.exp(-0.5 * ((X - 35) / 15) ** 2)
wave2_component = np.exp(-0.5 * ((X - 85) / 20) ** 2)
features.append(wave1_component * wave2_component)
return np.column_stack(features[:n_features])
def bayesian_regression(X, y, n_features=12, alpha=1.0, beta=1.0):
"""
Enhanced Bayesian regression with adaptive regularization
Parameters:
- n_features: Number of basis functions to use
- alpha: Prior precision (higher = more regularization)
- beta: Noise precision (higher = assume less noise)
"""
Phi = create_epidemic_features(X, n_features)
# Adaptive regularization: stronger for higher-order features
alpha_vec = np.ones(Phi.shape[1]) * alpha
# Less regularization for basic functions (baseline, trends, main waves)
alpha_vec[:5] *= 0.5
# More regularization for complex interaction terms
if len(alpha_vec) > 8:
alpha_vec[8:] *= 2.0
# Bayesian update with feature-specific regularization
S_inv = np.diag(alpha_vec) + beta * (Phi.T @ Phi)
# Numerical stability with Cholesky
try:
L, lower = cho_factor(S_inv, lower=True)
except:
# Fallback for numerical issues
S_inv += 1e-6 * np.eye(S_inv.shape[0])
L, lower = cho_factor(S_inv, lower=True)
# Posterior mean and covariance
m = cho_solve((L, lower), beta * Phi.T @ y)
S = cho_solve((L, lower), np.eye(len(m)))
return m, S
def predict_with_uncertainty(X_test, m, S, n_features=12, beta=1.0):
"""Make predictions with uncertainty bands using epidemic features"""
Phi_test = create_epidemic_features(X_test, n_features)
y_mean = Phi_test @ m
y_var = 1/beta + np.sum((Phi_test @ S) * Phi_test, axis=1)
y_std = np.sqrt(y_var)
return y_mean, y_std
# =============================================================================
# STEP 4: DEMONSTRATE BAYESIAN LEARNING
# =============================================================================
# Model parameters - optimized for epidemic modeling
n_features = 12 # More features for complex epidemic patterns
alpha = 0.05 # Even lighter regularization for flexibility
beta = 1.0 / (noise_level**2)
X_pred = np.linspace(0, 120, 300)
# Show learning progression
fig = plt.figure(figsize=(20, 12))
data_sizes = [4, 8, 12, len(observed_days)]
stage_names = ['Early Outbreak', 'Growing Evidence', 'Pattern Emerging', 'Full Dataset']
for i, (n_data, stage) in enumerate(zip(data_sizes, stage_names)):
plt.subplot(2, 3, i+1)
# Use subset of data
X_train = observed_days[:n_data]
y_train = observed_cases[:n_data]
# Fit model
m, S = bayesian_regression(X_train, y_train, n_features, alpha, beta)
y_pred_mean, y_pred_std = predict_with_uncertainty(X_pred, m, S, n_features, beta)
# Plot results
plt.plot(days_true, infections_true, 'r-', linewidth=3, alpha=0.7, label='True curve')
plt.scatter(X_train, y_train, color='black', s=80, zorder=5, label=f'Data (n={n_data})')
plt.plot(X_pred, y_pred_mean, 'b-', linewidth=2, label='Bayesian prediction')
plt.fill_between(X_pred, y_pred_mean - 2*y_pred_std, y_pred_mean + 2*y_pred_std,
alpha=0.2, color='blue', label='95% confidence')
plt.xlim(0, 120)
plt.ylim(0, 70)
plt.title(f'{stage} (Day {X_train[-1]:.0f})', fontsize=12)
plt.xlabel('Days since outbreak')
plt.ylabel('Cases per 100K')
plt.grid(True, alpha=0.3)
plt.legend(fontsize=9)
# =============================================================================
# STEP 5: COMPARE DIFFERENT PRIORS
# =============================================================================
plt.subplot(2, 3, 5)
# Early data to show prior influence
early_days = observed_days[:6]
early_cases = observed_cases[:6]
priors = {'Skeptical (Ξ±=5)': 5.0, 'Balanced (Ξ±=1)': 1.0, 'Flexible (Ξ±=0.1)': 0.1}
colors = ['green', 'blue', 'purple']
plt.plot(days_true, infections_true, 'r-', linewidth=3, alpha=0.7, label='True curve')
plt.scatter(early_days, early_cases, color='black', s=80, zorder=5, label='Early data')
for (name, alpha_prior), color in zip(priors.items(), colors):
m, S = bayesian_regression(early_days, early_cases, n_features, alpha_prior, beta)
y_pred_mean, y_pred_std = predict_with_uncertainty(X_pred, m, S, n_features, beta)
plt.plot(X_pred, y_pred_mean, color=color, linewidth=2, label=name)
plt.xlim(0, 120)
plt.ylim(0, 70)
plt.title('Prior Belief Comparison', fontsize=12)
plt.xlabel('Days since outbreak')
plt.ylabel('Cases per 100K')
plt.grid(True, alpha=0.3)
plt.legend(fontsize=9)
# =============================================================================
# STEP 6: PUBLIC HEALTH DECISION MAKING
# =============================================================================
plt.subplot(2, 3, 6)
# Final model with all data
m_final, S_final = bayesian_regression(observed_days, observed_cases, n_features, alpha, beta)
y_final_mean, y_final_std = predict_with_uncertainty(X_pred, m_final, S_final, n_features, beta)
# Policy parameters
hospital_capacity = 35
intervention_day = 50
# Plot forecast
plt.plot(days_true, infections_true, 'r-', linewidth=3, alpha=0.8, label='True curve')
plt.scatter(observed_days, observed_cases, color='black', s=80, zorder=5, label='Data')
plt.plot(X_pred, y_final_mean, 'b-', linewidth=3, label='Forecast')
plt.fill_between(X_pred, y_final_mean - 2*y_final_std, y_final_mean + 2*y_final_std,
alpha=0.3, color='blue', label='95% interval')
plt.axhline(hospital_capacity, color='orange', linestyle='--', linewidth=2,
label='Hospital capacity')
plt.axvline(intervention_day, color='purple', linestyle='--', linewidth=2,
label='Intervention option')
plt.xlim(0, 120)
plt.ylim(0, 70)
plt.title('Policy Decision Support', fontsize=12)
plt.xlabel('Days since outbreak')
plt.ylabel('Cases per 100K')
plt.grid(True, alpha=0.3)
plt.legend(fontsize=9)
plt.tight_layout()
plt.show()
# =============================================================================
# STEP 7: QUANTITATIVE RISK ASSESSMENT
# =============================================================================
# Calculate key metrics for decision making
future_days = X_pred[X_pred > observed_days[-1]]
future_mean = y_final_mean[X_pred > observed_days[-1]]
future_std = y_final_std[X_pred > observed_days[-1]]
peak_idx = np.argmax(future_mean)
peak_day = future_days[peak_idx]
peak_mean = future_mean[peak_idx]
peak_std = future_std[peak_idx]
# Probability of exceeding hospital capacity
prob_exceed = 1 - norm.cdf(hospital_capacity, peak_mean, peak_std)
print("\nπŸ₯ PUBLIC HEALTH DECISION SUPPORT")
print("=" * 50)
print(f"πŸ“ˆ Predicted peak: Day {peak_day:.0f}")
print(f"πŸ“Š Peak cases: {peak_mean:.1f} Β± {peak_std:.1f} per 100K")
print(f"⚠️ Probability of exceeding hospital capacity: {prob_exceed:.1%}")
print(f"πŸ’‘ Recommendation: {'🚨 IMPLEMENT INTERVENTION' if prob_exceed > 0.3 else 'πŸ‘€ MONITOR CLOSELY'}")
print("\nπŸŽ“ KEY LEARNING POINTS")
print("=" * 50)
print("βœ… Uncertainty decreases as more data arrives")
print("βœ… Prior beliefs matter most with limited data")
print("βœ… Bayesian methods provide probabilistic forecasts")
print("βœ… Uncertainty quantification enables risk-based decisions")
print("βœ… Perfect for early outbreak response when stakes are high")
print(f"\nπŸ“‹ Model Summary: {len(observed_days)} observations, {n_features} epidemic basis functions")
print(f"🎯 Final prediction accuracy: Mean absolute error = {np.mean(np.abs(true_epidemic_curve(observed_days) - observed_cases)):.1f} cases/100K")
import matplotlib.animation as animation
from matplotlib.patches import Rectangle
import tempfile
import os
def create_bayesian_learning_animation(
save_path="bayesian_epidemic_learning.gif",
fps=1.5, # Slower for educational viewing
dpi=100
):
"""
Create an animated GIF showing Bayesian learning in action
Each frame shows the model after adding one more data point
"""
print("🎬 Creating Bayesian Learning Animation...")
print("=" * 50)
# Animation parameters
n_data_points = len(observed_days)
frames = []
# Create figure for animation - single plot only
fig, ax1 = plt.subplots(1, 1, figsize=(12, 8))
for frame_idx in range(n_data_points + 1):
print(f"πŸ“Ή Generating frame {frame_idx + 1}/{n_data_points + 1}")
# Clear axes
ax1.clear()
# Determine how much data to use
if frame_idx == 0:
# Prior only (no data)
X_train = np.array([])
y_train = np.array([])
title_suffix = "Prior Beliefs Only"
data_description = "No surveillance data yet"
else:
# Use first frame_idx data points
X_train = observed_days[:frame_idx]
y_train = observed_cases[:frame_idx]
title_suffix = f"After {frame_idx} observations"
data_description = f"Using {frame_idx} surveillance reports"
# Model predictions plot
ax1.set_xlim(0, 120)
ax1.set_ylim(0, 60)
ax1.grid(True, alpha=0.3)
ax1.set_xlabel('Days since outbreak start', fontsize=14)
ax1.set_ylabel('Daily cases per 100K population', fontsize=14)
ax1.set_title(f'Epidemic Model - {title_suffix}', fontsize=16, fontweight='bold')
# Always show the true curve
ax1.plot(days_true, infections_true, 'r-', linewidth=3, alpha=0.8,
label='True epidemic curve (unknown)', zorder=1)
if len(X_train) == 0:
# Show prior predictive distribution
m_prior = np.zeros(n_features)
# Prior covariance with weak prior precision
S_prior = np.eye(n_features) / 0.05
y_pred_mean, y_pred_std = predict_with_uncertainty(X_pred, m_prior, S_prior, n_features, beta)
uncertainty_text = "Very High Uncertainty"
uncertainty_color = 'red'
else:
# Fit model with current data
m, S = bayesian_regression(X_train, y_train, n_features, alpha, beta)
y_pred_mean, y_pred_std = predict_with_uncertainty(X_pred, m, S, n_features, beta)
# Calculate uncertainty level
avg_uncertainty = np.mean(y_pred_std)
if avg_uncertainty > 8:
uncertainty_text = "High Uncertainty"
uncertainty_color = 'orange'
elif avg_uncertainty > 5:
uncertainty_text = "Moderate Uncertainty"
uncertainty_color = 'yellow'
else:
uncertainty_text = "Lower Uncertainty"
uncertainty_color = 'lightgreen'
# Plot model predictions
ax1.plot(X_pred, y_pred_mean, 'b-', linewidth=3, label='Bayesian prediction', zorder=3)
ax1.fill_between(X_pred, y_pred_mean - 2*y_pred_std, y_pred_mean + 2*y_pred_std,
alpha=0.2, color='blue', label='95% prediction interval', zorder=2)
ax1.fill_between(X_pred, y_pred_mean - y_pred_std, y_pred_mean + y_pred_std,
alpha=0.3, color='blue', label='68% prediction interval', zorder=2)
# Show observed data points
if len(X_train) > 0:
ax1.scatter(X_train, y_train, color='black', s=100, zorder=5,
label=f'Observed data (n={len(X_train)})', edgecolors='white', linewidth=2)
# Highlight the newest point
if len(X_train) > 1:
ax1.scatter(X_train[-1], y_train[-1], color='yellow', s=150, zorder=6,
marker='*', edgecolors='black', linewidth=2, label='Latest observation')
# Show future data points as grayed out
if frame_idx < n_data_points:
future_X = observed_days[frame_idx:]
future_y = observed_cases[frame_idx:]
ax1.scatter(future_X, future_y, color='lightgray', s=60, alpha=0.5, zorder=1,
label='Future data (unknown)')
# Add uncertainty indicator
ax1.text(0.02, 0.98, f'Uncertainty Level: {uncertainty_text}',
transform=ax1.transAxes, fontsize=11, fontweight='bold',
bbox=dict(boxstyle='round,pad=0.5', facecolor=uncertainty_color, alpha=0.7),
verticalalignment='top')
# Add data info
ax1.text(0.02, 0.02, data_description,
transform=ax1.transAxes, fontsize=10,
bbox=dict(boxstyle='round,pad=0.3', facecolor='lightblue', alpha=0.7),
verticalalignment='bottom')
ax1.legend(loc='upper right', fontsize=11)
# Add frame number
fig.suptitle(f'Bayesian Learning: Every observation updates the model',
fontsize=18, fontweight='bold')
plt.tight_layout()
# Save frame to temporary file
temp_file = f'temp_frame_{frame_idx:03d}.png'
plt.savefig(temp_file, dpi=dpi, bbox_inches='tight', facecolor='white')
frames.append(temp_file)
# Create GIF from frames
print(f"\nπŸŽ₯ Assembling {len(frames)} frames into GIF...")
# Import imageio for GIF creation
try:
import imageio.v2 as imageio
# Read all frames and ensure consistent dimensions
images = []
for frame_file in frames:
img = imageio.imread(frame_file)
images.append(img)
# Ensure all images have the same shape by cropping/padding to the minimum common size
if len(images) > 1:
# Find minimum dimensions
min_height = min(img.shape[0] for img in images)
min_width = min(img.shape[1] for img in images)
# Crop all images to the same size
processed_images = []
for img in images:
if len(img.shape) == 3: # RGB image
cropped = img[:min_height, :min_width, :]
else: # Grayscale
cropped = img[:min_height, :min_width]
processed_images.append(cropped)
images = processed_images
# Create custom frame durations as requested
durations = []
for i in range(len(images)):
if i < 7: # First 7 frames: 10 seconds each
durations.append(3000.0)
elif i < 12: # Next 5 frames (frames 7-11): 5 seconds each
durations.append(2000.0)
else: # Remaining frames: 1 second each
durations.append(1000.0)
# Save GIF with consistent frame sizes
imageio.mimsave(save_path, images, duration=durations, loop=0)
# Clean up temporary files
for frame_file in frames:
try:
os.remove(frame_file)
except:
pass
print(f"βœ… Animation saved to: {save_path}")
print(f"πŸ“Š Animation details:")
print(f" - {len(images)} frames")
print(f" - Dimensions: {images[0].shape}")
print(f" - Custom timing: 10s (first 7), 5s (next 5), 1s (remaining)")
print(f" - Single plot focus for clarity")
print(f" - Shows uncertainty reduction over time")
print(f" - Perfect for educational presentations")
except ImportError:
print("❌ Error: imageio not available for GIF creation")
print("πŸ’‘ Install with: pip install imageio")
return None
except Exception as e:
print(f"❌ Error creating GIF: {e}")
print("πŸ’‘ Frames were created successfully, GIF assembly failed")
return None
plt.close(fig)
return save_path
# Create the animation
print("🎬 CREATING BAYESIAN LEARNING ANIMATION")
print("=" * 50)
print("This will show how the model evolves as each data point arrives...")
animation_path = create_bayesian_learning_animation(
save_path="bayesian_epidemic.gif",
fps=1, # Base fps (overridden by custom durations)
dpi=120 # Higher quality for single plot
)
if animation_path:
print(f"\nπŸŽ‰ SUCCESS! Animation created: {animation_path}")
print("\nπŸŽ“ Educational Value:")
print(" βœ… Shows prior β†’ posterior progression")
print(" βœ… Demonstrates uncertainty quantification")
print(" βœ… Visualizes Bayesian learning process")
print(" βœ… Perfect for teaching epidemic modeling")
print(" βœ… Great for presentations and gists!")
else:
print("\n❌ Animation creation failed - check dependencies")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment