Skip to content

Instantly share code, notes, and snippets.

@thomasahle
Created October 19, 2025 07:30
Show Gist options
  • Select an option

  • Save thomasahle/fa21722785650b3b3e111d6708926bab to your computer and use it in GitHub Desktop.

Select an option

Save thomasahle/fa21722785650b3b3e111d6708926bab to your computer and use it in GitHub Desktop.
# Simulate centered Bernoulli(p) random walk survival in [-1, 1]
# and plot the estimated survival probability.
#
# How it works
# ------------
# For X_i ~ Bernoulli(p), define S_j = sum_{i=0}^j (X_i - p).
# For a trial "survives" up to time n if, for every j in {0,...,n},
# we have -1 <= S_j <= 1. We estimate the survival probability by
# Monte Carlo over many independent trials.
#
# You can tweak the parameters in the "CONFIG" section. By default,
# we fix p and plot the survival probability as a function of n=1..N_MAX.
#
# Notes
# -----
# - Uses only NumPy + Matplotlib (no seaborn).
# - Each chart is a single plot (no subplots).
# - No specific colors/styles are set (besides line style to differentiate),
# to satisfy the plotting constraints.
# - For large n, the true survival probability may be extremely small; increase
# `TRIALS` to reduce variance, or limit to moderate n.
#
# Author: ChatGPT
from typing import Sequence, Optional, Tuple
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
# For displaying tables nicely in the UI
from caas_jupyter_tools import display_dataframe_to_user
def _simulate_survival_once_matrix(p: float, steps: int, trials: int, seed: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray]:
"""
Vectorized simulation:
- Generate a (trials x steps) matrix of Bernoulli(p) draws for X_0,...,X_{steps-1}.
- Form increments (X_i - p), take cumulative sums, and compute max |S_j| over j.
Returns:
cumsums: the cumulative sums matrix (trials x steps)
max_abs_prefix: the running max of |cumsums| along the time axis (trials x steps)
"""
rng = np.random.default_rng(seed)
# Bernoulli draws for X_0,...,X_{steps-1}
X = (rng.random((trials, steps)) < p).astype(np.float32)
increments = X - np.float32(p) # centered increments in {-p, 1-p}
cumsums = np.cumsum(increments, axis=1, dtype=np.float32)
# Running maximum of absolute partial sums along the time axis
max_abs_prefix = np.maximum.accumulate(np.abs(cumsums), axis=1)
return cumsums, max_abs_prefix
def estimate_survival_prob(p: float, n: int, trials: int, seed: Optional[int] = None
) -> Tuple[float, float]:
"""
Estimate Pr[ max_{0<=j<=n} |S_j| <= 1 ] via Monte Carlo with `trials` independent runs.
Returns (probability_estimate, standard_error).
"""
steps = n + 1 # because S_j uses X_0..X_j inclusive
_, max_abs_prefix = _simulate_survival_once_matrix(p, steps, trials, seed=seed)
survive = max_abs_prefix[:, n] <= 1.0 + 1e-12 # include j = n
phat = float(np.mean(survive))
se = math.sqrt(phat * max(1.0 - phat, 0.0) / trials)
return phat, se
def scan_vs_n(p: float, n_max: int, trials: int, seed: Optional[int] = None,
n_values: Optional[Sequence[int]] = None) -> pd.DataFrame:
"""
For a fixed p, estimate survival probability for each n in n_values (default 1..n_max).
Efficiently reuses the same simulated matrix for all prefixes 0..n_max.
Returns a DataFrame with columns: ['n','p','prob','se','log_prob_over_n','exp_np','bound_exp','bound_one_over','bound_entropy'].
"""
steps = n_max + 1
_, max_abs_prefix = _simulate_survival_once_matrix(p, steps, trials, seed=seed)
if n_values is None:
n_values = list(range(1, n_max + 1))
records = []
p1mp = p * (1.0 - p)
H = p * math.log(1.0 / p) + (1.0 - p) * math.log(1.0 / (1.0 - p))
for n in n_values:
survive = max_abs_prefix[:, n] <= 1.0 + 1e-12
phat = float(np.mean(survive))
se = math.sqrt(phat * max(1.0 - phat, 0.0) / trials)
log_over_n = (math.log(phat) / n) if phat > 0 else float("nan")
records.append({
"n": n,
"p": p,
"prob": phat,
"se": se,
"log_prob_over_n": log_over_n,
"bound_exp": math.exp(-n * p1mp), # exp(-n p(1-p))
"bound_one_over": (1.0 + p1mp) ** (-n), # (1 + p(1-p))^{-n}
"bound_entropy": math.exp(-n * H), # exp(-n H(p))
})
return pd.DataFrame.from_records(records)
def scan_vs_p(n: int, p_values: Sequence[float], trials: int, seed: Optional[int] = None) -> pd.DataFrame:
"""
For a fixed n, estimate survival probability for each p in p_values.
Returns a DataFrame with columns: ['p','n','prob','se','log_prob_over_n','bound_exp','bound_one_over','bound_entropy'].
"""
records = []
for p in p_values:
phat, se = estimate_survival_prob(p, n, trials, seed=None if seed is None else (seed + int(1e6 * p)))
p1mp = p * (1.0 - p)
H = p * math.log(1.0 / p) + (1.0 - p) * math.log(1.0 / (1.0 - p))
log_over_n = (math.log(phat) / n) if phat > 0 else float("nan")
records.append({
"p": p,
"n": n,
"prob": phat,
"se": se,
"log_prob_over_n": log_over_n,
"bound_exp": math.exp(-n * p1mp),
"bound_one_over": (1.0 + p1mp) ** (-n),
"bound_entropy": math.exp(-n * H),
})
return pd.DataFrame.from_records(records)
def plot_vs_n(df: pd.DataFrame) -> None:
"""
Plot survival probability vs n and overlay the candidate bounds for the same p.
"""
p = float(df["p"].iloc[0])
plt.figure()
plt.plot(df["n"], df["prob"], marker="o", label="Simulated survival prob.")
plt.plot(df["n"], df["bound_exp"], linestyle="--", label="exp(-n p(1-p))")
plt.plot(df["n"], df["bound_one_over"], linestyle=":", label="(1 + p(1-p))^{-n}")
plt.plot(df["n"], df["bound_entropy"], linestyle="-.", label="exp(-n H(p))")
plt.xlabel("n")
plt.ylabel("Survival probability (stay in [-1,1])")
plt.title(f"Centered Bernoulli(p) walk survival in [-1,1] (p={p:.3f})")
plt.legend()
plt.tight_layout()
plt.show()
def plot_logprob_over_n_vs_p(df: pd.DataFrame) -> None:
"""
Plot (log survival probability)/n vs p with the two candidate exponents,
similar in spirit to the figure described.
"""
n = int(df["n"].iloc[0])
plt.figure()
plt.plot(df["p"], df["log_prob_over_n"], marker="o", label="Simulated (log P)/n")
# Overlay the conjectured curves transformed likewise
plt.plot(df["p"], np.log(df["bound_exp"]) / n, linestyle="--", label="-(p(1-p))")
plt.plot(df["p"], np.log(df["bound_one_over"]) / n, linestyle=":", label="-log(1 + p(1-p))")
plt.plot(df["p"], np.log(df["bound_entropy"]) / n, linestyle="-.", label="-H(p)")
plt.xlabel("p")
plt.ylabel("(log survival probability) / n")
plt.title(f"(log P_survival)/n vs p (n={n})")
plt.legend()
plt.tight_layout()
plt.show()
# ======================
# ====== CONFIG ========
# ======================
# Choose one of the following modes:
# "single": estimate for one (p, n) and print the number.
# "vary_n": fix p and plot survival probability vs n=1..N_MAX (default).
# "vary_p": fix n and plot (log P)/n vs p on a grid.
MODE = "vary_n" # "single", "vary_n", or "vary_p"
# Common simulation controls
TRIALS = 20000 # increase for more accuracy (at the cost of runtime)
SEED = 42 # set None for different randomness each run
# Parameters for MODE == "single"
P_SINGLE = 0.30
N_SINGLE = 200
# Parameters for MODE == "vary_n"
P_FIXED = 0.30
N_MAX = 200
# Optionally restrict to a subset of n to speed things up (None means 1..N_MAX)
N_VALUES = None # e.g., [10, 20, 30, 40, 50, 75, 100, 150, 200]
# Parameters for MODE == "vary_p"
N_FIXED = 150
P_VALUES = np.round(np.linspace(0.05, 0.95, 19), 3) # 0.05, 0.10, ..., 0.95
# ======================
# ====== RUN ===========
# ======================
if MODE == "single":
phat, se = estimate_survival_prob(P_SINGLE, N_SINGLE, TRIALS, seed=SEED)
print(f"Estimated survival probability for p={P_SINGLE}, n={N_SINGLE} over {TRIALS} trials: {phat:.6g} ± {se:.2g}")
elif MODE == "vary_n":
df = scan_vs_n(P_FIXED, N_MAX, TRIALS, seed=SEED, n_values=N_VALUES)
# Show table for reference
display_dataframe_to_user("Survival vs n", df)
# Plot
plot_vs_n(df)
else: # MODE == "vary_p"
df = scan_vs_p(N_FIXED, P_VALUES, TRIALS, seed=SEED)
display_dataframe_to_user("(log P_survival)/n vs p", df)
plot_logprob_over_n_vs_p(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment