Created
October 19, 2025 07:30
-
-
Save thomasahle/fa21722785650b3b3e111d6708926bab to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Simulate centered Bernoulli(p) random walk survival in [-1, 1] | |
| # and plot the estimated survival probability. | |
| # | |
| # How it works | |
| # ------------ | |
| # For X_i ~ Bernoulli(p), define S_j = sum_{i=0}^j (X_i - p). | |
| # For a trial "survives" up to time n if, for every j in {0,...,n}, | |
| # we have -1 <= S_j <= 1. We estimate the survival probability by | |
| # Monte Carlo over many independent trials. | |
| # | |
| # You can tweak the parameters in the "CONFIG" section. By default, | |
| # we fix p and plot the survival probability as a function of n=1..N_MAX. | |
| # | |
| # Notes | |
| # ----- | |
| # - Uses only NumPy + Matplotlib (no seaborn). | |
| # - Each chart is a single plot (no subplots). | |
| # - No specific colors/styles are set (besides line style to differentiate), | |
| # to satisfy the plotting constraints. | |
| # - For large n, the true survival probability may be extremely small; increase | |
| # `TRIALS` to reduce variance, or limit to moderate n. | |
| # | |
| # Author: ChatGPT | |
| from typing import Sequence, Optional, Tuple | |
| import numpy as np | |
| import pandas as pd | |
| import math | |
| import matplotlib.pyplot as plt | |
| # For displaying tables nicely in the UI | |
| from caas_jupyter_tools import display_dataframe_to_user | |
| def _simulate_survival_once_matrix(p: float, steps: int, trials: int, seed: Optional[int] = None | |
| ) -> Tuple[np.ndarray, np.ndarray]: | |
| """ | |
| Vectorized simulation: | |
| - Generate a (trials x steps) matrix of Bernoulli(p) draws for X_0,...,X_{steps-1}. | |
| - Form increments (X_i - p), take cumulative sums, and compute max |S_j| over j. | |
| Returns: | |
| cumsums: the cumulative sums matrix (trials x steps) | |
| max_abs_prefix: the running max of |cumsums| along the time axis (trials x steps) | |
| """ | |
| rng = np.random.default_rng(seed) | |
| # Bernoulli draws for X_0,...,X_{steps-1} | |
| X = (rng.random((trials, steps)) < p).astype(np.float32) | |
| increments = X - np.float32(p) # centered increments in {-p, 1-p} | |
| cumsums = np.cumsum(increments, axis=1, dtype=np.float32) | |
| # Running maximum of absolute partial sums along the time axis | |
| max_abs_prefix = np.maximum.accumulate(np.abs(cumsums), axis=1) | |
| return cumsums, max_abs_prefix | |
| def estimate_survival_prob(p: float, n: int, trials: int, seed: Optional[int] = None | |
| ) -> Tuple[float, float]: | |
| """ | |
| Estimate Pr[ max_{0<=j<=n} |S_j| <= 1 ] via Monte Carlo with `trials` independent runs. | |
| Returns (probability_estimate, standard_error). | |
| """ | |
| steps = n + 1 # because S_j uses X_0..X_j inclusive | |
| _, max_abs_prefix = _simulate_survival_once_matrix(p, steps, trials, seed=seed) | |
| survive = max_abs_prefix[:, n] <= 1.0 + 1e-12 # include j = n | |
| phat = float(np.mean(survive)) | |
| se = math.sqrt(phat * max(1.0 - phat, 0.0) / trials) | |
| return phat, se | |
| def scan_vs_n(p: float, n_max: int, trials: int, seed: Optional[int] = None, | |
| n_values: Optional[Sequence[int]] = None) -> pd.DataFrame: | |
| """ | |
| For a fixed p, estimate survival probability for each n in n_values (default 1..n_max). | |
| Efficiently reuses the same simulated matrix for all prefixes 0..n_max. | |
| Returns a DataFrame with columns: ['n','p','prob','se','log_prob_over_n','exp_np','bound_exp','bound_one_over','bound_entropy']. | |
| """ | |
| steps = n_max + 1 | |
| _, max_abs_prefix = _simulate_survival_once_matrix(p, steps, trials, seed=seed) | |
| if n_values is None: | |
| n_values = list(range(1, n_max + 1)) | |
| records = [] | |
| p1mp = p * (1.0 - p) | |
| H = p * math.log(1.0 / p) + (1.0 - p) * math.log(1.0 / (1.0 - p)) | |
| for n in n_values: | |
| survive = max_abs_prefix[:, n] <= 1.0 + 1e-12 | |
| phat = float(np.mean(survive)) | |
| se = math.sqrt(phat * max(1.0 - phat, 0.0) / trials) | |
| log_over_n = (math.log(phat) / n) if phat > 0 else float("nan") | |
| records.append({ | |
| "n": n, | |
| "p": p, | |
| "prob": phat, | |
| "se": se, | |
| "log_prob_over_n": log_over_n, | |
| "bound_exp": math.exp(-n * p1mp), # exp(-n p(1-p)) | |
| "bound_one_over": (1.0 + p1mp) ** (-n), # (1 + p(1-p))^{-n} | |
| "bound_entropy": math.exp(-n * H), # exp(-n H(p)) | |
| }) | |
| return pd.DataFrame.from_records(records) | |
| def scan_vs_p(n: int, p_values: Sequence[float], trials: int, seed: Optional[int] = None) -> pd.DataFrame: | |
| """ | |
| For a fixed n, estimate survival probability for each p in p_values. | |
| Returns a DataFrame with columns: ['p','n','prob','se','log_prob_over_n','bound_exp','bound_one_over','bound_entropy']. | |
| """ | |
| records = [] | |
| for p in p_values: | |
| phat, se = estimate_survival_prob(p, n, trials, seed=None if seed is None else (seed + int(1e6 * p))) | |
| p1mp = p * (1.0 - p) | |
| H = p * math.log(1.0 / p) + (1.0 - p) * math.log(1.0 / (1.0 - p)) | |
| log_over_n = (math.log(phat) / n) if phat > 0 else float("nan") | |
| records.append({ | |
| "p": p, | |
| "n": n, | |
| "prob": phat, | |
| "se": se, | |
| "log_prob_over_n": log_over_n, | |
| "bound_exp": math.exp(-n * p1mp), | |
| "bound_one_over": (1.0 + p1mp) ** (-n), | |
| "bound_entropy": math.exp(-n * H), | |
| }) | |
| return pd.DataFrame.from_records(records) | |
| def plot_vs_n(df: pd.DataFrame) -> None: | |
| """ | |
| Plot survival probability vs n and overlay the candidate bounds for the same p. | |
| """ | |
| p = float(df["p"].iloc[0]) | |
| plt.figure() | |
| plt.plot(df["n"], df["prob"], marker="o", label="Simulated survival prob.") | |
| plt.plot(df["n"], df["bound_exp"], linestyle="--", label="exp(-n p(1-p))") | |
| plt.plot(df["n"], df["bound_one_over"], linestyle=":", label="(1 + p(1-p))^{-n}") | |
| plt.plot(df["n"], df["bound_entropy"], linestyle="-.", label="exp(-n H(p))") | |
| plt.xlabel("n") | |
| plt.ylabel("Survival probability (stay in [-1,1])") | |
| plt.title(f"Centered Bernoulli(p) walk survival in [-1,1] (p={p:.3f})") | |
| plt.legend() | |
| plt.tight_layout() | |
| plt.show() | |
| def plot_logprob_over_n_vs_p(df: pd.DataFrame) -> None: | |
| """ | |
| Plot (log survival probability)/n vs p with the two candidate exponents, | |
| similar in spirit to the figure described. | |
| """ | |
| n = int(df["n"].iloc[0]) | |
| plt.figure() | |
| plt.plot(df["p"], df["log_prob_over_n"], marker="o", label="Simulated (log P)/n") | |
| # Overlay the conjectured curves transformed likewise | |
| plt.plot(df["p"], np.log(df["bound_exp"]) / n, linestyle="--", label="-(p(1-p))") | |
| plt.plot(df["p"], np.log(df["bound_one_over"]) / n, linestyle=":", label="-log(1 + p(1-p))") | |
| plt.plot(df["p"], np.log(df["bound_entropy"]) / n, linestyle="-.", label="-H(p)") | |
| plt.xlabel("p") | |
| plt.ylabel("(log survival probability) / n") | |
| plt.title(f"(log P_survival)/n vs p (n={n})") | |
| plt.legend() | |
| plt.tight_layout() | |
| plt.show() | |
| # ====================== | |
| # ====== CONFIG ======== | |
| # ====================== | |
| # Choose one of the following modes: | |
| # "single": estimate for one (p, n) and print the number. | |
| # "vary_n": fix p and plot survival probability vs n=1..N_MAX (default). | |
| # "vary_p": fix n and plot (log P)/n vs p on a grid. | |
| MODE = "vary_n" # "single", "vary_n", or "vary_p" | |
| # Common simulation controls | |
| TRIALS = 20000 # increase for more accuracy (at the cost of runtime) | |
| SEED = 42 # set None for different randomness each run | |
| # Parameters for MODE == "single" | |
| P_SINGLE = 0.30 | |
| N_SINGLE = 200 | |
| # Parameters for MODE == "vary_n" | |
| P_FIXED = 0.30 | |
| N_MAX = 200 | |
| # Optionally restrict to a subset of n to speed things up (None means 1..N_MAX) | |
| N_VALUES = None # e.g., [10, 20, 30, 40, 50, 75, 100, 150, 200] | |
| # Parameters for MODE == "vary_p" | |
| N_FIXED = 150 | |
| P_VALUES = np.round(np.linspace(0.05, 0.95, 19), 3) # 0.05, 0.10, ..., 0.95 | |
| # ====================== | |
| # ====== RUN =========== | |
| # ====================== | |
| if MODE == "single": | |
| phat, se = estimate_survival_prob(P_SINGLE, N_SINGLE, TRIALS, seed=SEED) | |
| print(f"Estimated survival probability for p={P_SINGLE}, n={N_SINGLE} over {TRIALS} trials: {phat:.6g} ± {se:.2g}") | |
| elif MODE == "vary_n": | |
| df = scan_vs_n(P_FIXED, N_MAX, TRIALS, seed=SEED, n_values=N_VALUES) | |
| # Show table for reference | |
| display_dataframe_to_user("Survival vs n", df) | |
| # Plot | |
| plot_vs_n(df) | |
| else: # MODE == "vary_p" | |
| df = scan_vs_p(N_FIXED, P_VALUES, TRIALS, seed=SEED) | |
| display_dataframe_to_user("(log P_survival)/n vs p", df) | |
| plot_logprob_over_n_vs_p(df) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment