Skip to content

Instantly share code, notes, and snippets.

@al6x
Created July 30, 2025 10:31
Show Gist options
  • Save al6x/a2431ee9f9c65e5394595d47f1d9972f to your computer and use it in GitHub Desktop.
Save al6x/a2431ee9f9c65e5394595d47f1d9972f to your computer and use it in GitHub Desktop.
Estimating Tail, Hill Plot vs MLE
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t
from scipy.optimize import minimize_scalar
def student_sample(df, n, random_state=None):
    """Draw ``n`` variates from a standard (loc=0, scale=1) Student-t distribution.

    Args:
        df: Degrees of freedom of the Student-t distribution.
        n: Number of variates to draw (forwarded as ``size``).
        random_state: Optional seed or generator forwarded unchanged to
            ``scipy.stats.t.rvs``. The default ``None`` keeps the original
            unseeded behaviour; pass a value to make a run reproducible.

    Returns:
        The array of drawn values returned by ``t.rvs``.
    """
    return t.rvs(df, loc=0, scale=1, size=n, random_state=random_state)
def estimate_hill(x):
    """Return running Hill-type tail-index estimates for the sample ``x``.

    The values are sorted in descending order.  For every rank ``j`` (1-based)
    the estimate is::

        1 / (mean(log x_(1), ..., log x_(j)) - log x_(j))

    where ``x_(i)`` is the i-th largest value.

    Args:
        x: One-dimensional collection of observations.  The logarithm is taken
            of every value, so entries should be strictly positive; other
            values propagate as ``nan``/``inf`` from ``np.log``.

    Returns:
        Array with one estimate per observation, ordered by rank.  The first
        entry is always ``inf`` because the mean of a single log equals that
        log, making the denominator exactly zero.
    """
    ordered = np.sort(np.asarray(x, dtype=float))[::-1]
    log_x = np.log(ordered)
    ranks = np.arange(1, len(ordered) + 1)
    # Running mean of the top-j logs minus the j-th largest log.
    mean_log_excess = np.cumsum(log_x) / ranks - log_x
    # The rank-1 denominator is exactly 0 (and so is any run of leading ties);
    # the resulting inf is intentional, so do not emit a divide warning for it.
    with np.errstate(divide="ignore"):
        return 1.0 / mean_log_excess
def fit_mle_student(x, bounds=(2.01, 100)):
    """Estimate the Student-t degrees of freedom of ``x`` by maximum likelihood.

    Location and scale are held fixed at 0 and 1; only the degrees of freedom
    are optimised, by minimising the negative log-likelihood with SciPy's
    bounded scalar minimiser.

    Args:
        x: Observations assumed to come from a standard Student-t distribution.
        bounds: ``(low, high)`` search interval for the degrees of freedom.
            The default ``(2.01, 100)`` is the interval the original code
            used, which keeps the search above df = 2.

    Returns:
        The degrees-of-freedom value found by the optimiser (``res.x``).

    Raises:
        RuntimeError: If the optimiser reports that it did not succeed.
    """
    sample = np.asarray(x)

    def nll(df):
        # Negative log-likelihood of the sample for a standard t with `df`.
        # The bounded minimiser only evaluates inside `bounds`, so no extra
        # guard on `df` is needed here.
        return -np.sum(t.logpdf(sample, df, loc=0, scale=1))

    res = minimize_scalar(nll, bounds=bounds, method='bounded')
    if not res.success:
        raise RuntimeError("MLE fit failed")
    return res.x
# Parameters
N = 30          # number of independent samples to simulate
n_obs = 10_000  # observations per sample
k = 500         # number of largest observations passed to the Hill estimator
true_df = 4     # degrees of freedom used to generate the data

plt.figure(figsize=(8, 6))
for _ in range(N):
    x = student_sample(true_df, n_obs)

    # Hill curve computed from the k largest observations (descending order).
    tail = np.sort(x)[-k:][::-1]
    hill = estimate_hill(tail)
    # Plot against 1-based ranks so the x-axis matches its "rank k" label.
    ranks = np.arange(1, len(hill) + 1)
    plt.plot(ranks, hill, linewidth=1, alpha=0.7, color='black')

    # MLE of the degrees of freedom from the full sample, one line per sample.
    mle_df = fit_mle_student(x)
    plt.axhline(mle_df, linewidth=1, alpha=0.7, color='red')

plt.ylim(2, 6)
plt.xlabel('Order-stats rank k')
plt.ylabel('Tail index estimate')
plt.title(f'Hill estimates (black) and MLE (red) over {N} samples of size {n_obs}. True value = {true_df}')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment