al6x · July 30, 2025 10:31
diff --git a/hill-vs-mle.py b/hill-vs-mle.py
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy.stats import t
 from scipy.optimize import minimize_scalar

 def student_sample(df, n):
     """Draw ``n`` variates from a standard Student t distribution.

     Args:
         df: Degrees of freedom passed to ``scipy.stats.t``.
         n: Value forwarded as ``size`` to ``t.rvs``.

     Returns:
         Whatever ``t.rvs`` returns for the given ``size``; location is
         fixed at 0 and scale at 1.
     """
     draws = t.rvs(df, size=n, loc=0, scale=1)
     return draws

 def estimate_hill(x):
     """Return Hill-type tail-index estimates for every rank k = 1..len(x).

     The input is sorted in descending order. For each rank ``k`` the
     estimate is ``1 / (mean(log x_(1..k)) - log x_(k))``, where ``x_(i)``
     is the i-th largest value.

     NOTE(review): the textbook Hill estimator uses the (k+1)-th largest
     value as the threshold; this code uses the k-th. Confirm that is
     intended.

     Args:
         x: Array-like of observations. The logarithm is applied directly,
             so values are assumed to be strictly positive; NumPy yields
             nan/-inf for anything else.

     Returns:
         NumPy array with one estimate per rank. The first entry is always
         ``inf`` because the denominator is exactly zero at rank 1.
     """
     ordered = np.sort(x)[::-1]
     logx = np.log(ordered)
     k = np.arange(1, len(ordered) + 1)
     denom = np.cumsum(logx) / k - logx
     # The denominator is exactly 0 at rank 1 (and wherever the top-k values
     # are all equal). The resulting inf is expected, so keep it but do not
     # emit a divide-by-zero warning or error for it.
     with np.errstate(divide="ignore"):
         return 1 / denom

 def fit_mle_student(x):
     """Fit the degrees of freedom of a standard Student t by maximum likelihood.

     Location is fixed at 0 and scale at 1; only the degrees of freedom are
     optimised, with the search bounded to the interval (2.01, 100).

     Args:
         x: Observations passed to ``t.logpdf``.

     Returns:
         The degrees-of-freedom value reported by the optimiser.

     Raises:
         RuntimeError: If the optimiser does not report success.
     """

     def neg_log_likelihood(dof):
         # Values at or below 2 are treated as infeasible.
         if dof <= 2:
             return np.inf
         log_density = t.logpdf(x, dof, loc=0, scale=1)
         return -np.sum(log_density)

     outcome = minimize_scalar(
         neg_log_likelihood, method='bounded', bounds=(2.01, 100)
     )
     if outcome.success:
         return outcome.x
     raise RuntimeError("MLE fit failed")

 # Experiment settings
 N = 30          # number of independent samples to draw
 n_obs = 10_000  # observations per sample
 k = 500         # number of largest observations passed to estimate_hill

 plt.figure(figsize=(8, 6))
 for _ in range(N):
     x = student_sample(4, n_obs)

     # Black curve: Hill estimates over the k largest observations,
     # taken in descending order.
     tail = np.sort(x)[-k:][::-1]
     hill = estimate_hill(tail)
     plt.plot(hill, color='black', alpha=0.7, linewidth=1)

     # Red horizontal line: degrees of freedom fitted on the full sample.
     mle_df = fit_mle_student(x)
     plt.axhline(mle_df, color='red', alpha=0.7, linewidth=1)

 plt.title(f'Hill estimates (black) and MLE (red) over {N} samples of size {n_obs}. True value = 4')
 plt.xlabel('Order-stats rank k')
 plt.ylabel('Tail index estimate')
 plt.ylim(2, 6)
 plt.show()
	import numpy as np
	import matplotlib.pyplot as plt
	from scipy.stats import t
	from scipy.optimize import minimize_scalar

	def student_sample(df, n):
	    """Draw ``n`` variates from a standard Student t distribution.

	    Args:
	        df: Degrees of freedom passed to ``scipy.stats.t``.
	        n: Value forwarded as ``size`` to ``t.rvs``.

	    Returns:
	        Whatever ``t.rvs`` returns for the given ``size``; location is
	        fixed at 0 and scale at 1.
	    """
	    return t.rvs(df, loc=0, scale=1, size=n)

	def estimate_hill(x):
	    """Return Hill-type tail-index estimates for every rank k = 1..len(x).

	    The input is sorted in descending order. For each rank ``k`` the
	    estimate is ``1 / (mean(log x_(1..k)) - log x_(k))``, where ``x_(i)``
	    is the i-th largest value.

	    NOTE(review): the textbook Hill estimator uses the (k+1)-th largest
	    value as the threshold; this code uses the k-th. Confirm that is
	    intended.

	    Args:
	        x: Array-like of observations. The logarithm is applied directly,
	            so values are assumed to be strictly positive; NumPy yields
	            nan/-inf for anything else.

	    Returns:
	        NumPy array with one estimate per rank. The first entry is always
	        ``inf`` because the denominator is exactly zero at rank 1.
	    """
	    x = np.sort(x)[::-1]
	    logx = np.log(x)
	    sumk = np.cumsum(logx)
	    k = np.arange(1, len(x) + 1)
	    denom = (sumk / k) - logx
	    # The denominator is exactly 0 at rank 1 (and wherever the top-k values
	    # are all equal). The resulting inf is expected, so keep it but do not
	    # emit a divide-by-zero warning or error for it.
	    with np.errstate(divide="ignore"):
	        return 1 / denom

	def fit_mle_student(x):
	    """Fit the degrees of freedom of a standard Student t by maximum likelihood.

	    Location is fixed at 0 and scale at 1; only the degrees of freedom are
	    optimised, with the search bounded to the interval (2.01, 100).

	    Args:
	        x: Observations passed to ``t.logpdf``.

	    Returns:
	        The degrees-of-freedom value reported by the optimiser.

	    Raises:
	        RuntimeError: If the optimiser does not report success.
	    """

	    def nll(df):
	        # Negative log-likelihood; values at or below 2 are infeasible.
	        return np.inf if df <= 2 else -np.sum(t.logpdf(x, df, loc=0, scale=1))

	    res = minimize_scalar(nll, bounds=(2.01, 100), method='bounded')
	    if not res.success:
	        raise RuntimeError("MLE fit failed")
	    return res.x

	# Parameters
	N = 30          # number of independent samples to draw
	n_obs = 10_000  # observations per sample
	k = 500         # number of largest observations passed to estimate_hill

	plt.figure(figsize=(8, 6))
	for _ in range(N):
	    x = student_sample(4, n_obs)
	    # Keep the k largest observations, in descending order.
	    tail = np.sort(x)[-k:][::-1]
	    hill = estimate_hill(tail)
	    plt.plot(hill, linewidth=1, alpha=0.7, color='black')
	    # Degrees of freedom fitted on the full sample, drawn as a horizontal line.
	    mle_df = fit_mle_student(x)
	    plt.axhline(mle_df, linewidth=1, alpha=0.7, color='red')

	plt.ylim(2, 6)
	plt.xlabel('Order-stats rank k')
	plt.ylabel('Tail index estimate')
	plt.title(f'Hill estimates (black) and MLE (red) over {N} samples of size {n_obs}. True value = 4')
	plt.show()