Created
August 7, 2013 19:51
-
-
Save shuhaowu/6177897 to your computer and use it in GitHub Desktop.
Student t's distribution approximation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
from math import sqrt, log, pi, cos, sin, expm1 | |
_a1 = -3.969683028665376e+01 | |
_a2 = 2.209460984245205e+02 | |
_a3 = -2.759285104469687e+02 | |
_a4 = 1.383577518672690e+02 | |
_a5 = -3.066479806614716e+01 | |
_a6 = 2.506628277459239e+00 | |
_b1 = -5.447609879822406e+01 | |
_b2 = 1.615858368580409e+02 | |
_b3 = -1.556989798598866e+02 | |
_b4 = 6.680131188771972e+01 | |
_b5 = -1.328068155288572e+01 | |
_c1 = -7.784894002430293e-03 | |
_c2 = -3.223964580411365e-01 | |
_c3 = -2.400758277161838e+00 | |
_c4 = -2.549732539343734e+00 | |
_c5 = 4.374664141464968e+00 | |
_c6 = 2.938163982698783e+00 | |
_d1 = 7.784695709041462e-03 | |
_d2 = 3.224671290700398e-01 | |
_d3 = 2.445134137142996e+00 | |
_d4 = 3.754408661907416e+00 | |
_p_low = 0.02425 | |
_p_high = 1.0 - _p_low | |
def inv_norm_cdf(p): | |
"""Estimates the inverse cumulative distribution function for a | |
normal distribution. | |
Args: | |
p: the lower tail probability | |
""" | |
if 0 < p < _p_low: | |
# rational approximation for the lower region | |
q = sqrt(-2 * log(p)) | |
x = (((((_c1*q+_c2)*q+_c3)*q+_c4)*q+_c5)*q+_c6) / ((((_d1*q+_d2)*q+_d3)*q+_d4)*q+1) | |
elif _p_low <= p <= _p_high: | |
# rational approximation for the central region | |
q = p - 0.5 | |
r = q * q | |
x = (((((_a1*r+_a2)*r+_a3)*r+_a4)*r+_a5)*r+_a6)*q / (((((_b1*r+_b2)*r+_b3)*r+_b4)*r+_b5)*r+1.0) | |
else: | |
# rational approximation for the upper region | |
q = sqrt(-2.0 * log(1.0 - p)) | |
x = -(((((_c1*q+_c2)*q+_c3)*q+_c4)*q+_c5)*q+_c6) / ((((_d1*q+_d2)*q+_d3)*q+_d4)*q+1.0) | |
# Can't refine to maximum precision due to the lack of the erfc function. | |
# See http://home.online.no/~pjacklam/notes/invnorm/ for details. | |
return x | |
def tppf(p, df): | |
"""Calculates the percentage point function (ppf) from student t's | |
distribution. | |
Args: | |
p: two tail probability. | |
df: Degrees of freedom. | |
Notes: | |
Ported from http://www.sultanik.com/Quantile_function and | |
http://svn.r-project.org/R/trunk/src/nmath/qt.c | |
It is not exactly know if there is any problems with this | |
implementation as I simplify fixed the implementation from | |
sultanik.com with the R implementation without applying a lot of | |
fixes from the R code as a) I'm not sure what they do, b) the | |
approximation seem to be okay compared against scipy's | |
implementation, and c) i'm not sure how i can rip out the | |
function from all the R stuff. | |
I've tested the errors from a q value of 0.50 to 1.0 (the valid | |
range, increment of 0.01) with df from 1 to 149. The average err | |
is 5.28e-6 max error being 0.00277 and a stddev of 7.71e-5. | |
""" | |
if p > 1 or p <= 0: | |
raise ValueError("p value must be (0, 1]") | |
if df < 1: | |
raise ValueError("df value must be >= 1") | |
if df == 1: | |
p *= pi / 2 | |
return cos(p) / sin(p) | |
a = 1.0 / (df - 0.5) | |
b = 48 / (a * a) | |
c = ((20700 * a / b - 98) * a - 16) * a + 96.36 | |
d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * pi / 2) * df | |
x = d * p | |
y = x ** (2.0 / df) | |
if y > a + 0.05: | |
# asymptotic inverse expansion about the normal | |
x = inv_norm_cdf(p * 0.5) | |
y = x * x | |
if df < 5: | |
c += 0.3 * (df - 4.5) * (x + 0.6) | |
c = (((0.5 * d * x - 0.5) * x - 7.0) * x - 2.0) * x + b + c | |
y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x | |
y = expm1(a * y * y) | |
else: | |
y = ((1.0/(((df + 6.0)/(df * y) - 0.089 * d - 0.822) * (df+2.0) * 3.0) + 0.5 / (df+4.0))*y - 1.0) * (df + 1.0) / (df + 2.0) + 1.0 / y | |
return sqrt(df * y) | |
def test(): | |
from scipy.stats import t | |
from numpy import std, mean | |
""" Tests the function for errors against scipy's implementation """ | |
err = [] | |
for q in xrange(500, 1000): | |
q /= 1000 | |
for df in xrange(1, 150): | |
err.append(abs(tppf((1 - q) * 2, df) - t.ppf(q, df))) | |
print "max", max(err) | |
print "mean", mean(err) | |
print "stddev", std(err) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment