Created
April 7, 2020 04:13
-
-
Save qxj/135bb42dc2ee90a5c9b16446ec321f04 to your computer and use it in GitHub Desktop.
t test, sign test, wilcoxon signed rank test, and 0/1 distribution test.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding:utf-8 -*- | |
""" | |
t test, sign test, wilcoxon signed rank test, and 0/1 distribution test. | |
""" | |
import numpy as np | |
from scipy.special import comb | |
from scipy.stats import norm | |
from scipy.stats import t | |
from wilcoxon import wilcoxon | |
class Hypothesis: | |
def __init__(self, test_data, control_data, n_test=None, n_control=None, alpha=0.05): | |
self.test_data = np.asarray(test_data) | |
self.control_data = np.asarray(control_data) | |
self.n = self.test_data.size | |
self.diff = self.test_data - self.control_data | |
self.alpha = alpha | |
self.n_test = n_test | |
self.n_control = n_control | |
def evaluate(self): | |
if self.n == 1: | |
self.zero_one_test() | |
else: | |
self.t_test() | |
self.sign_test() | |
self.wilcoxon_signed_rank_test() | |
def t_test(self): | |
method_name = "t test" | |
sample_mean = np.mean(self.diff) | |
sample_variance = np.var(self.diff, ddof=1) # Note: set ddof to calculate sample var | |
# pvalue | |
t_statistic = sample_mean / np.sqrt(sample_variance / self.n) | |
pvalue = 1 - t.cdf(df=self.n - 1, x=abs(t_statistic)) | |
test_greater_than_control = sample_mean > 0 | |
self._print_pvalue(method_name, t_statistic, pvalue, test_greater_than_control) | |
# confidence interval | |
half_alpha_percentile = t.ppf(self.alpha / 2, self.n - 1) | |
lower_bound = sample_mean + half_alpha_percentile * np.sqrt(sample_variance / self.n) | |
upper_bound = sample_mean - half_alpha_percentile * np.sqrt(sample_variance / self.n) | |
self._print_confidence_intervel(lower_bound, upper_bound) | |
def sign_test(self): | |
method_name = "sign test" | |
# pvalue | |
pos = np.sum(self.diff > 0) | |
neg = np.sum(self.diff < 0) | |
count = pos + neg | |
statistic = min(pos, neg) | |
pvalue = 0 | |
for i in xrange(statistic + 1): | |
pvalue += comb(count, i) | |
pvalue *= np.power(0.5, count) | |
test_greater_than_control = pos > neg | |
self._print_pvalue(method_name, statistic, pvalue, test_greater_than_control) | |
# confidence interval: todo | |
def wilcoxon_signed_rank_test(self): | |
method_name = "wilcoxon signed test" | |
# pvalue | |
statistic, pvalue, test_greater_than_control = wilcoxon(self.test_data, self.control_data) | |
self._print_pvalue(method_name, statistic, pvalue, test_greater_than_control) | |
# confidence interval: todo | |
def zero_one_test(self): | |
method_name = "zero/one test" | |
# pvalue | |
p_a = self.test_data | |
p_b = self.control_data | |
n_a = self.n_test | |
n_b = self.n_control | |
test_greater_than_control = p_a > p_b | |
sample_mean = p_a - p_b | |
variance = p_a * (1 - p_a) / n_a + p_b * (1 - p_b) / n_b | |
statistic = sample_mean / np.sqrt(variance) | |
pvalue = 1 - norm.cdf(abs(statistic)) | |
self._print_pvalue(method_name, statistic, pvalue, test_greater_than_control) | |
# confidence interval | |
half_alpha_percentile = norm.ppf(self.alpha / 2) | |
lower_bound = sample_mean + half_alpha_percentile * np.sqrt(variance) | |
upper_bound = sample_mean - half_alpha_percentile * np.sqrt(variance) | |
self._print_confidence_intervel(lower_bound, upper_bound) | |
def _print_pvalue(self, method_name, statistic, pvalue, test_greater_than_control): | |
print("%s: alpha is %.4f, statistic is %.4f, pvalue is %.4f." % (method_name, self.alpha, statistic, pvalue)) | |
if pvalue > self.alpha: | |
print("\t pvalue is larger than alpha. The testing is not significant.") | |
else: | |
if test_greater_than_control: | |
print("\t pvalue is smaller than alpha. The test experiment is significantly better than control.") | |
else: | |
print("\t pvalue is smaller than alpha. The control experiment is significantly better than test.") | |
@staticmethod | |
def _print_confidence_intervel(lower_bound, upper_bound): | |
print("\t confidence intervel is [%.4f, %.4f]" % (lower_bound, upper_bound)) | |
if __name__ == '__main__': | |
# t test, sign test, wilcoxon signed rank test | |
test = [20, 13, 13, 20, 28, 32, 23, 20, 25, 15, 30] | |
control = [3, 3, 3, 12, 15, 16, 17, 19, 23, 24, 32] | |
hypo = Hypothesis(np.array(test), np.array(control)) | |
hypo.evaluate() | |
# zero/one test | |
p_a = 0.10510 | |
p_b = 0.10600 | |
n_a = 575997 | |
n_b = 858353 | |
hypo_zero_one = Hypothesis(p_a, p_b, n_a, n_b) | |
hypo_zero_one.evaluate() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division, print_function, absolute_import | |
import warnings | |
import numpy as np | |
from numpy import (asarray, sqrt, compress) | |
from scipy import stats | |
def wilcoxon(x, y=None, zero_method="wilcox", correction=False): | |
""" | |
Calculate the Wilcoxon signed-rank test. It is a non-parametric version of the paired T-test. | |
Parameters | |
---------- | |
x : array_like | |
The first set of measurements. | |
y : array_like, optional | |
The second set of measurements. If `y` is not given, then the `x` | |
array is considered to be the differences between the two sets of | |
measurements. | |
zero_method : string, {"pratt", "wilcox", "zsplit"}, optional | |
"pratt": | |
Pratt treatment: includes zero-differences in the ranking process | |
(more conservative) | |
"wilcox": | |
Wilcox treatment: discards all zero-differences | |
"zsplit": | |
Zero rank split: just like Pratt, but spliting the zero rank | |
between positive and negative ones | |
correction : bool, optional | |
If True, apply continuity correction by adjusting the Wilcoxon rank | |
statistic by 0.5 towards the mean value when computing the | |
z-statistic. Default is False. | |
Returns | |
------- | |
statistic : float | |
The sum of the ranks of the differences above zero. | |
pvalue : float | |
The one-sided p-value for the test. | |
Notes | |
----- | |
Because the normal approximation is used for the calculations, the | |
samples used should be large. A typical rule is to require that | |
n > 20. | |
References | |
---------- | |
.. [1] http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test | |
""" | |
if zero_method not in ["wilcox", "pratt", "zsplit"]: | |
raise ValueError("Zero method should be either 'wilcox' " | |
"or 'pratt' or 'zsplit'") | |
if y is None: | |
d = x | |
else: | |
x, y = map(asarray, (x, y)) | |
if len(x) != len(y): | |
raise ValueError('Unequal N in wilcoxon. Aborting.') | |
d = x - y | |
if zero_method == "wilcox": | |
# Keep all non-zero differences | |
d = compress(np.not_equal(d, 0), d, axis=-1) | |
count = len(d) | |
if count < 10: | |
warnings.warn("Warning: sample size too small for normal approximation.") | |
r = stats.rankdata(abs(d)) | |
r_plus = np.sum((d > 0) * r, axis=0) | |
r_minus = np.sum((d < 0) * r, axis=0) | |
x_greater_than_y = r_plus > r_minus | |
if zero_method == "zsplit": | |
r_zero = np.sum((d == 0) * r, axis=0) | |
r_plus += r_zero / 2. | |
r_minus += r_zero / 2. | |
T = min(r_plus, r_minus) | |
mn = count * (count + 1.) * 0.25 | |
se = count * (count + 1.) * (2. * count + 1.) | |
if zero_method == "pratt": | |
r = r[d != 0] | |
replist, repnum = stats.find_repeats(r) | |
if repnum.size != 0: | |
# Correction for repeated elements. | |
se -= 0.5 * (repnum * (repnum * repnum - 1)).sum() | |
se = sqrt(se / 24) | |
correction = 0.5 * int(bool(correction)) * np.sign(T - mn) | |
z = (T - mn - correction) / se | |
prob = stats.distributions.norm.sf(abs(z)) | |
return(T, prob, x_greater_than_y) | |
if __name__ == "__main__": | |
test=[1.95,1.84,1.95,2.15,2.16,1.87,1.86,1.9,1.98,2.05,2.28,2.32,1.89,1.87] | |
control = [1.83,1.86,1.92,2.16,2.2,1.79,1.75,1.94,1.91,2,2.24,2.26,1.87,1.83] | |
print(wilcoxon(test,control)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment