Last active
July 14, 2018 17:39
-
-
Save conquistadorjd/667343342043abd34911d34b5c98392c to your computer and use it in GitHub Desktop.
statistics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################################ | |
# name: correlationexamples-00.py | |
# desc: Correlations | |
# date: 2018-07-14 | |
# Author: conquistadorjd | |
# remark : goodman_kruskal_gamma formula taken from https://github.com/shilad/context-sensitive-sr/blob/master/SRSurvey/src/python/correlation.py | |
################################################################################################ | |
from matplotlib import pyplot as plt | |
import numpy as np | |
from scipy import stats | |
from itertools import combinations, permutations | |
def goodman_kruskal_gamma(m, n):
    """Return the Goodman and Kruskal gamma rank correlation of *m* and *n*.

    Gamma is ``(C - D) / (C + D)``, where ``C`` is the number of concordant
    pairs of observations (both variables move in the same direction) and
    ``D`` the number of discordant pairs (they move in opposite directions).
    Pairs tied in either variable are ignored, so the statistic is unsuitable
    when the number of ties in the data is high. Every pair of observations
    is compared, so the running time is quadratic in ``len(m)``.

    Args:
        m: Indexable sequence of numbers (first variable).
        n: Indexable sequence of numbers (second variable); it is indexed
            with the positions of ``m``.

    Returns:
        The gamma coefficient as a float in ``[-1.0, 1.0]``, or ``nan`` when
        no pair is concordant or discordant (fewer than two observations, or
        every pair tied), because the coefficient is undefined in that case.

    >>> x = [2, 8, 5, 4, 2, 6, 1, 4, 5, 7, 4]
    >>> y = [3, 9, 4, 3, 1, 7, 2, 5, 6, 8, 3]
    >>> goodman_kruskal_gamma(x, y)
    0.9166666666666666
    >>> goodman_kruskal_gamma([1, 1, 1], [1, 2, 3])
    nan
    """
    num = 0  # concordant minus discordant pairs
    den = 0  # concordant plus discordant pairs (ties excluded)
    # Visit each unordered pair once: also visiting (j, i) would double both
    # the numerator and the denominator and leave the ratio unchanged.
    for i, j in combinations(range(len(m)), 2):
        sign = (m[i] - m[j]) * (n[i] - n[j])
        if sign > 0:
            num += 1
            den += 1
        elif sign < 0:
            num -= 1
            den += 1
    if den == 0:
        # No comparable pair: report "undefined" instead of dividing by zero.
        return float('nan')
    return num / float(den)
print('*** Program Started ***')

# Four sample series, each compared against the same 0-based index:
# strictly increasing, strictly decreasing, and two noisy patterns.
y1 = [101, 102, 103, 104, 105, 106, 107]
y2 = [101, 100, 99, 98, 97, 96, 95]
y3 = [101, 102, 101, 102, 101, 102, 102]
y4 = [101, 102, 101, 101, 101, 102, 103]
x = np.arange(len(y1))

# One panel of a 2x2 grid per series; the panel title reports the Pearson,
# Kendall tau, Spearman rho and Goodman-Kruskal gamma coefficients.
title_template = 'PC {:.3f} tau {:.3f} rho {:.3f} gamma {:.3f}'
for panel, series in zip((221, 222, 223, 224), (y1, y2, y3, y4)):
    pc = stats.pearsonr(x, series)
    tau = stats.kendalltau(x, series)
    rho = stats.spearmanr(x, series)
    gamma = goodman_kruskal_gamma(x, series)
    plt.subplot(panel)
    plt.scatter(x, series, s=None, marker='o', color='g', edgecolors='g',
                alpha=0.9, label="Jagur")
    # Index 0 of each scipy result is the coefficient itself.
    plt.title(title_template.format(pc[0], tau[0], rho[0], gamma))

# Save the figure to disk, then also display it.
plt.savefig('correlationexamples-01.png')
plt.show()
print('*** Program ended ***')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################################ | |
# name: linear-regression-01-statsmodels.py | |
# desc: linear regression using statsmodels | |
# date: 2018-07-14 | |
# Author: conquistadorjd | |
# reference: http://www.statsmodels.org/dev/examples/notebooks/generated/ols.html | |
################################################################################################ | |
import numpy as np | |
import statsmodels.api as sm | |
from scipy import stats | |
import matplotlib.pyplot as plt | |
print('*** Program started ***')

# Candidate series for trying different patterns; only `y` is fitted.
y1 = [101, 102, 103, 104, 105, 106, 107]
y2 = [101, 100, 99, 98, 97, 96, 95]
y3 = [101, 102, 101, 102, 101, 102, 101]
y4 = [101, 103, 105, 107, 109, 111, 115]
y5 = [101, 103, 102, 105, 102, 107, 105]
y6 = [1, 2, 3, 4, 5, 6, 7]
y = y5

# 1-based sample positions; kept separately because the design matrix
# below replaces `x`, and the raw positions are still needed for plotting.
x1 = np.arange(1, len(y) + 1)

# statsmodels' OLS does not add an intercept by itself, so a constant
# column is prepended to the regressor.
x = sm.add_constant(x1)

# Ordinary least squares fit; params holds [intercept, slope].
results = sm.OLS(y, x).fit()
print( 'results.params : ',results.params)

# Evaluate the fitted line at the original sample positions.
xx = x1
yy = results.params[0] + x1 * results.params[1]

plt.scatter(x1, y, s=None, marker='o', color='g', edgecolors='g',
            alpha=0.9, label="Jagur")
plt.plot(xx, yy)

# Save the figure to disk, then also display it.
plt.savefig('linear-regression-01-statsmodels.png')
plt.show()
print('*** Program ended ***')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################################ | |
# name: linear-regression-02-scikit-learn.py | |
# desc: linear regression using scikit-learn | |
# date: 2018-07-14 | |
# Author: conquistadorjd | |
# reference: http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html | |
################################################################################################ | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn import datasets, linear_model | |
from sklearn.metrics import mean_squared_error, r2_score | |
from scipy import stats | |
print('*** Program started ***')

# Candidate series for trying different patterns; only `y` is fitted.
y1 = [101, 102, 103, 104, 105, 106, 107]
y2 = [101, 100, 99, 98, 97, 96, 95]
y3 = [101, 102, 101, 102, 101, 102, 101]
y4 = [101, 103, 105, 107, 109, 111, 115]
y5 = [101, 103, 102, 105, 102, 107, 105]
# Named y6 (it used to be assigned to `y` and then immediately overwritten).
y6 = [1, 2, 3, 4, 5, 6, 7]
y = y5

# x1 keeps the flat 0-based positions for pearsonr; x is the 1-based
# regressor reshaped to a single-feature column, as scikit-learn expects
# a 2-D (n_samples, n_features) input.
x1 = np.arange(len(y))
x = (x1 + 1).reshape(-1, 1)

##################################### regression
regr = linear_model.LinearRegression()
regr.fit(x, y)
# Print the fitted coefficients (previously this printed the estimator
# object itself under the "Coefficients" label).
print('Coefficients: \n', regr.coef_)
m = regr.coef_[0]
b = regr.intercept_
print("slope=",m, "\nintercept=",b)

# Pearson's r does not change when x is shifted by a constant, so the
# 0-based x1 yields the same coefficient as the 1-based regressor.
pc = stats.pearsonr(x1, y)
print(pc)

# Fitted line evaluated at the sample positions.
xx = x
yy = regr.predict(xx)
plt.scatter(x, y, s=None, marker='o', color='g', edgecolors='g',
            alpha=0.9, label="Jagur")
plt.plot(xx, yy)

# Save the figure to disk, then also display it.
plt.savefig('linear-regression-02-scikit-learn.png')
plt.show()
print('*** Program ended ***')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################################ | |
# name: linear-regression-03-scipy.py | |
# desc: linear regression using scipy | |
# date: 2018-07-14 | |
# Author: conquistadorjd | |
# reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html | |
################################################################################################ | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn import datasets, linear_model | |
from sklearn.metrics import mean_squared_error, r2_score | |
from scipy import stats | |
print('*** Program started ***')

# Candidate series for trying different patterns; only `y` is fitted.
y1 = [101, 102, 103, 104, 105, 106, 107]
y2 = [101, 100, 99, 98, 97, 96, 95]
y3 = [101, 102, 101, 102, 101, 102, 101]
y4 = [101, 103, 105, 107, 109, 111, 115]
y5 = [101, 103, 102, 105, 102, 107, 105]
y6 = [1, 2, 3, 4, 5, 6, 7]
y = y5

# x1 holds the 0-based positions (used for pearsonr below); x is the
# 1-based copy used as the regressor and for plotting.
x1 = np.arange(len(y))
x = x1 + 1

# Simple least-squares line through (x, y).
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print('Coefficients: \n', slope, intercept, r_value, p_value, std_err)

pc = stats.pearsonr(x1, y)
print(pc)

# Raw samples plus the fitted line evaluated at the same positions.
plt.scatter(x, y, s=None, marker='o', color='g', edgecolors='g',
            alpha=0.9, label="Jagur")
plt.plot(x, intercept + slope*x, label='fitted line')

# Save the figure to disk, then also display it.
plt.savefig('linear-regression-03-scipy.png')
plt.show()
print('*** Program ended ***')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment