Skip to content

Instantly share code, notes, and snippets.

@conquistadorjd
Last active July 14, 2018 17:39
Show Gist options
  • Save conquistadorjd/667343342043abd34911d34b5c98392c to your computer and use it in GitHub Desktop.
Save conquistadorjd/667343342043abd34911d34b5c98392c to your computer and use it in GitHub Desktop.
statistics
################################################################################################
# name: correlationexamples-00.py
# desc: Correlations
# date: 2018-07-14
# Author: conquistadorjd
# remark : goodman_kruskal_gamma formula taken from https://github.com/shilad/context-sensitive-sr/blob/master/SRSurvey/src/python/correlation.py
################################################################################################
from matplotlib import pyplot as plt
import numpy as np
from scipy import stats
from itertools import combinations, permutations
def goodman_kruskal_gamma(m, n):
    """
    Compute the Goodman and Kruskal gamma rank correlation coefficient.

    Gamma is (concordant - discordant) / (concordant + discordant), counted
    over every pair of observations. Pairs tied in either sequence are
    ignored, so the statistic is unsuitable when the number of ties in the
    data is high. It is also slow: every pair of positions is examined.

    Args:
        m: first sequence of numbers (must support len() and integer indexing).
        n: second sequence, indexed in parallel with m; it must be at least
            as long as m.

    Returns:
        The coefficient as a float, or float('nan') when it is undefined
        because no pair is concordant or discordant (fewer than two
        observations, or every pair tied in m or n).

    >>> x = [2, 8, 5, 4, 2, 6, 1, 4, 5, 7, 4]
    >>> y = [3, 9, 4, 3, 1, 7, 2, 5, 6, 8, 3]
    >>> goodman_kruskal_gamma(x, y)
    0.9166666666666666
    """
    concordant = 0
    discordant = 0
    # Visit each unordered pair once. Visiting (i, j) and (j, i) separately
    # would double numerator and denominator alike and leave the ratio as is.
    for i, j in combinations(range(len(m)), 2):
        sign = (m[i] - m[j]) * (n[i] - n[j])
        if sign > 0:
            concordant += 1
        elif sign < 0:
            discordant += 1
        # sign == 0: tied in m or in n, so the pair is skipped.

    den = concordant + discordant
    if den == 0:
        # No usable pair: report "undefined" instead of dividing by zero.
        return float('nan')
    return (concordant - discordant) / float(den)
print('*** Program Started ***')

# Sample series; each one is plotted against the same 0-based index x and
# annotated with its Pearson, Kendall, Spearman and Goodman-Kruskal values.
# x=[1,2,3,4,5]
y1 = [101, 102, 103, 104, 105, 106, 107]
y2 = [101, 100, 99, 98, 97, 96, 95]
y3 = [101, 102, 101, 102, 101, 102, 102]
y4 = [101, 102, 101, 101, 101, 102, 103]
# y3=y2
# y4=y1
x = np.arange(len(y1))

# One panel per series on a 2x2 grid (subplot codes 221..224).
for grid_position, series in ((221, y1), (222, y2), (223, y3), (224, y4)):
    pc = stats.pearsonr(x, series)
    tau = stats.kendalltau(x, series)
    rho = stats.spearmanr(x, series)
    gamma = goodman_kruskal_gamma(x, series)

    plt.subplot(grid_position)
    plt.scatter(x, series, s=None, marker='o', color='g', edgecolors='g',
                alpha=0.9, label="Jagur")
    # plt.xlabel('Sample x Axis')
    # plt.ylabel('Sample y Axis')
    # plt.legend(loc=2)
    # plt.grid(color='black', linestyle='-', linewidth=0.5)
    # Element [0] of each scipy result is the coefficient itself.
    plt.title('PC {:.3f} tau {:.3f} rho {:.3f} gamma {:.3f}'.format(
        pc[0], tau[0], rho[0], gamma))

# Save the figure to disk first ...
plt.savefig('correlationexamples-01.png')
# ... then display it (drop the savefig call to only display it).
plt.show()
print('*** Program ended ***')
################################################################################################
# name: linear-regression-01-statsmodels.py
# desc: linear regression using statsmodels
# date: 2018-07-14
# Author: conquistadorjd
# reference: http://www.statsmodels.org/dev/examples/notebooks/generated/ols.html
################################################################################################
import numpy as np
import statsmodels.api as sm
from scipy import stats
import matplotlib.pyplot as plt
print('*** Program started ***')

##################################### Candidate data patterns
y1 = [101, 102, 103, 104, 105, 106, 107]
y2 = [101, 100, 99, 98, 97, 96, 95]
y3 = [101, 102, 101, 102, 101, 102, 101]
y4 = [101, 103, 105, 107, 109, 111, 115]
y5 = [101, 103, 102, 105, 102, 107, 105]
y6 = [1, 2, 3, 4, 5, 6, 7]
# Series that is actually fitted and plotted.
y = y5

# 1-based sample index; x1 keeps the plain values for plotting.
x1 = np.arange(len(y)) + 1
# add_constant is applied before fitting, as the statsmodels OLS example
# does; the fitted params are then read as [intercept, slope] below.
x = sm.add_constant(x1)

##################################### Regression
model = sm.OLS(y, x)
results = model.fit()
# print(results.summary())
print( 'results.params : ',results.params)

# pc = stats.pearsonr(x,y5)
# print(pc)
# tau = stats.kendalltau(x,y5)
# print(tau)
# rho = stats.spearmanr(x,y5)
# print(rho)

# Regression line evaluated at the original x positions.
xx = x1
yy = results.params[0] + x1 * results.params[1]

plt.scatter(x1, y, s=None, marker='o', color='g', edgecolors='g',
            alpha=0.9, label="Jagur")
plt.plot(xx, yy)
# plt.xlabel('Sample x Axis')
# plt.ylabel('Sample y Axis')
# plt.legend(loc=2)
# plt.grid(color='black', linestyle='-', linewidth=0.5)

# Save the figure to disk first ...
plt.savefig('linear-regression-01-statsmodels.png')
# ... then display it (drop the savefig call to only display it).
plt.show()
print('*** Program ended ***')
################################################################################################
# name: linear-regression-02-scikit-learn.py
# desc: linear regression using scikit-learn
# date: 2018-07-14
# Author: conquistadorjd
# reference: http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html
################################################################################################
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from scipy import stats
print('*** Program started ***')

##################################### Testing different patterns
y1 = [101, 102, 103, 104, 105, 106, 107]
y2 = [101, 100, 99, 98, 97, 96, 95]
y3 = [101, 102, 101, 102, 101, 102, 101]
y4 = [101, 103, 105, 107, 109, 111, 115]
y5 = [101, 103, 102, 105, 102, 107, 105]
# Named y6 like the sibling scripts; it used to be assigned to y and then
# immediately overwritten by the next line.
y6 = [1, 2, 3, 4, 5, 6, 7]
# Series that is actually fitted and plotted.
y = y5

# 1-based sample index as a flat array; used for the correlation and plots.
x1 = np.arange(len(y)) + 1
# The same values as a single-column 2-D array, the (n_samples, n_features)
# layout that LinearRegression.fit takes.
x = x1.reshape(-1, 1)

##################################### regression
regr = linear_model.LinearRegression()
regr.fit(x, y)
# Print the fitted coefficients, not the estimator object itself.
print('Coefficients: \n', regr.coef_)
m = regr.coef_[0]
b = regr.intercept_
print("slope=",m, "\nintercept=",b)

# Pearson correlation on the same 1-based index the model was fitted on.
pc = stats.pearsonr(x1, y)
print(pc)
# tau = stats.kendalltau(x,y)
# print(tau)
# rho = stats.spearmanr(x,y)
# print(rho)

# Fitted values at the training positions give the regression line.
xx = x
yy = regr.predict(xx)

plt.scatter(x1, y, s=None, marker='o', color='g', edgecolors='g',
            alpha=0.9, label="Jagur")
plt.plot(x1, yy)
# plt.xlabel('Sample x Axis')
# plt.ylabel('Sample y Axis')
# plt.legend(loc=2)
# plt.grid(color='black', linestyle='-', linewidth=0.5)

# Save the figure to disk first ...
plt.savefig('linear-regression-02-scikit-learn.png')
# ... then display it (drop the savefig call to only display it).
plt.show()
print('*** Program ended ***')
################################################################################################
# name: linear-regression-03-scipy.py
# desc: linear regression using scipy
# date: 2018-07-14
# Author: conquistadorjd
# reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html
################################################################################################
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from scipy import stats
print('*** Program started ***')

##################################### Candidate data patterns
y1 = [101, 102, 103, 104, 105, 106, 107]
y2 = [101, 100, 99, 98, 97, 96, 95]
y3 = [101, 102, 101, 102, 101, 102, 101]
y4 = [101, 103, 105, 107, 109, 111, 115]
y5 = [101, 103, 102, 105, 102, 107, 105]
y6 = [1, 2, 3, 4, 5, 6, 7]
# Series that is actually fitted and plotted.
y = y5

# x1 is the 0-based index; x is the same index shifted to start at 1.
x1 = np.arange(len(y))
x = x1 + 1

##################################### Regression
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print('Coefficients: \n', slope, intercept, r_value, p_value, std_err)

pc = stats.pearsonr(x1, y)
print(pc)
# tau = stats.kendalltau(x,y)
# print(tau)
# rho = stats.spearmanr(x,y)
# print(rho)

# Observations, then the fitted line evaluated at the same x positions.
plt.scatter(x, y, s=None, marker='o', color='g', edgecolors='g',
            alpha=0.9, label="Jagur")
plt.plot(x, intercept + slope*x, label='fitted line')
# plt.xlabel('Sample x Axis')
# plt.ylabel('Sample y Axis')
# plt.legend(loc=2)
# plt.grid(color='black', linestyle='-', linewidth=0.5)

# Save the figure to disk first ...
plt.savefig('linear-regression-03-scipy.png')
# ... then display it (drop the savefig call to only display it).
plt.show()
print('*** Program ended ***')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment