conquistadorjd · July 14, 2018 17:39
diff --git a/correlationexamples-00.py b/correlationexamples-00.py
 ################################################################################################
 #	name:	correlationexamples-00.py
 #	desc:	Correlations 
 #	date:	2018-07-14
 #	Author:	conquistadorjd
 #   remark : goodman_kruskal_gamma formula taken from https://github.com/shilad/context-sensitive-sr/blob/master/SRSurvey/src/python/correlation.py
 ################################################################################################
 from matplotlib import pyplot as plt
 import numpy as np
 from scipy import stats
 from itertools import combinations, permutations

 def goodman_kruskal_gamma(m, n):
     """
     Compute the Goodman and Kruskal gamma rank correlation coefficient.

     gamma = (C - D) / (C + D), where C is the number of concordant pairs
     (both series move in the same direction between two observations) and
     D is the number of discordant pairs (they move in opposite directions).
     Pairs tied in either series are ignored, so the statistic is unsuitable
     when the number of ties in the data is high. Every pair of observations
     is examined, so the run time grows quadratically with len(m).

     m, n: index-able sequences of numbers; n must hold at least len(m) items.

     Returns a float in [-1.0, 1.0], or float('nan') when no pair is
     concordant or discordant (fewer than two observations, or every pair
     tied), because gamma is undefined in that case.

     >>> x = [2, 8, 5, 4, 2, 6, 1, 4, 5, 7, 4]
     >>> y = [3, 9, 4, 3, 1, 7, 2, 5, 6, 8, 3]
     >>> goodman_kruskal_gamma(x, y)
     0.9166666666666666
     """
     concordant = 0
     discordant = 0
     # Unordered pairs are enough: visiting both (i, j) and (j, i) would only
     # double the two counts and leave their ratio unchanged.
     for i, j in combinations(range(len(m)), 2):
         sign = (m[i] - m[j]) * (n[i] - n[j])
         if sign > 0:
             concordant += 1
         elif sign < 0:
             discordant += 1
     den = concordant + discordant
     if den == 0:
         # No usable pair: report "undefined" instead of dividing by zero.
         return float('nan')
     return (concordant - discordant) / float(den)

 print('*** Program Started ***')

 # Four 7-point series, all plotted against the same x positions 0..6, so the
 # coefficients can be compared across differently shaped relationships.
 y1=[101,102,103,104,105,106,107]    # strictly increasing
 y2=[101,100,99,98,97,96,95]         # strictly decreasing
 y3=[101,102,101,102,101,102,102]    # alternating between 101 and 102
 y4=[101,102,101,101,101,102,103]    # mostly flat, rising at the end
 x=np.arange(len(y1))

 # One panel per series in a 2x2 grid (subplot codes 221..224). Each panel's
 # title shows Pearson's r, Kendall's tau, Spearman's rho and Goodman-Kruskal
 # gamma for that series. This loop replaces four copy-pasted stanzas that
 # differed only in the series and the subplot code.
 for position, y in zip((221, 222, 223, 224), (y1, y2, y3, y4)):
     pc = stats.pearsonr(x, y)
     tau = stats.kendalltau(x, y)
     rho = stats.spearmanr(x, y)
     gamma = goodman_kruskal_gamma(x, y)
     plt.subplot(position)
     plt.scatter(x, y, s=None, marker='o', color='g', edgecolors='g', alpha=0.9, label="Jagur")
     # Element [0] of each SciPy result is the coefficient itself.
     plt.title('PC '+ "{:.3f}".format(pc[0]) + ' tau ' + "{:.3f}".format(tau[0]) + ' rho ' + "{:.3f}".format(rho[0])+ ' gamma ' + "{:.3f}".format(gamma))

 # Save the figure to disk ...
 plt.savefig('correlationexamples-01.png')

 # ... and also display it interactively.
 plt.show()

 print('*** Program ended ***')
diff --git a/linear-regression-01-statsmodels.py b/linear-regression-01-statsmodels.py
 ################################################################################################
 #	name:	linear-regression-01-statsmodels.py
 #	desc:	linear regression using statsmodels
 #	date:	2018-07-14
 #	Author:	conquistadorjd
 #   reference: http://www.statsmodels.org/dev/examples/notebooks/generated/ols.html
 ################################################################################################
 import numpy as np
 import statsmodels.api as sm
 from scipy import stats
 import matplotlib.pyplot as plt

 print('*** Program started ***')

 # Candidate y patterns; switch the series being fitted by changing `y` below.
 y1 = [101, 102, 103, 104, 105, 106, 107]
 y2 = [101, 100, 99, 98, 97, 96, 95]
 y3 = [101, 102, 101, 102, 101, 102, 101]
 y4 = [101, 103, 105, 107, 109, 111, 115]
 y5 = [101, 103, 102, 105, 102, 107, 105]
 y6 = [1, 2, 3, 4, 5, 6, 7]
 y = y5

 # x1 keeps the plain 1-based positions (1..len(y)) for plotting.
 x1 = np.arange(1, len(y) + 1)
 # x is the design matrix handed to OLS; the statsmodels documentation asks
 # for the constant to be added explicitly.
 x = sm.add_constant(x1)

 # Fit the ordinary least squares model.
 model = sm.OLS(y, x)
 results = model.fit()
 print('results.params : ', results.params)

 # Regression line: params[0] is used as the intercept, params[1] as the slope.
 xx = x1
 yy = results.params[0] + x1 * results.params[1]
 plt.scatter(x1, y, s=None, marker='o', color='g', edgecolors='g', alpha=0.9, label="Jagur")
 plt.plot(xx, yy)

 # Save the figure to disk ...
 plt.savefig('linear-regression-01-statsmodels.png')

 # ... and also display it interactively.
 plt.show()

 print('*** Program ended ***')
diff --git a/linear-regression-02-scikit-learn.py b/linear-regression-02-scikit-learn.py
 ################################################################################################
 #	name:	linear-regression-02-scikit-learn.py
 #	desc:	linear regression using scikit-learn
 #	date:	2018-07-14
 #	Author:	conquistadorjd
 #   reference: http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html
 ################################################################################################
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn import datasets, linear_model
 from sklearn.metrics import mean_squared_error, r2_score
 from scipy import stats


 print('*** Program started ***')

 # Candidate y patterns; switch the series being fitted by changing `y` below.
 y1=[101,102,103,104,105,106,107]
 y2=[101,100,99,98,97,96,95]
 y3=[101,102,101,102,101,102,101]
 y4=[101,103,105,107,109,111,115]
 y5=[101,103,102,105,102,107,105]
 # Named y6 (as in the sibling regression scripts) instead of being assigned
 # to `y` and immediately overwritten.
 y6=[1,2,3,4,5,6,7]
 y=y5

 # x1: 0-based positions, kept 1-D for pearsonr.
 x1=np.arange(len(y))
 # x: 1-based positions reshaped into a single-column 2-D array for fit().
 x = (x1 + 1).reshape(-1, 1)

 # Fit the linear model.
 regr = linear_model.LinearRegression()
 regr.fit(x, y)

 # Print the fitted coefficients; the original printed the estimator object
 # under this label.
 print('Coefficients: \n', regr.coef_)
 m=regr.coef_[0]
 b=regr.intercept_
 print("slope=",m, "\nintercept=",b)

 # Pearson correlation is computed on the 0-based positions; a constant shift
 # of x does not change the coefficient.
 pc = stats.pearsonr(x1,y)
 print(pc)

 # Scatter of the data with the fitted line drawn from the model predictions.
 xx= x
 yy = regr.predict(xx)
 plt.scatter(x,y,s=None, marker='o',color='g',edgecolors='g',alpha=0.9,label="Jagur")
 plt.plot(xx,yy)

 # Save the figure to disk ...
 plt.savefig('linear-regression-02-scikit-learn.png')

 # ... and also display it interactively.
 plt.show()

 print('*** Program ended ***')
diff --git a/linear-regression-03-scipy.py b/linear-regression-03-scipy.py
 ################################################################################################
 #	name:	linear-regression-03-scipy.py
 #	desc:	linear regression using  scipy
 #	date:	2018-07-14
 #	Author:	conquistadorjd
 #   reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html
 ################################################################################################
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn import datasets, linear_model
 from sklearn.metrics import mean_squared_error, r2_score
 from scipy import stats


 print('*** Program started ***')

 # Candidate y patterns; switch the series being fitted by changing `y` below.
 y1 = [101, 102, 103, 104, 105, 106, 107]
 y2 = [101, 100, 99, 98, 97, 96, 95]
 y3 = [101, 102, 101, 102, 101, 102, 101]
 y4 = [101, 103, 105, 107, 109, 111, 115]
 y5 = [101, 103, 102, 105, 102, 107, 105]
 y6 = [1, 2, 3, 4, 5, 6, 7]
 y = y5

 # x1 holds the 0-based positions; x is the same sequence counted from 1.
 x1 = np.arange(len(y))
 x = x1 + 1

 # Least-squares fit of y against the 1-based positions.
 slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
 print('Coefficients: \n', slope, intercept, r_value, p_value, std_err)

 # Pearson correlation, computed on the 0-based positions.
 pc = stats.pearsonr(x1, y)
 print(pc)

 # Scatter of the data with the fitted straight line on top.
 plt.scatter(x, y, s=None, marker='o', color='g', edgecolors='g', alpha=0.9, label="Jagur")
 fitted = intercept + slope * x
 plt.plot(x, fitted, label='fitted line')

 # Save the figure to disk ...
 plt.savefig('linear-regression-03-scipy.png')

 # ... and also display it interactively.
 plt.show()

 print('*** Program ended ***')
	################################################################################################
	# name: correlationexamples-00.py
	# desc: Correlations
	# date: 2018-07-14
	# Author: conquistadorjd
	# remark : goodman_kruskal_gamma formula taken from https://github.com/shilad/context-sensitive-sr/blob/master/SRSurvey/src/python/correlation.py
	################################################################################################
	from matplotlib import pyplot as plt
	import numpy as np
	from scipy import stats
	from itertools import combinations, permutations

	def goodman_kruskal_gamma(m, n):
	    """
	    Compute the Goodman and Kruskal gamma rank correlation coefficient.

	    gamma = (C - D) / (C + D), where C is the number of concordant pairs
	    (both series move in the same direction between two observations) and
	    D is the number of discordant pairs (they move in opposite directions).
	    Pairs tied in either series are ignored, so the statistic is unsuitable
	    when the number of ties in the data is high. Every pair of observations
	    is examined, so the run time grows quadratically with len(m).

	    m, n: index-able sequences of numbers; n must hold at least len(m) items.

	    Returns a float in [-1.0, 1.0], or float('nan') when no pair is
	    concordant or discordant (fewer than two observations, or every pair
	    tied), because gamma is undefined in that case.

	    >>> x = [2, 8, 5, 4, 2, 6, 1, 4, 5, 7, 4]
	    >>> y = [3, 9, 4, 3, 1, 7, 2, 5, 6, 8, 3]
	    >>> goodman_kruskal_gamma(x, y)
	    0.9166666666666666
	    """
	    concordant = 0
	    discordant = 0
	    # Unordered pairs are enough: visiting both (i, j) and (j, i) would only
	    # double the two counts and leave their ratio unchanged.
	    for i, j in combinations(range(len(m)), 2):
	        sign = (m[i] - m[j]) * (n[i] - n[j])
	        if sign > 0:
	            concordant += 1
	        elif sign < 0:
	            discordant += 1
	    den = concordant + discordant
	    if den == 0:
	        # No usable pair: report "undefined" instead of dividing by zero.
	        return float('nan')
	    return (concordant - discordant) / float(den)

	print('* Program Started *')

	# Four 7-point series, all plotted against the same x positions 0..6, so the
	# coefficients can be compared across differently shaped relationships.
	y1=[101,102,103,104,105,106,107]    # strictly increasing
	y2=[101,100,99,98,97,96,95]         # strictly decreasing
	y3=[101,102,101,102,101,102,102]    # alternating between 101 and 102
	y4=[101,102,101,101,101,102,103]    # mostly flat, rising at the end
	x=np.arange(len(y1))

	# One panel per series in a 2x2 grid (subplot codes 221..224). Each panel's
	# title shows Pearson's r, Kendall's tau, Spearman's rho and Goodman-Kruskal
	# gamma for that series. This loop replaces four copy-pasted stanzas that
	# differed only in the series and the subplot code.
	for position, y in zip((221, 222, 223, 224), (y1, y2, y3, y4)):
	    pc = stats.pearsonr(x, y)
	    tau = stats.kendalltau(x, y)
	    rho = stats.spearmanr(x, y)
	    gamma = goodman_kruskal_gamma(x, y)
	    plt.subplot(position)
	    plt.scatter(x, y, s=None, marker='o', color='g', edgecolors='g', alpha=0.9, label="Jagur")
	    # Element [0] of each SciPy result is the coefficient itself.
	    plt.title('PC '+ "{:.3f}".format(pc[0]) + ' tau ' + "{:.3f}".format(tau[0]) + ' rho ' + "{:.3f}".format(rho[0])+ ' gamma ' + "{:.3f}".format(gamma))

	# Save the figure to disk ...
	plt.savefig('correlationexamples-01.png')

	# ... and also display it interactively.
	plt.show()

	print('* Program ended *')
	################################################################################################
	# name: linear-regression-01-statsmodels.py
	# desc: linear regression using statsmodels
	# date: 2018-07-14
	# Author: conquistadorjd
	# reference: http://www.statsmodels.org/dev/examples/notebooks/generated/ols.html
	################################################################################################
	import numpy as np
	import statsmodels.api as sm
	from scipy import stats
	import matplotlib.pyplot as plt

	print('* Program started *')

	# Candidate y patterns; switch the series being fitted by changing `y` below.
	y1 = [101, 102, 103, 104, 105, 106, 107]
	y2 = [101, 100, 99, 98, 97, 96, 95]
	y3 = [101, 102, 101, 102, 101, 102, 101]
	y4 = [101, 103, 105, 107, 109, 111, 115]
	y5 = [101, 103, 102, 105, 102, 107, 105]
	y6 = [1, 2, 3, 4, 5, 6, 7]
	y = y5

	# x1 keeps the plain 1-based positions (1..len(y)) for plotting.
	x1 = np.arange(1, len(y) + 1)
	# x is the design matrix handed to OLS; the statsmodels documentation asks
	# for the constant to be added explicitly.
	x = sm.add_constant(x1)

	# Fit the ordinary least squares model.
	model = sm.OLS(y, x)
	results = model.fit()
	print('results.params : ', results.params)

	# Regression line: params[0] is used as the intercept, params[1] as the slope.
	xx = x1
	yy = results.params[0] + x1 * results.params[1]
	plt.scatter(x1, y, s=None, marker='o', color='g', edgecolors='g', alpha=0.9, label="Jagur")
	plt.plot(xx, yy)

	# Save the figure to disk ...
	plt.savefig('linear-regression-01-statsmodels.png')

	# ... and also display it interactively.
	plt.show()

	print('* Program ended *')
	################################################################################################
	# name: linear-regression-02-scikit-learn.py
	# desc: linear regression using scikit-learn
	# date: 2018-07-14
	# Author: conquistadorjd
	# reference: http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html
	################################################################################################
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn import datasets, linear_model
	from sklearn.metrics import mean_squared_error, r2_score
	from scipy import stats


	print('* Program started *')

	# Candidate y patterns; switch the series being fitted by changing `y` below.
	y1=[101,102,103,104,105,106,107]
	y2=[101,100,99,98,97,96,95]
	y3=[101,102,101,102,101,102,101]
	y4=[101,103,105,107,109,111,115]
	y5=[101,103,102,105,102,107,105]
	# Named y6 (as in the sibling regression scripts) instead of being assigned
	# to `y` and immediately overwritten.
	y6=[1,2,3,4,5,6,7]
	y=y5

	# x1: 0-based positions, kept 1-D for pearsonr.
	x1=np.arange(len(y))
	# x: 1-based positions reshaped into a single-column 2-D array for fit().
	x = (x1 + 1).reshape(-1, 1)

	# Fit the linear model.
	regr = linear_model.LinearRegression()
	regr.fit(x, y)

	# Print the fitted coefficients; the original printed the estimator object
	# under this label.
	print('Coefficients: \n', regr.coef_)
	m=regr.coef_[0]
	b=regr.intercept_
	print("slope=",m, "\nintercept=",b)

	# Pearson correlation is computed on the 0-based positions; a constant shift
	# of x does not change the coefficient.
	pc = stats.pearsonr(x1,y)
	print(pc)

	# Scatter of the data with the fitted line drawn from the model predictions.
	xx= x
	yy = regr.predict(xx)
	plt.scatter(x,y,s=None, marker='o',color='g',edgecolors='g',alpha=0.9,label="Jagur")
	plt.plot(xx,yy)

	# Save the figure to disk ...
	plt.savefig('linear-regression-02-scikit-learn.png')

	# ... and also display it interactively.
	plt.show()

	print('* Program ended *')
	################################################################################################
	# name: linear-regression-03-scipy.py
	# desc: linear regression using scipy
	# date: 2018-07-14
	# Author: conquistadorjd
	# reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html
	################################################################################################
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn import datasets, linear_model
	from sklearn.metrics import mean_squared_error, r2_score
	from scipy import stats


	print('* Program started *')

	# Candidate y patterns; switch the series being fitted by changing `y` below.
	y1 = [101, 102, 103, 104, 105, 106, 107]
	y2 = [101, 100, 99, 98, 97, 96, 95]
	y3 = [101, 102, 101, 102, 101, 102, 101]
	y4 = [101, 103, 105, 107, 109, 111, 115]
	y5 = [101, 103, 102, 105, 102, 107, 105]
	y6 = [1, 2, 3, 4, 5, 6, 7]
	y = y5

	# x1 holds the 0-based positions; x is the same sequence counted from 1.
	x1 = np.arange(len(y))
	x = x1 + 1

	# Least-squares fit of y against the 1-based positions.
	slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
	print('Coefficients: \n', slope, intercept, r_value, p_value, std_err)

	# Pearson correlation, computed on the 0-based positions.
	pc = stats.pearsonr(x1, y)
	print(pc)

	# Scatter of the data with the fitted straight line on top.
	plt.scatter(x, y, s=None, marker='o', color='g', edgecolors='g', alpha=0.9, label="Jagur")
	fitted = intercept + slope * x
	plt.plot(x, fitted, label='fitted line')

	# Save the figure to disk ...
	plt.savefig('linear-regression-03-scipy.png')

	# ... and also display it interactively.
	plt.show()

	print('* Program ended *')