Last active
February 2, 2019 04:22
-
-
Save conquistadorjd/43e731ef9297ecfe4d727831a5e75a22 to your computer and use it in GitHub Desktop.
python-07-ml-gist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################################ | |
# name: linear-regression-01-scikt-learn.py | |
# desc: linear regression using scikit learn | |
# date: 2018-07-14 | |
# Author: conquistadorjd | |
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html | |
################################################################################################ | |
import numpy as np | |
import statsmodels.api as sm | |
from scipy import stats | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
from sklearn.linear_model import LinearRegression | |
print('*** Program started ***') | |
########################################################################################################################## | |
# Data downloaded from https://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/slr/frames/frame.html | |
# Fire and Theft in Chicago | |
# In the following data pairs | |
# X = fires per 1000 housing units | |
# Y = thefts per 1000 population | |
# within the same Zip code in the Chicago metro area | |
# Reference: U.S. Commission on Civil Rights | |
# df = pd.read_excel('slr05.xls', sheet_name='slr05') | |
# x = df['X'].tolist() | |
# y = df['Y'].tolist() | |
# label = "Fires and thefts" | |
########################################################################################################################## | |
# Data downloaded from https://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/slr/frames/frame.html | |
# In the following data | |
# X = chirps/sec for the striped ground cricket | |
# Y = temperature in degrees Fahrenheit | |
# Reference: The Song of Insects by Dr.G.W. Pierce, Harvard College Press | |
df = pd.read_excel('slr02.xls', sheet_name='slr02') | |
x = df['X'].tolist() | |
y = df['Y'].tolist() | |
label = "chirps per sec and temp" | |
print(df) | |
# # this is to preserve original x values to be used for plotting | |
x1=np.array(x) | |
x1=sm.add_constant(x1) | |
y1=np.array(y) | |
y1=sm.add_constant(y1) | |
print("std", np.std(x1)) | |
print("std", np.std(y1)) | |
######################################## linear regression calculations | |
print("*** input data type : ",x1,'\n and ', y1) | |
reg = LinearRegression().fit(x1, y1) | |
print('reg', reg) | |
####################################### other factors | |
pc = stats.pearsonr(x1,y) | |
print('pc : ',pc) | |
tau = stats.kendalltau(x1,y) | |
# print(tau) | |
rho = stats.spearmanr(x1,y) | |
# print(rho) | |
# # creating regression line | |
xx= x1 | |
######################################## plotting | |
yy = intercept + x1*slope | |
plt.scatter(x1,y,s=None, marker='o',color='g',edgecolors='g',alpha=0.9,label=label) | |
plt.plot(xx,yy) | |
plt.legend(loc=2) | |
plt.grid(color='black', linestyle='-', linewidth=0.5) | |
plt.title('C '+ "{:.3f}".format(intercept) + ' M ' + "{:.3f}".format(slope) | |
+ ' PC '+ "{:.3f}".format(pc[0]) | |
+ ' tau ' + "{:.3f}".format(tau[0]) | |
+ ' rho ' + "{:.3f}".format(rho[0]) | |
# + ' gamma ' + "{:.3f}".format(gamma) | |
, fontsize=10) | |
# Saving image | |
plt.savefig('linear-regression-01-scipy-'+ label+'.png') | |
# In case you dont want to save image but just displya it | |
plt.show() | |
print('*** Program ended ***') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################################ | |
# name: linear-regression-01-scipy.py | |
# desc: linear regression using scipy | |
# date: 2018-07-14 | |
# Author: conquistadorjd | |
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html | |
################################################################################################ | |
import numpy as np | |
import statsmodels.api as sm | |
from scipy import stats | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
from sklearn.linear_model import LinearRegression | |
print('*** Program started ***') | |
########################################################################################################################## | |
# Data downloaded from https://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/slr/frames/frame.html | |
# Fire and Theft in Chicago | |
# In the following data pairs | |
# X = fires per 1000 housing units | |
# Y = thefts per 1000 population | |
# within the same Zip code in the Chicago metro area | |
# Reference: U.S. Commission on Civil Rights | |
# df = pd.read_excel('slr05.xls', sheet_name='slr05') | |
# x = df['X'].tolist() | |
# y = df['Y'].tolist() | |
# label = "Fires and thefts" | |
########################################################################################################################## | |
# Data downloaded from https://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/slr/frames/frame.html | |
# In the following data | |
# X = chirps/sec for the striped ground cricket | |
# Y = temperature in degrees Fahrenheit | |
# Reference: The Song of Insects by Dr.G.W. Pierce, Harvard College Press | |
df = pd.read_excel('slr02.xls', sheet_name='slr02') | |
x = df['X'].tolist() | |
y = df['Y'].tolist() | |
label = "chirps per sec and temp" | |
print(df) | |
# # this is to preserve original x values to be used for plotting | |
x1=np.array(x) | |
x1=sm.add_constant(x1) | |
y1=np.array(y) | |
y1=sm.add_constant(y1) | |
print("std", np.std(x1)) | |
print("std", np.std(y1)) | |
######################################## linear regression calculations | |
print("*** input data type : ",x1,'\n and ', y1) | |
reg = LinearRegression().fit(x1, y1) | |
print('reg', reg) | |
####################################### other factors | |
pc = stats.pearsonr(x1,y) | |
print('pc : ',pc) | |
tau = stats.kendalltau(x1,y) | |
# print(tau) | |
rho = stats.spearmanr(x1,y) | |
# print(rho) | |
# # creating regression line | |
xx= x1 | |
######################################## plotting | |
yy = intercept + x1*slope | |
plt.scatter(x1,y,s=None, marker='o',color='g',edgecolors='g',alpha=0.9,label=label) | |
plt.plot(xx,yy) | |
plt.legend(loc=2) | |
plt.grid(color='black', linestyle='-', linewidth=0.5) | |
plt.title('C '+ "{:.3f}".format(intercept) + ' M ' + "{:.3f}".format(slope) | |
+ ' PC '+ "{:.3f}".format(pc[0]) | |
+ ' tau ' + "{:.3f}".format(tau[0]) | |
+ ' rho ' + "{:.3f}".format(rho[0]) | |
# + ' gamma ' + "{:.3f}".format(gamma) | |
, fontsize=10) | |
# Saving image | |
plt.savefig('linear-regression-01-scipy-'+ label+'.png') | |
# In case you dont want to save image but just displya it | |
plt.show() | |
print('*** Program ended ***') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################################ | |
# name: linear-regression-01-statsmodels.py | |
# desc: linear regression using statsmodels | |
# date: 2018-07-14 | |
# Author: conquistadorjd | |
# reference: http://www.statsmodels.org/dev/examples/notebooks/generated/ols.html | |
################################################################################################ | |
import numpy as np | |
import statsmodels.api as sm | |
from scipy import stats | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
print('*** Program started ***') | |
########################################################################################################################## | |
# Data downloaded from https://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/slr/frames/frame.html | |
# Fire and Theft in Chicago | |
# In the following data pairs | |
# X = fires per 1000 housing units | |
# Y = thefts per 1000 population | |
# within the same Zip code in the Chicago metro area | |
# Reference: U.S. Commission on Civil Rights | |
# df = pd.read_excel('slr05.xls', sheet_name='slr05') | |
# x = df['X'].tolist() | |
# y = df['Y'].tolist() | |
# label = "Fires and thefts" | |
########################################################################################################################## | |
# Data downloaded from https://college.cengage.com/mathematics/brase/understandable_statistics/7e/students/datasets/slr/frames/frame.html | |
# In the following data | |
# X = chirps/sec for the striped ground cricket | |
# Y = temperature in degrees Fahrenheit | |
# Reference: The Song of Insects by Dr.G.W. Pierce, Harvard College Press | |
df = pd.read_excel('slr02.xls', sheet_name='slr02') | |
x = df['X'].tolist() | |
y = df['Y'].tolist() | |
label = "chirps per sec and temp" | |
print(df) | |
# # this is to preserve original x values to be used for plotting | |
x1=np.array(x) | |
y1=np.array(y) | |
print("std", np.std(x1)) | |
print("std", np.std(y1)) | |
# # This is needed as per statsmodel documentation | |
# print('x before : ' , x) | |
x=sm.add_constant(x) | |
# print('x after : ' , x) | |
# ##################################### regression | |
model = sm.OLS(y,x) | |
results = model.fit() | |
print("summary : ",results.summary()) | |
# # print('Parameters: ', results.params) | |
print( 'results.params : ',results.params) | |
# print(x1, y) | |
pc = stats.pearsonr(x1,y) | |
print('pc : ',pc) | |
tau = stats.kendalltau(x1,y) | |
# print(tau) | |
rho = stats.spearmanr(x1,y) | |
# print(rho) | |
# # creating regression line | |
xx= x1 | |
# print(type(results.params[1]), results.params[1]) | |
print('x1 ',type(x1), ' results.params[1] ', type(results.params[1])) | |
# print('calculations : ', pc*np.std(y1)/np.std(x1)) | |
yy = results.params[0] + x1*results.params[1] | |
plt.scatter(x1,y,s=None, marker='o',color='g',edgecolors='g',alpha=0.9,label=label) | |
plt.plot(xx,yy) | |
plt.legend(loc=2) | |
plt.grid(color='black', linestyle='-', linewidth=0.5) | |
plt.title('C '+ "{:.3f}".format(results.params[0]) + ' M ' + "{:.3f}".format(results.params[1]) | |
+ ' PC '+ "{:.3f}".format(pc[0]) | |
+ ' tau ' + "{:.3f}".format(tau[0]) | |
+ ' rho ' + "{:.3f}".format(rho[0]) | |
# + ' gamma ' + "{:.3f}".format(gamma) | |
, fontsize=10) | |
# Saving image | |
plt.savefig('linear-regression-01-statsmodels-'+ label+'.png') | |
# In case you dont want to save image but just displya it | |
plt.show() | |
print('*** Program ended ***') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment