Abhayparashar31 · October 3, 2020 05:33
diff --git a/linear_regression.py b/linear_regression.py
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 ## Load the weight/height dataset (this script reads the Gender, Height and Weight columns)
 df = pd.read_csv("https://gist.githubusercontent.com/nstokoe/7d4717e96c21b8ad04ec91f361b000cb/raw/bf95a2e30fceb9f2ae990eac8379fc7d844a0196/weight-height.csv")
 # Feature matrix: a double-bracket selection keeps Height as a single-column
 # 2-D array, the shape the estimator is fitted on later.
 X = df[['Height']].to_numpy()
 # Target: select by name rather than by position so a change in column order
 # cannot silently swap in a different column (the plots label y as Weight).
 y = df['Weight'].to_numpy()

 ## Visualization: exploratory plots on a 2x2 grid
 fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

 # Top-left: Height against Weight for every row. The points are drawn in
 # file order, so a connected line would zig-zag between unrelated samples;
 # draw markers only.
 ax1.scatter(X, y, s=5, alpha=0.3)
 ax1.set_title("Weight and height")
 ax1.set_xlabel("Height")
 ax1.set_ylabel("Weight")

 # Top-right: the same relationship, split by gender
 males = df[df['Gender'] == 'Male']
 females = df[df['Gender'] == 'Female']
 males.plot(kind='scatter', x='Height', y='Weight',
            ax=ax2, color='blue', alpha=0.3)
 females.plot(kind='scatter', x='Height', y='Weight',
              ax=ax2, color='red', alpha=0.3)
 ax2.set_title('Male and Female Populations')
 ax2.legend(['Males', 'Females'])

 # Bottom-left: overlaid height histograms on shared bins
 males['Height'].plot(kind='hist', ax=ax3, bins=50, range=(50, 80), alpha=0.3, color='blue')
 females['Height'].plot(kind='hist', ax=ax3, bins=50, range=(50, 80), alpha=0.3, color='red')
 ax3.set_title('Height distribution')
 # Build the legend before the mean markers are added, as the original did
 ax3.legend(['Males', 'Females'])
 ax3.set_xlabel('Height in')
 # Vertical lines mark each group's mean height
 ax3.axvline(males['Height'].mean(), color='blue', linewidth=2)
 ax3.axvline(females['Height'].mean(), color='red', linewidth=2)

 # Bottom-right: distribution of the target
 ax4.hist(y)
 ax4.set_title("Distribution of Weight")

 # Lay the figure out only after all titles and labels exist so the spacing
 # accounts for them.
 fig.tight_layout(pad=3.0)
 plt.show()

 ## Modeling: hold out one third of the rows for testing (fixed seed for a repeatable split)
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0)
 regressor = LinearRegression()
 regressor.fit(X_train, y_train)
 # A bare expression is discarded when this runs as a script, so print the
 # training-set score instead of silently dropping it.
 print("train score: ", regressor.score(X_train, y_train))

 ## Prediction on the held-out rows
 y_pred = regressor.predict(X_test)
 print(y_pred)

 ## Evaluation on the held-out rows
 from sklearn.metrics import mean_absolute_error, r2_score
 print("mean_absolute_error: ", mean_absolute_error(y_test, y_pred))
 print("r2_score: ", r2_score(y_test, y_pred))

 ## Visualizing results: the fitted line over the training and the test points
 fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
 # Both panels draw the same fitted line; predict once and reuse the result
 train_fit = regressor.predict(X_train)

 # Left panel: training points with the fitted line
 ax1.scatter(X_train, y_train, color='red')
 ax1.plot(X_train, train_fit, color='blue')
 ax1.set_title('Training Set')
 ax1.set_xlabel('Height')
 ax1.set_ylabel('Weight')

 # Right panel: held-out points against the line fitted on the training rows
 ax2.scatter(X_test, y_test, color='red')
 ax2.plot(X_train, train_fit, color='blue')
 ax2.set_title('Test Set')
 ax2.set_xlabel('Height')
 ax2.set_ylabel('Weight')

 plt.show()
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LinearRegression
	## Load the weight/height dataset (this script reads the Gender, Height and Weight columns)
	df = pd.read_csv("https://gist.githubusercontent.com/nstokoe/7d4717e96c21b8ad04ec91f361b000cb/raw/bf95a2e30fceb9f2ae990eac8379fc7d844a0196/weight-height.csv")
	# Feature matrix: a double-bracket selection keeps Height as a single-column
	# 2-D array, the shape the estimator is fitted on later.
	X = df[['Height']].to_numpy()
	# Target: select by name rather than by position so a change in column order
	# cannot silently swap in a different column (the plots label y as Weight).
	y = df['Weight'].to_numpy()

	## Visualization: exploratory plots on a 2x2 grid
	fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

	# Top-left: Height against Weight for every row. The points are drawn in
	# file order, so a connected line would zig-zag between unrelated samples;
	# draw markers only.
	ax1.scatter(X, y, s=5, alpha=0.3)
	ax1.set_title("Weight and height")
	ax1.set_xlabel("Height")
	ax1.set_ylabel("Weight")

	# Top-right: the same relationship, split by gender
	males = df[df['Gender'] == 'Male']
	females = df[df['Gender'] == 'Female']
	males.plot(kind='scatter', x='Height', y='Weight',
	           ax=ax2, color='blue', alpha=0.3)
	females.plot(kind='scatter', x='Height', y='Weight',
	             ax=ax2, color='red', alpha=0.3)
	ax2.set_title('Male and Female Populations')
	ax2.legend(['Males', 'Females'])

	# Bottom-left: overlaid height histograms on shared bins
	males['Height'].plot(kind='hist', ax=ax3, bins=50, range=(50, 80), alpha=0.3, color='blue')
	females['Height'].plot(kind='hist', ax=ax3, bins=50, range=(50, 80), alpha=0.3, color='red')
	ax3.set_title('Height distribution')
	# Build the legend before the mean markers are added, as the original did
	ax3.legend(['Males', 'Females'])
	ax3.set_xlabel('Height in')
	# Vertical lines mark each group's mean height
	ax3.axvline(males['Height'].mean(), color='blue', linewidth=2)
	ax3.axvline(females['Height'].mean(), color='red', linewidth=2)

	# Bottom-right: distribution of the target
	ax4.hist(y)
	ax4.set_title("Distribution of Weight")

	# Lay the figure out only after all titles and labels exist so the spacing
	# accounts for them.
	fig.tight_layout(pad=3.0)
	plt.show()

	## Modeling: hold out one third of the rows for testing (fixed seed for a repeatable split)
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0)
	regressor = LinearRegression()
	regressor.fit(X_train, y_train)
	# A bare expression is discarded when this runs as a script, so print the
	# training-set score instead of silently dropping it.
	print("train score: ", regressor.score(X_train, y_train))

	## Prediction on the held-out rows
	y_pred = regressor.predict(X_test)
	print(y_pred)

	## Evaluation on the held-out rows
	from sklearn.metrics import mean_absolute_error, r2_score
	print("mean_absolute_error: ", mean_absolute_error(y_test, y_pred))
	print("r2_score: ", r2_score(y_test, y_pred))

	## Visualizing results: the fitted line over the training and the test points
	fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
	# Both panels draw the same fitted line; predict once and reuse the result
	train_fit = regressor.predict(X_train)

	# Left panel: training points with the fitted line
	ax1.scatter(X_train, y_train, color='red')
	ax1.plot(X_train, train_fit, color='blue')
	ax1.set_title('Training Set')
	ax1.set_xlabel('Height')
	ax1.set_ylabel('Weight')

	# Right panel: held-out points against the line fitted on the training rows
	ax2.scatter(X_test, y_test, color='red')
	ax2.plot(X_train, train_fit, color='blue')
	ax2.set_title('Test Set')
	ax2.set_xlabel('Height')
	ax2.set_ylabel('Weight')

	plt.show()