Skip to content

Instantly share code, notes, and snippets.

@Abhayparashar31
Created October 3, 2020 05:33
Show Gist options
  • Save Abhayparashar31/63b8cf909d7ab6efd2575791effda748 to your computer and use it in GitHub Desktop.
Save Abhayparashar31/63b8cf909d7ab6efd2575791effda748 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
## Loading the dataset
# Height/weight samples (with a Gender column) fetched from a public gist.
df = pd.read_csv("https://gist.githubusercontent.com/nstokoe/7d4717e96c21b8ad04ec91f361b000cb/raw/bf95a2e30fceb9f2ae990eac8379fc7d844a0196/weight-height.csv")
# Feature matrix: a trailing axis is added so Height becomes an (n_samples, 1)
# array, the 2-D shape scikit-learn estimators expect.
X = df['Height'].values[:, None]
# Target: selected by column name rather than by position, consistent with how
# the rest of the script refers to 'Height' and 'Weight', and robust to the
# CSV's column order changing.
y = df['Weight'].values
## Visualization
# 2x2 grid: raw height/weight scatter, per-gender scatter,
# per-gender height histogram, overall weight histogram.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Scatter, not a line plot: the rows are not ordered by height, so joining
# consecutive samples with line segments only produces a zig-zag.
ax1.scatter(X, y, s=5, alpha=0.3)
ax1.set_title("Weight and height")
ax1.set_xlabel("Height")
ax1.set_ylabel("Weight")

# Split by gender so both groups can be overlaid on the same axes.
males = df[df['Gender'] == 'Male']
females = df[df['Gender'] == 'Female']

males.plot(kind='scatter', x='Height', y='Weight',
           ax=ax2, color='blue', alpha=0.3)
females.plot(kind='scatter', x='Height', y='Weight',
             ax=ax2, color='red', alpha=0.3)
ax2.set_title('Male and Female Populations')
ax2.legend(['Males', 'Females'])

# Identical bins and range for both groups so the histograms are comparable.
males['Height'].plot(kind='hist', ax=ax3, bins=50, range=(50, 80), alpha=0.3, color='blue')
females['Height'].plot(kind='hist', ax=ax3, bins=50, range=(50, 80), alpha=0.3, color='red')
ax3.set_title('Height distribution')
ax3.legend(['Males', 'Females'])
ax3.set_xlabel('Height in')
# Vertical lines mark each group's mean height.
ax3.axvline(males['Height'].mean(), color='blue', linewidth=2)
ax3.axvline(females['Height'].mean(), color='red', linewidth=2)

ax4.hist(y)
ax4.set_title("Distribution of Weight")
ax4.set_xlabel("Weight")

# Lay out the figure only after all titles and labels exist, so the padding
# is computed from the final artists.
fig.tight_layout(pad=3.0)
plt.show()
## Modeling
from sklearn.metrics import mean_absolute_error, r2_score

# Hold out one third of the samples for testing; the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0)
regressor = LinearRegression()
regressor.fit(X_train, y_train)
# score() returns R^2 on the data it is given. Print it, because a bare
# expression displays nothing when the file is run as a script.
print("train r2_score: ", regressor.score(X_train, y_train))

## Prediction
y_pred = regressor.predict(X_test)
print(y_pred)

## Evaluation
# Both metrics are computed on the held-out test split only.
print("mean_absolute_error: ", mean_absolute_error(y_test, y_pred))
print("r2_score: ", r2_score(y_test, y_pred))
## Visualizing results
# One panel per split: observed points in red, fitted line in blue.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
panels = (
    (ax1, X_train, y_train, 'Traning Set'),
    (ax2, X_test, y_test, 'Test Set'),
)
for axis, heights, weights, panel_title in panels:
    axis.scatter(heights, weights, color='red')
    # Draw the fitted line over the same heights that are scattered on this
    # panel, so its extent matches the data shown (the test panel uses X_test).
    axis.plot(heights, regressor.predict(heights), color='blue')
    axis.set_title(panel_title)
    axis.set_xlabel('Height')
    axis.set_ylabel('Weight')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment