animesh-agarwal

animesh-agarwal / Load data into data frame

Last active October 1, 2018 13:17

	# Wrap the raw feature matrix in a DataFrame whose columns are labelled
	# with the dataset's feature names.
	boston = pd.DataFrame(
		data=boston_dataset.data,
		columns=boston_dataset.feature_names,
	)
	# Preview the first rows of the frame.
	boston.head()

animesh-agarwal / Add target to dataframe

Last active October 2, 2018 05:27

# Store the dataset's target values in a new 'MEDV' column of the frame.
boston['MEDV'] = boston_dataset.target

animesh-agarwal / visualize the data.py

Last active October 1, 2018 17:37

	# Set the default figure size (width, height in inches) for seaborn plots.
	figure_size = (11.7, 8.27)
	sns.set(rc={'figure.figsize': figure_size})

	# Plot the distribution of the 'MEDV' column using 30 bins.
	# NOTE(review): distplot is deprecated in recent seaborn releases in favour
	# of histplot/displot -- confirm the installed version before upgrading.
	medv_values = boston['MEDV']
	sns.distplot(medv_values, bins=30)
	plt.show()

animesh-agarwal / scatter plot.py

Created October 1, 2018 13:28

	# Draw one scatter plot per selected feature against the 'MEDV' column,
	# arranged side by side in a single row of subplots.
	plt.figure(figsize=(20, 5))

	features = ['LSTAT', 'RM']
	target = boston['MEDV']

	# The loop body must be indented under the `for`; otherwise Python raises
	# an IndentationError and no subplot is ever drawn.
	for i, col in enumerate(features):
		# Subplot positions are 1-based, hence i + 1.
		plt.subplot(1, len(features), i + 1)
		x = boston[col]
		y = target
		plt.scatter(x, y, marker='o')

animesh-agarwal / prepare data for the model.py

Created October 1, 2018 13:30

	# Target vector: the 'MEDV' column.
	Y = boston['MEDV']
	# Feature matrix: the 'LSTAT' and 'RM' columns stacked side by side and
	# re-wrapped in a fresh two-column DataFrame.
	X = pd.DataFrame(
		data=np.c_[boston['LSTAT'], boston['RM']],
		columns=['LSTAT', 'RM'],
	)

animesh-agarwal / train_test_split.py

Last active October 1, 2018 17:36

	from sklearn.model_selection import train_test_split

	# Split into training and test sets with test_size=0.2; random_state is
	# fixed so the split is reproducible.
	X_train, X_test, Y_train, Y_test = train_test_split(
		X,
		Y,
		test_size=0.2,
		random_state=5,
	)
	# Report the shape of each split, one per line.
	print(
		X_train.shape,
		X_test.shape,
		Y_train.shape,
		Y_test.shape,
		sep='\n',
	)

animesh-agarwal / linear regression model for boston housing.py

Last active February 11, 2020 22:10

	from sklearn.linear_model import LinearRegression
	from sklearn.metrics import mean_squared_error

	# Create the linear regression estimator and fit it on the training split.
	# fit() returns the estimator itself, so construction and fitting can be chained.
	lin_model = LinearRegression().fit(X_train, Y_train)

animesh-agarwal / check for null values.py

Created October 1, 2018 16:18

# Count the missing values in each column of the frame.
boston.isna().sum()

animesh-agarwal / correlation matrix.py

Created October 1, 2018 17:39

	# Pairwise correlations between the frame's columns, rounded to two
	# decimals.
	correlation_matrix = boston.corr().round(decimals=2)
	# annot=True prints each value inside its square.
	sns.heatmap(correlation_matrix, annot=True)

animesh-agarwal / model evaluation.py

Last active October 2, 2018 10:32

	# Both metrics are imported here so this snippet does not rely on imports
	# made elsewhere; r2_score in particular is otherwise undefined (NameError).
	from sklearn.metrics import mean_squared_error, r2_score

	# model evaluation for training set
	y_train_predict = lin_model.predict(X_train)
	# RMSE is the square root of the mean squared error.
	rmse = (np.sqrt(mean_squared_error(Y_train, y_train_predict)))
	r2 = r2_score(Y_train, y_train_predict)

	print("The model performance for training set")
	print("--------------------------------------")
	print('RMSE is {}'.format(rmse))
	print('R2 score is {}'.format(r2))
	print("\n")