ksv-muralidhar · February 21, 2021 17:14
diff --git a/dl1.py b/dl1.py
 import numpy as np
 import pandas as pd
 from sklearn.datasets import load_boston
 from sklearn.preprocessing import StandardScaler
 from sklearn.pipeline import Pipeline
 from sklearn.impute import SimpleImputer
 from sklearn.neighbors import KNeighborsRegressor
 from sklearn.model_selection import cross_validate, train_test_split
 from sklearn.metrics import mean_squared_error

 # Load the Boston housing dataset once and reuse the returned object; the
 # original called load_boston() three separate times for data, names and target.
 # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
 # 1.2, so this script needs an older scikit-learn release to run as written.
 boston = load_boston()
 data = pd.DataFrame(boston['data'], columns=boston['feature_names'])
 data['target'] = boston['target']

 # Split the input features (every column but the last) from the target
 # (the last column). The copies keep later edits to X from touching `data`.
 X = data.iloc[:, :-1].copy()
 y = data.iloc[:, -1].copy()

 # Add up to 100 random missing values. The seed makes the positions
 # reproducible; column indices are drawn before row indices, so the order of
 # the two randint calls must not be swapped. Duplicate (row, column) pairs are
 # possible, so the number of distinct NaN cells can be smaller than 100.
 np.random.seed(11)
 rand_cols = np.random.randint(0, X.shape[1], 100)
 rand_rows = np.random.randint(0, X.shape[0], 100)
 for i, j in zip(rand_rows, rand_cols):
     X.iloc[i, j] = np.nan
    
 # Hold out 20% of the rows as a test set (fixed seed for reproducibility).
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.2, random_state=11
 )

 # Preprocessing objects: mode ('most_frequent') imputer and standard scaler.
 imp = SimpleImputer(strategy='most_frequent')
 standard_scaler = StandardScaler()

 # KNN regressor with scikit-learn's default settings.
 knn = KNeighborsRegressor()

 # Impute and scale the WHOLE training set up front. Each cross-validation
 # fold below is therefore preprocessed with statistics that were computed
 # from its own validation rows too -- the leakage this script demonstrates.
 X_train_impute = imp.fit_transform(X_train).copy()
 X_train_scaled = standard_scaler.fit_transform(X_train_impute).copy()

 # 5-fold cross-validation on the already-preprocessed training data.
 cv = cross_validate(
     estimator=knn,
     X=X_train_scaled,
     y=y_train,
     cv=5,
     scoring="neg_root_mean_squared_error",
     return_train_score=True,
 )

 # The scorer reports negated RMSE, so the fold means are multiplied by -1.
 print(f'Training RMSE (with data leakage): {-1 * np.mean(cv["train_score"])}')
 print(f'validation RMSE (with data leakage): {-1 * np.mean(cv["test_score"])}')

 # Fit the model to the preprocessed training data. The regressor created
 # earlier in this script is named `knn`; the original referenced an undefined
 # name `lr`, which raised NameError.
 knn.fit(X_train_scaled, y_train)

 # Preprocess the test data with the imputer and scaler that were fitted on
 # the training set (transform only, no re-fitting).
 X_test_impute = imp.transform(X_test).copy()
 X_test_scaled = standard_scaler.transform(X_test_impute).copy()

 # Predictions and model evaluation on unseen data
 pred = knn.predict(X_test_scaled)
 print(f'RMSE on unseen data: {np.sqrt(mean_squared_error(y_test,pred))}')
diff --git a/dl2.py b/dl2.py
 # Chain imputation, scaling and the regressor into a single estimator, so
 # cross_validate fits the preprocessing steps together with the model on the
 # training portion of each fold.
 pipeline = Pipeline(
     steps=[
         ['imputer', imp],
         ['scaler', standard_scaler],
         ['regressor', knn],
     ]
 )

 # 5-fold cross-validation on the raw (un-preprocessed) training data,
 # with the pipeline as the estimator.
 cv = cross_validate(
     estimator=pipeline,
     X=X_train,
     y=y_train,
     cv=5,
     scoring="neg_root_mean_squared_error",
     return_train_score=True,
 )

 # The scorer reports negated RMSE, so the fold means are multiplied by -1.
 print(f'Training RMSE (without data leakage): {-1 * np.mean(cv["train_score"])}')
 print(f'validation RMSE (without data leakage): {-1 * np.mean(cv["test_score"])}')

 # Fit the full pipeline on the training set
 pipeline.fit(X_train, y_train)

 # Predictions and model evaluation on unseen data; the pipeline applies its
 # fitted imputer and scaler to X_test before predicting.
 pred = pipeline.predict(X_test)
 print(f'RMSE on unseen data: {np.sqrt(mean_squared_error(y_test,pred))}')
	import numpy as np
	import pandas as pd
	from sklearn.datasets import load_boston
	from sklearn.preprocessing import StandardScaler
	from sklearn.pipeline import Pipeline
	from sklearn.impute import SimpleImputer
	from sklearn.neighbors import KNeighborsRegressor
	from sklearn.model_selection import cross_validate, train_test_split
	from sklearn.metrics import mean_squared_error

	# Load the Boston housing dataset once and reuse the returned object; the
	# original called load_boston() three separate times for data, names and target.
	# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
	# 1.2, so this script needs an older scikit-learn release to run as written.
	boston = load_boston()
	data = pd.DataFrame(boston['data'], columns=boston['feature_names'])
	data['target'] = boston['target']

	# Split the input features (every column but the last) from the target
	# (the last column). The copies keep later edits to X from touching `data`.
	X = data.iloc[:, :-1].copy()
	y = data.iloc[:, -1].copy()

	# Add up to 100 random missing values. The seed makes the positions
	# reproducible; column indices are drawn before row indices, so the order of
	# the two randint calls must not be swapped. Duplicate (row, column) pairs are
	# possible, so the number of distinct NaN cells can be smaller than 100.
	np.random.seed(11)
	rand_cols = np.random.randint(0, X.shape[1], 100)
	rand_rows = np.random.randint(0, X.shape[0], 100)
	# The loop body must be indented under the `for`; the original had it at the
	# same level as the loop header, which is an IndentationError.
	for i, j in zip(rand_rows, rand_cols):
	    X.iloc[i, j] = np.nan

	# Hold out 20% of the rows as a test set (fixed seed for reproducibility).
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, test_size=0.2, random_state=11
	)

	# Preprocessing objects: mode ('most_frequent') imputer and standard scaler.
	imp = SimpleImputer(strategy='most_frequent')
	standard_scaler = StandardScaler()

	# KNN regressor with scikit-learn's default settings.
	knn = KNeighborsRegressor()

	# Impute and scale the WHOLE training set up front. Each cross-validation
	# fold below is therefore preprocessed with statistics that were computed
	# from its own validation rows too -- the leakage this script demonstrates.
	X_train_impute = imp.fit_transform(X_train).copy()
	X_train_scaled = standard_scaler.fit_transform(X_train_impute).copy()

	# 5-fold cross-validation on the already-preprocessed training data.
	cv = cross_validate(
	    estimator=knn,
	    X=X_train_scaled,
	    y=y_train,
	    cv=5,
	    scoring="neg_root_mean_squared_error",
	    return_train_score=True,
	)

	# The scorer reports negated RMSE, so the fold means are multiplied by -1.
	print(f'Training RMSE (with data leakage): {-1 * np.mean(cv["train_score"])}')
	print(f'validation RMSE (with data leakage): {-1 * np.mean(cv["test_score"])}')

	# Fit the model to the preprocessed training data. The regressor created
	# earlier in this script is named `knn`; the original referenced an undefined
	# name `lr`, which raised NameError.
	knn.fit(X_train_scaled, y_train)

	# Preprocess the test data with the imputer and scaler that were fitted on
	# the training set (transform only, no re-fitting).
	X_test_impute = imp.transform(X_test).copy()
	X_test_scaled = standard_scaler.transform(X_test_impute).copy()

	# Predictions and model evaluation on unseen data
	pred = knn.predict(X_test_scaled)
	print(f'RMSE on unseen data: {np.sqrt(mean_squared_error(y_test,pred))}')
	# Chain imputation, scaling and the regressor into a single estimator, so
	# cross_validate fits the preprocessing steps together with the model on the
	# training portion of each fold.
	pipeline = Pipeline(
	    steps=[
	        ['imputer', imp],
	        ['scaler', standard_scaler],
	        ['regressor', knn],
	    ]
	)

	# 5-fold cross-validation on the raw (un-preprocessed) training data,
	# with the pipeline as the estimator.
	cv = cross_validate(
	    estimator=pipeline,
	    X=X_train,
	    y=y_train,
	    cv=5,
	    scoring="neg_root_mean_squared_error",
	    return_train_score=True,
	)

	# The scorer reports negated RMSE, so the fold means are multiplied by -1.
	print(f'Training RMSE (without data leakage): {-1 * np.mean(cv["train_score"])}')
	print(f'validation RMSE (without data leakage): {-1 * np.mean(cv["test_score"])}')

	# Fit the full pipeline on the training set
	pipeline.fit(X_train, y_train)

	# Predictions and model evaluation on unseen data; the pipeline applies its
	# fitted imputer and scaler to X_test before predicting.
	pred = pipeline.predict(X_test)
	print(f'RMSE on unseen data: {np.sqrt(mean_squared_error(y_test,pred))}')