accessnash · August 15, 2018 21:46
diff --git a/bias_var_tradeoff.py b/bias_var_tradeoff.py
 # Bias-variance diagnosis: compare the 10-fold cross-validation RMSE of a
 # regression tree with its RMSE on the training set.
 # NOTE(review): X and y are not defined in this snippet; they are assumed to
 # be loaded earlier in the session -- confirm before running.
 from sklearn.metrics import mean_squared_error as MSE
 from sklearn.model_selection import cross_val_score
 from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeRegressor

 # Set SEED for reproducibility
 SEED = 1

 # Split the data into 70% train and 30% test
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.3, random_state=SEED)

 # Instantiate a DecisionTreeRegressor dt
 dt = DecisionTreeRegressor(max_depth=4, min_samples_leaf=0.26,
                            random_state=SEED)

 # Compute the array containing the 10-folds CV MSEs.
 # The 'neg_mean_squared_error' scorer yields negated MSEs, hence the
 # leading minus sign to get positive values back.
 MSE_CV_scores = -cross_val_score(
     dt, X_train, y_train, cv=10,
     scoring='neg_mean_squared_error',
     n_jobs=-1)

 # Compute the 10-folds CV RMSE
 RMSE_CV = MSE_CV_scores.mean() ** 0.5

 # Print RMSE_CV
 print('CV RMSE: {:.2f}'.format(RMSE_CV))

 # Fit dt to the training set
 dt.fit(X_train, y_train)

 # Predict the labels of the training set
 y_pred_train = dt.predict(X_train)

 # Evaluate the training set RMSE of dt.
 # The exponent is written 0.5 (not 1/2) to match the CV computation above
 # and to avoid integer division yielding 0 under Python 2.
 RMSE_train = MSE(y_train, y_pred_train) ** 0.5

 # Print RMSE_train
 print('Train RMSE: {:.2f}'.format(RMSE_train))

 # Ensemble learning: report the test-set accuracy of three individual
 # classifiers, then of a VotingClassifier built from the same estimators.
 # NOTE(review): SEED, X_train, X_test, y_train and y_test are expected to
 # exist already (they are not created in this section), and y must hold
 # class labels for these classifiers -- confirm against the data in use.
 from sklearn.ensemble import VotingClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import accuracy_score
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier

 # Instantiate lr
 lr = LogisticRegression(random_state=SEED)

 # Instantiate knn
 knn = KNeighborsClassifier(n_neighbors=27)

 # Instantiate dt (rebinds the name dt to a classification tree)
 dt = DecisionTreeClassifier(min_samples_leaf=0.13, random_state=SEED)

 # Define the list classifiers as (display name, estimator) pairs
 classifiers = [
     ('Logistic Regression', lr),
     ('K Nearest Neighbours', knn),
     ('Classification Tree', dt),
 ]

 for clf_name, clf in classifiers:

     # Fit clf to the training set
     clf.fit(X_train, y_train)

     # Predict y_pred
     y_pred = clf.predict(X_test)

     # Calculate accuracy
     accuracy = accuracy_score(y_test, y_pred)

     # Evaluate clf's accuracy on the test set
     print('{:s} : {:.3f}'.format(clf_name, accuracy))

 # Instantiate a VotingClassifier vc from the same named estimators
 vc = VotingClassifier(estimators=classifiers)

 # Fit vc to the training set
 vc.fit(X_train, y_train)

 # Evaluate the test set predictions
 y_pred = vc.predict(X_test)

 # Calculate accuracy score
 accuracy = accuracy_score(y_test, y_pred)
 print('Voting Classifier: {:.3f}'.format(accuracy))
	# Bias-variance diagnosis: compare the 10-fold cross-validation RMSE of a
	# regression tree with its RMSE on the training set.
	# NOTE(review): X and y are not defined in this snippet; they are assumed to
	# be loaded earlier in the session -- confirm before running.
	from sklearn.metrics import mean_squared_error as MSE
	from sklearn.model_selection import cross_val_score
	from sklearn.model_selection import train_test_split
	from sklearn.tree import DecisionTreeRegressor

	# Set SEED for reproducibility
	SEED = 1

	# Split the data into 70% train and 30% test
	X_train, X_test, y_train, y_test = train_test_split(
		X, y, test_size=0.3, random_state=SEED)

	# Instantiate a DecisionTreeRegressor dt
	dt = DecisionTreeRegressor(
		max_depth=4, min_samples_leaf=0.26, random_state=SEED)

	# Compute the array containing the 10-folds CV MSEs.
	# The 'neg_mean_squared_error' scorer yields negated MSEs, hence the
	# leading minus sign to get positive values back.
	MSE_CV_scores = -cross_val_score(
		dt, X_train, y_train, cv=10,
		scoring='neg_mean_squared_error',
		n_jobs=-1)

	# Compute the 10-folds CV RMSE
	RMSE_CV = MSE_CV_scores.mean() ** 0.5

	# Print RMSE_CV
	print('CV RMSE: {:.2f}'.format(RMSE_CV))

	# Fit dt to the training set
	dt.fit(X_train, y_train)

	# Predict the labels of the training set
	y_pred_train = dt.predict(X_train)

	# Evaluate the training set RMSE of dt.
	# The exponent is written 0.5 (not 1/2) to match the CV computation above
	# and to avoid integer division yielding 0 under Python 2.
	RMSE_train = MSE(y_train, y_pred_train) ** 0.5

	# Print RMSE_train
	print('Train RMSE: {:.2f}'.format(RMSE_train))

	# Ensemble learning: report the test-set accuracy of three individual
	# classifiers, then of a VotingClassifier built from the same estimators.
	# NOTE(review): SEED, X_train, X_test, y_train and y_test are expected to
	# exist already (they are not created in this section), and y must hold
	# class labels for these classifiers -- confirm against the data in use.
	from sklearn.ensemble import VotingClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import accuracy_score
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.tree import DecisionTreeClassifier

	# Instantiate lr
	lr = LogisticRegression(random_state=SEED)

	# Instantiate knn
	knn = KNeighborsClassifier(n_neighbors=27)

	# Instantiate dt (rebinds the name dt to a classification tree)
	dt = DecisionTreeClassifier(min_samples_leaf=0.13, random_state=SEED)

	# Define the list classifiers as (display name, estimator) pairs
	classifiers = [
		('Logistic Regression', lr),
		('K Nearest Neighbours', knn),
		('Classification Tree', dt),
	]

	# The loop body must be indented one level deeper than the for statement;
	# with the body flattened to the same level the script does not parse.
	for clf_name, clf in classifiers:

		# Fit clf to the training set
		clf.fit(X_train, y_train)

		# Predict y_pred
		y_pred = clf.predict(X_test)

		# Calculate accuracy
		accuracy = accuracy_score(y_test, y_pred)

		# Evaluate clf's accuracy on the test set
		print('{:s} : {:.3f}'.format(clf_name, accuracy))

	# Instantiate a VotingClassifier vc from the same named estimators
	vc = VotingClassifier(estimators=classifiers)

	# Fit vc to the training set
	vc.fit(X_train, y_train)

	# Evaluate the test set predictions
	y_pred = vc.predict(X_test)

	# Calculate accuracy score
	accuracy = accuracy_score(y_test, y_pred)
	print('Voting Classifier: {:.3f}'.format(accuracy))
No results found