tiaplagata · November 17, 2020 03:00
diff --git a/custom_pipeline_2.py b/custom_pipeline_2.py
 from sklearn.metrics import recall_score, make_scorer

 # Define the pipeline. Steps run in the order listed; the step names are the
 # prefixes that the param_grid keys below use to address each step.
 # NOTE(review): SMOTE is a resampling step, so `Pipeline` presumably has to be
 # imblearn.pipeline.Pipeline rather than sklearn.pipeline.Pipeline -- confirm
 # against the imports at the top of the file.
 pipeline = Pipeline(
     steps=[
         ("ColumnTransformer", SelectColumnsTransformer(columns=features_to_use)),
         ("TransformCategorical", Transform_Categorical()),
         ("SMOTE", SMOTE()),
         ("GradientBooster", GradientBoostingClassifier()),
     ]
 )

 # Create a param grid experimenting with different feature sets and
 # parameters. Keys follow the "<step name>__<parameter>" convention.
 # 3 feature sets x 2 sampling strategies x 2 losses x 2 estimator counts
 # = 24 candidate configurations.
 param_grid = {
     # Candidate column subsets; these override the `columns` value the
     # pipeline was constructed with.
     "ColumnTransformer__columns": [
         ["account length", "international plan", "voice mail plan"],
         ["account length", "international plan", "total charge"],
         ["international plan", "total minutes", "total calls"],
     ],
     "SMOTE__sampling_strategy": [0.5, 1],
     # NOTE(review): scikit-learn 1.1 deprecated loss='deviance' in favour of
     # 'log_loss' (removed in 1.3) -- confirm against the installed version.
     "GradientBooster__loss": ["deviance", "exponential"],
     "GradientBooster__n_estimators": [100, 150],
 }

 # Run a grid search with the pipeline.
 # The scoring parameter makes the search rank candidates by recall.
 gs_pipeline = GridSearchCV(
     pipeline,
     param_grid=param_grid,
     verbose=2,
     scoring=make_scorer(recall_score),
 )
 gs_pipeline.fit(X_train, y_train)

 # Store the best model found by the search
 best_model = gs_pipeline.best_estimator_

 # Make predictions on the validation set and report the recall of the best
 # model. The score is bound to a name and printed explicitly so it is visible
 # when this runs as a script, not only in an interactive session.
 y_validation_preds = best_model.predict(X_validation)
 validation_recall = recall_score(y_validation, y_validation_preds)
 print(f"Validation recall: {validation_recall}")
	from sklearn.metrics import recall_score, make_scorer

	# Define the pipeline. Steps run in the order listed; the step names are the
	# prefixes that the param_grid keys below use to address each step.
	# NOTE(review): SMOTE is a resampling step, so `Pipeline` presumably has to be
	# imblearn.pipeline.Pipeline rather than sklearn.pipeline.Pipeline -- confirm
	# against the imports at the top of the file.
	pipeline = Pipeline(
		steps=[
			("ColumnTransformer", SelectColumnsTransformer(columns=features_to_use)),
			("TransformCategorical", Transform_Categorical()),
			("SMOTE", SMOTE()),
			("GradientBooster", GradientBoostingClassifier()),
		]
	)

	# Create a param grid experimenting with different feature sets and
	# parameters. Keys follow the "<step name>__<parameter>" convention.
	# 3 feature sets x 2 sampling strategies x 2 losses x 2 estimator counts
	# = 24 candidate configurations.
	param_grid = {
		# Candidate column subsets; these override the `columns` value the
		# pipeline was constructed with.
		"ColumnTransformer__columns": [
			["account length", "international plan", "voice mail plan"],
			["account length", "international plan", "total charge"],
			["international plan", "total minutes", "total calls"],
		],
		"SMOTE__sampling_strategy": [0.5, 1],
		# NOTE(review): scikit-learn 1.1 deprecated loss='deviance' in favour of
		# 'log_loss' (removed in 1.3) -- confirm against the installed version.
		"GradientBooster__loss": ["deviance", "exponential"],
		"GradientBooster__n_estimators": [100, 150],
	}

	# Run a grid search with the pipeline.
	# The scoring parameter makes the search rank candidates by recall.
	gs_pipeline = GridSearchCV(
		pipeline,
		param_grid=param_grid,
		verbose=2,
		scoring=make_scorer(recall_score),
	)
	gs_pipeline.fit(X_train, y_train)

	# Store the best model found by the search
	best_model = gs_pipeline.best_estimator_

	# Make predictions on the validation set and report the recall of the best
	# model. The score is bound to a name and printed explicitly so it is visible
	# when this runs as a script, not only in an interactive session.
	y_validation_preds = best_model.predict(X_validation)
	validation_recall = recall_score(y_validation, y_validation_preds)
	print(f"Validation recall: {validation_recall}")