pierrelouisbescond · May 28, 2020 15:16
diff --git a/abnormal_values_impact_initial_scores.py b/abnormal_values_impact_initial_scores.py
 # Hold out a test partition: every column except "Y" is passed as the
 # features, column "Y" as the target (fixed random_state for repeatability)
 X_train, X_test, y_train, y_test = train_test_split(
     df.drop("Y", axis=1),
     df["Y"],
     random_state=22,
 )

 # Candidate regressors to benchmark, each constructed with no arguments
 models = [
     Lasso(),
     KNeighborsRegressor(),
     RandomForestRegressor(),
     GradientBoostingRegressor(),
 ]
 # Class name of each candidate, used below to label the rows of the table
 model_names = []
 for model in models:
     model_names.append(type(model).__name__)

 # Baseline: fit every candidate on the "train" split and keep the value
 # its .score() method returns on the "test" split
 initial_scores = []
 for model_choice in models:
     model_choice.fit(X_train, y_train)
     baseline = model_choice.score(X_test, y_test)
     initial_scores.append(baseline)

 # Re-bind the list of scores as a one-column table indexed by model name
 initial_scores = pd.DataFrame({"Score": initial_scores}, index=model_names)
 display(initial_scores)
	# We split the original DataFrame into train and test datasets:
	# every column except "Y" is passed as the features, "Y" as the target
	X_train, X_test, y_train, y_test = train_test_split(
	    df.drop("Y", axis=1), df["Y"], random_state=22
	)

	# We define the models to benchmark (each constructed with no arguments)
	models = [
	    Lasso(),
	    KNeighborsRegressor(),
	    RandomForestRegressor(),
	    GradientBoostingRegressor(),
	]
	# And create the corresponding list of model names (the class names),
	# used below as the row labels of the score table
	model_names = [type(model).__name__ for model in models]

	# We record the original score achieved by each model on the "test" set
	# after being trained on the "train" set
	initial_scores = []

	for model_choice in models:
	    # The fit/score statements must be nested under the loop header so
	    # that they run once per model
	    model_choice.fit(X_train, y_train)
	    initial_scores.append(model_choice.score(X_test, y_test))

	# Turn the list of scores into a one-column table indexed by model name
	initial_scores = pd.DataFrame(initial_scores, columns=["Score"], index=model_names)
	display(initial_scores)