Pierre-Louis BESCOND pierrelouisbescond

Head of Data & Advanced Analytics

pierrelouisbescond / pca_3D_space.py

Created May 31, 2020 12:43

	# Lower/upper bounds kept for the axes (presumably consumed by the layout call that follows — confirm)
	min_axis, max_axis = -50, 50

	fig = go.Figure()
	# Plot the whole dataset as a 3D cloud of markers.
	# Column labels are case-sensitive in pandas; "x" is used in lowercase like "y" and "z"
	# (assumes df carries lowercase "x", "y", "z" columns — verify against the dataset creation).
	fig.add_trace(
	    go.Scatter3d(x=df["x"], y=df["y"], z=df["z"], mode='markers', name="dataset")
	)
	# Add a virtual 2D plane spanned by four hand-picked points;
	# surfaceaxis=1 asks Plotly to fill the points with a surface, drawn half-transparent.
	fig.add_trace(
	    go.Scatter3d(
	        x=[-14, 0, 4, 15],
	        y=[-17, -27, 30, 7],
	        z=[-31, -1, 15, 41],
	        mode='markers',
	        surfaceaxis=1,
	        opacity=0.5,
	        name="surface",
	    )
	)
	# Use the same small marker size for every trace added so far
	fig.update_traces(marker=dict(size=3))
	fig.update_layout(
	scene = dict(

pierrelouisbescond / pca_create_dataset.py

Created May 31, 2020 12:41

	import numpy as np
	import pandas as pd
	import math
	import plotly.graph_objects as go

	# Number of rows in the synthetic dataset
	size = 500
	# "x" is pure Gaussian noise (mean 0, standard deviation 5)
	df = pd.DataFrame({"x": np.random.normal(loc=0, scale=5, size=size)})
	# "y" and "z" are linear functions of "x" (slopes 1 and 2) plus their own
	# Gaussian noise (mean 0, standard deviation 10), so all three columns are correlated
	df["y"] = df["x"] + np.random.normal(loc=0, scale=10, size=size)
	df["z"] = 2 * df["x"] + np.random.normal(loc=0, scale=10, size=size)

pierrelouisbescond / abnormal_values_impact_results_and_display.py

Created May 28, 2020 17:10

	# For every model, replace each of its result columns by
	# (initial score of the model) - (score stored in that column),
	# i.e. the gap between the original performance and the one obtained
	# with corrupted and imputed data. Work on a copy so `results` stays intact.
	results_difference_from_initial = results.copy()

	for model_name in model_names:
	    # filter(like=...) keeps the columns whose label contains the model's name
	    model_columns = results_difference_from_initial.filter(like=model_name).columns
	    for score_column in model_columns:
	        degraded_scores = results_difference_from_initial[score_column]
	        results_difference_from_initial[score_column] = (
	            initial_scores.loc[model_name, "Score"] - degraded_scores
	        )

	# Bare expression: displays the resulting table when run as the last line of a notebook cell
	results_difference_from_initial

pierrelouisbescond / abnormal_values_impact_corrupt_impute_and_test.py

Created May 28, 2020 15:36

	# Incremental steps to perform: the integers 1 to 20 inclusive
	steps = list(range(1, 21))

	# Imputation methods to compare, in the order they will be tried
	imputation_methods = [
	    # Replace missing values with the constant 0
	    SimpleImputer(strategy='constant', fill_value=0),
	    # Replace missing values using the 'mean' strategy
	    SimpleImputer(strategy='mean'),
	    # Iterative and k-nearest-neighbours imputers, both with their default settings
	    IterativeImputer(),
	    KNNImputer(),
	]

pierrelouisbescond / abnormal_values_impact_initial_scores.py

Created May 28, 2020 15:16

	# Split the original DataFrame into train and test datasets:
	# every column except "Y" is a feature, "Y" is the target.
	# A fixed random_state keeps the split identical from one run to the next.
	X_train, X_test, y_train, y_test = train_test_split(
	    df.drop(columns="Y"),
	    df["Y"],
	    random_state=22,
	)

	# Models to benchmark, each instantiated with its default parameters
	models = [Lasso(), KNeighborsRegressor(), RandomForestRegressor(), GradientBoostingRegressor()]
	# Class name of every model, in the same order as `models`
	model_names = [type(model).__name__ for model in models]

	# We record the original score achieved by each model on the "test" set after

pierrelouisbescond / abnormal_values_impact_make_regression.py

Created May 28, 2020 15:13

	# Let's import the standard data-processing libraries
	import pandas as pd
	pd.options.display.max_columns = 15
	import numpy as np

	# Sklearn libraries for Data Generation, Imputation and Modeling
	from sklearn.datasets import make_regression

	from sklearn.experimental import enable_iterative_imputer
	from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer

pierrelouisbescond / appcall_back.py

Created May 23, 2020 16:02

	# The callback function will provide one "Output" in the form of a string (=children)
	@app.callback(Output(component_id="prediction_result",component_property="children"),
	# The values corresponding to the three sliders are obtained by calling their id and value property
	[Input("X1_slider","value"), Input("X2_slider","value"), Input("X3_slider","value")])

	# The input variables are listed in the same order as the callback Inputs
	def update_prediction(X1, X2, X3):

	# We create a NumPy array in the form of the original features
	# ["Pressure","Viscosity","Particles_size", "Temperature","Inlet_flow", "Rotating_Speed","pH","Color_density"]

pierrelouisbescond / layout_html_structure.py

Last active May 24, 2020 06:04

	###############################################################################

	# Instantiate the Dash application object; no arguments are passed, so Dash's defaults apply
	app = dash.Dash()

	# The page structure will be:
	# Features Importance Chart
	# <H4> Feature #1 name
	# Slider to update Feature #1 value
	# <H4> Feature #2 name
	# Slider to update Feature #2 value

pierrelouisbescond / create_barchart_and_sliders_labels_ranges.py

Created May 23, 2020 15:45

	# Build the Features Importance bar chart: one bar per entry of
	# df_feature_importances, positioned by its index and sized by its "Importance" column
	fig_features_importance = go.Figure()
	fig_features_importance.add_trace(
	    go.Bar(
	        x=df_feature_importances.index,
	        y=df_feature_importances["Importance"],
	        marker_color='rgb(171, 226, 251)',
	    )
	)
	# Bold title (the <b> tag must be closed with </b>), centred horizontally via title_x=0.5
	fig_features_importance.update_layout(
	    title_text='<b>Features Importance of the model</b>',
	    title_x=0.5,
	)
	# The command below can be activated in a standard notebook to display the chart
	#fig_features_importance.show()

pierrelouisbescond / create_industrial_use_case_and_model.py

Created May 23, 2020 15:28

	# -*- coding: utf-8 -*-

	# We start with the import of standard ML libraries
	import pandas as pd
	import numpy as np
	import math

	from sklearn.datasets import make_regression
	from sklearn.ensemble import RandomForestRegressor