Pierre-Louis BESCOND pierrelouisbescond

Head of Data & Advanced Analytics

pierrelouisbescond / process_cinetic_shift_analysis.py

Created June 22, 2020 05:54

	# Track how the correlation coefficient evolves as the shift number increases
	# and record the successive values into a DataFrame (one float column per shifted feature)
	shift_corr_results = pd.DataFrame(columns=["x1_shifted","x2_shifted","x3_shifted"], dtype=float)

	for feature in shift_corr_results.columns:

	# The shift range goes from 0 to 49 (the upper bound of range is exclusive);
	# it should be adapted to every use-case
	for shift_value in range(0,50):

	# The correlation coefficient is calculated

pierrelouisbescond / process_cinetic_shift_features.py

Created June 22, 2020 04:37

	# Build one "<name>_shifted" column per original feature
	for feature in ("x1", "x2", "x3"):
	    feature_new_name = feature + "_shifted"

	    # Draw a fresh random shift for each feature: an integer in [10, 50)
	    random_shift = np.random.randint(low=10, high=50)

	    # A negative period moves values towards earlier index positions
	    df[feature_new_name] = df[feature].shift(periods=-random_shift)

pierrelouisbescond / process_cinetic_create_and_plot_dataset.py

Last active June 22, 2020 09:49

	import pandas as pd
	import numpy as np
	import plotly.graph_objects as go

	# Build the index: dataset_size evenly spaced points from 0 to 20 (both included)
	dataset_size = 1000
	idx = np.linspace(start=0, stop=20, num=dataset_size)

	# x1, x2 have a cyclical behavior, quite close to each other
	# (a cosine wave plus uniform noise drawn from [0, 0.2))
	x1 = 0.2 * np.random.random(size=dataset_size) + np.cos(idx)

pierrelouisbescond / augmented_data.ipynb

Created June 13, 2020 11:46

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

pierrelouisbescond / fe_cyclical_random_and_plot.py

Last active June 7, 2020 06:30

	# Draw 15 random rows of the cosine/sine coordinates and put them back in index order
	df_selected = df[["cos_x","sin_x"]].sample(n=15).sort_index()
	display(df_selected)

	# Scatter the sampled points in the (cos_x, sin_x) plane
	fig = go.Figure()
	sampled_points = go.Scatter(x=df_selected["cos_x"], y=df_selected["sin_x"], mode="markers")
	fig.add_trace(sampled_points)

	# The y axis is anchored to the x axis with a 1:1 scale ratio
	fig.update_layout(
	    xaxis={"title": "cos_x"},
	    yaxis={"title": "sin_x", "scaleanchor": "x", "scaleratio": 1},
	)

pierrelouisbescond / fe_cyclical_cosine_sine_coord_plot.py

Created June 7, 2020 06:27

	fig = go.Figure()

	# Keep every 24th row only, i.e. 60 rows out of the 1440
	every_24th = slice(None, None, 24)
	fig.add_trace(
	    go.Scatter(x=df["cos_x"][every_24th], y=df["sin_x"][every_24th], mode="markers")
	)

	# The y axis is anchored to the x axis with a 1:1 scale ratio
	fig.update_layout(
	    xaxis={"title": "cos_x"},
	    yaxis={"title": "sin_x", "scaleanchor": "x", "scaleratio": 1},
	)

	fig.show()

pierrelouisbescond / fe_cyclical_add_sine_and_plot.py

Created June 7, 2020 06:24

	# Add the sine of the normalized feature as a new column
	df["sin_x"] = np.sin(df["x_norm"])

	import plotly.graph_objects as go

	fig = go.Figure()

	# One trace per encoding, both plotted against the normalized x values
	for column in ("cos_x", "sin_x"):
	    fig.add_trace(go.Scatter(x=df["x_norm"], y=df[column], name=column))

	# The y axis is anchored to the x axis with a 1:1 scale ratio
	fig.update_layout(yaxis={"scaleanchor": "x", "scaleratio": 1})

pierrelouisbescond / fe_cyclical_cosine_and_plot.py

Last active September 11, 2022 09:05

	# We normalize x values to match with the 0-2π cycle.
	# The cycle length is max + 1, not max: dividing by max would map the first
	# value to angle 0 and the last value to angle 2π, i.e. the same point on the
	# circle, making the two indistinguishable once encoded with cosine/sine.
	# NOTE(review): this assumes x holds 0-based, unit-step values (0, 1, ..., max).
	period = df["x"].max() + 1
	df["x_norm"] = 2 * math.pi * df["x"] / period

	# Cosine coordinate of each value on the cycle
	df["cos_x"] = np.cos(df["x_norm"])

	display(df)

	import plotly.graph_objects as go

	fig = go.Figure()

pierrelouisbescond / fe_cyclical_time_to_integers.py

Last active September 11, 2022 08:55

	import pandas as pd
	import numpy as np
	import math

	# Minute-by-minute timestamps starting at 6/1/2020 (US format); the last generated
	# timestamp (6/2/2020 00:00) is dropped so that only 6/1/2020 is covered
	minute_index = pd.date_range(start='6/1/2020', end='6/2/2020', freq='min')[:-1]
	df = pd.DataFrame(index=minute_index)

	# Number the rows with integers from 0 to 1439 (= 24 hours x 60 minutes)
	df["x"] = np.arange(24 * 60, dtype=int)
	df

pierrelouisbescond / pca_calculation.py

Last active May 31, 2020 15:44

	from sklearn.decomposition import PCA

	# We target 2 dimensions here, 1 less than the original dataset
	pca = PCA(n_components=2)
	# Fit the dimension reduction on df
	pca.fit(df)

	# pca.explained_variance_ratio_ holds the share of variance explained by each vector;
	# with n_components=2 it has exactly two entries, converted here to rounded percentages
	first_pct, second_pct = (round(100 * ratio, 1) for ratio in pca.explained_variance_ratio_)
	print("The variance from the original dataset explained thanks to the first vector is: {}%".format(first_pct))
	print("The variance from the original dataset explained thanks to the second vector is: {}%".format(second_pct))