Skip to content

Instantly share code, notes, and snippets.

View pierrelouisbescond's full-sized avatar

Pierre-Louis BESCOND pierrelouisbescond

View GitHub Profile
# Empty float DataFrame used to record the correlation coefficient obtained
# for each successive shift value, with one column per shifted feature
shift_corr_results = pd.DataFrame(
    columns=[name + "_shifted" for name in ("x1", "x2", "x3")],
    dtype=float,
)
for feature in shift_corr_results.columns:
# We define a shift range from 0 to 50 but it should be adapted to every use-case
for shift_value in range(0,50):
# The correlation coefficient is calculated
# Build a shifted copy of every feature, each one with its own random lag
for feature in ["x1", "x2", "x3"]:
    # The shifted column is named after the feature it is derived from
    feature_new_name = feature + "_shifted"
    # The lag is drawn at random: an integer from 10 to 49
    # (the upper bound of np.random.randint is exclusive)
    random_shift = np.random.randint(10, 50)
    # A negative shift moves the values towards the start of the DataFrame,
    # leaving NaN in the last `random_shift` rows of the new column
    df[feature_new_name] = df[feature].shift(-random_shift)
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# The index comes first: dataset_size evenly spaced points from 0 to 20
dataset_size = 1000
idx = np.linspace(start=0, stop=20, num=dataset_size)
# x1 is cyclical: a cosine of the index plus uniform noise in [0, 0.2)
# (per the original note, x2 behaves similarly and stays close to x1)
x1 = 0.2 * np.random.random(size=dataset_size) + np.cos(idx)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Keep 15 randomly drawn rows of the two columns, put back in index order
df_selected = df[["cos_x", "sin_x"]].sample(n=15).sort_index()
display(df_selected)
# Scatter the drawn points in the (cos_x, sin_x) plane
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=df_selected["cos_x"], y=df_selected["sin_x"], mode="markers")
)
# The y axis is anchored to the x axis with a ratio of 1 so both share one scale
fig.update_layout(
    xaxis={"title": "cos_x"},
    yaxis={"title": "sin_x", "scaleanchor": "x", "scaleratio": 1},
)
# Plot only every 24th row (60 points out of the 1440) as markers
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=df["cos_x"][::24], y=df["sin_x"][::24], mode="markers")
)
# The y axis is anchored to the x axis with a ratio of 1 so both share one scale
fig.update_layout(
    xaxis={"title": "cos_x"},
    yaxis={"title": "sin_x", "scaleanchor": "x", "scaleratio": 1},
)
fig.show()
# Store the sine of the normalized value as its own column
df["sin_x"] = np.sin(df["x_norm"])
import plotly.graph_objects as go
# Draw cos_x and sin_x against x_norm, one named trace per column
fig = go.Figure()
fig.add_traces([
    go.Scatter(x=df["x_norm"], y=df[column], name=column)
    for column in ("cos_x", "sin_x")
])
# The y axis is anchored to the x axis with a ratio of 1 so both share one scale
fig.update_layout(yaxis={"scaleanchor": "x", "scaleratio": 1})
# Rescale x so that its largest value corresponds to 2π, i.e. one full cycle
# NOTE(review): if x starts at 0, its largest value lands on the same angle
# as 0 — confirm that this overlap is intended
df["x_norm"] = df["x"].mul(2 * math.pi).div(df["x"].max())
# Cosine of the normalized value, stored as its own column
df["cos_x"] = np.cos(df["x_norm"])
display(df)
import plotly.graph_objects as go
# Start from an empty figure (the remainder of this snippet is not visible in this excerpt)
fig = go.Figure()
import pandas as pd
import numpy as np
import math
# One row per minute of 6/1/2020 (US format): 24 x 60 timestamps, 00:00 to 23:59
df = pd.DataFrame(index=pd.date_range(start='6/1/2020', periods=24 * 60, freq='min'))
# Minute-of-day counter: the integers 0 to 1439
df["x"] = np.arange(24 * 60, dtype=int)
df
from sklearn.decomposition import PCA
# Target 2 dimensions (per the original note, one fewer than the dataset has)
# and fit the reduction on df in the same step
pca = PCA(n_components=2).fit(df)
# explained_variance_ratio_ gives the share of variance explained by each vector;
# convert both shares to percentages rounded to one decimal
first_share, second_share = (
    round(100 * share, 1) for share in pca.explained_variance_ratio_
)
print("The variance from the original dataset explained thanks to the first vector is: {}%".format(first_share))
print("The variance from the original dataset explained thanks to the second vector is: {}%".format(second_share))