This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Track how the correlation coefficient evolves as the shift increases,
# recording the successive values in a DataFrame with one float column per
# shifted feature. The frame starts empty; rows are added by the loop below.
shift_corr_results = pd.DataFrame(
    columns=["x1_shifted", "x2_shifted", "x3_shifted"],
    dtype=float,
)
for feature in shift_corr_results.columns: | |
# We define a shift range from 0 to 50 but it should be adapted to every use-case | |
for shift_value in range(0,50): | |
# The correlation coefficient is calculated |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a shifted copy of each feature, stored as "<feature>_shifted".
# NOTE(review): assumes `df` is a pandas DataFrame defined earlier that
# already holds the columns x1, x2 and x3 -- confirm against the caller.
for feature in ["x1", "x2", "x3"]:
    feature_new_name = feature + "_shifted"
    # The shift size is drawn at random; np.random.randint(10, 50) excludes
    # the upper bound, so the value lies between 10 and 49.
    random_shift = np.random.randint(10, 50)
    # A negative period moves values towards earlier rows, so the trailing
    # `random_shift` rows of the new column have no source value.
    df[feature_new_name] = df[feature].shift(-random_shift)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import plotly.graph_objects as go | |
# Let's start by creating our index: `dataset_size` evenly spaced points
# covering [0, 20] (both ends included).
dataset_size = 1000
idx = np.linspace(0, 20, dataset_size)

# x1 and x2 have a cyclical behavior and are quite close to each other.
# x1 is a cosine wave plus uniform noise drawn from [0, 0.2).
x1 = np.cos(idx) + 0.2 * np.random.random(dataset_size)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Draw 15 random rows of the two cyclical columns, then put them back in
# index order.
df_selected = df[["cos_x", "sin_x"]].sample(15).sort_index()
# `display` is not defined in this snippet; presumably the notebook
# (IPython) built-in -- confirm the execution environment.
display(df_selected)

# Scatter the sampled points in the (cos_x, sin_x) plane.
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=df_selected.cos_x, y=df_selected.sin_x, mode="markers")
)
fig.update_layout(
    xaxis=dict(title="cos_x"),
    # Anchor the y axis to x with a 1:1 ratio so both axes share one scale.
    yaxis=dict(title="sin_x", scaleanchor="x", scaleratio=1),
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scatter a regular subsample of the points in the (cos_x, sin_x) plane.
fig = go.Figure()
# We use [::24] to keep every 24th row, i.e. only 60 rows out of the 1440.
fig.add_trace(go.Scatter(x=df.cos_x[::24], y=df.sin_x[::24], mode="markers"))
fig.update_layout(
    xaxis=dict(title="cos_x"),
    # Anchor the y axis to x with a 1:1 ratio so both axes share one scale.
    yaxis=dict(title="sin_x", scaleanchor="x", scaleratio=1),
)
fig.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add the sine of the normalized x values as a new column.
df["sin_x"] = np.sin(df["x_norm"])

import plotly.graph_objects as go

# Plot cos_x and sin_x against the normalized x values on one figure.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.x_norm, y=df.cos_x, name='cos_x'))
fig.add_trace(go.Scatter(x=df.x_norm, y=df.sin_x, name='sin_x'))
# Anchor the y axis to x with a 1:1 ratio so both axes share one scale.
fig.update_layout(yaxis=dict(scaleanchor="x", scaleratio=1))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# We normalize the x values so that they span one 0-2π cycle: the largest
# x maps to 2π.
df["x_norm"] = 2 * math.pi * df["x"] / df["x"].max()
df["cos_x"] = np.cos(df["x_norm"])
# `display` is not defined in this snippet; presumably the notebook
# (IPython) built-in -- confirm the execution environment.
display(df)

import plotly.graph_objects as go

fig = go.Figure()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import math | |
# We create the DataFrame indexed by a minute-level date range starting at
# 6/1/2020 (US format). The range runs up to 6/2/2020 and [:-1] drops that
# final timestamp, leaving exactly one day of minutes.
df = pd.DataFrame(
    index=pd.date_range(start='6/1/2020', end='6/2/2020', freq='min')[:-1]
)
# We create an integer array from 0 to 1439 (= 24 hours x 60 minutes).
# np.arange builds the integers directly instead of truncating floats.
df["x"] = np.arange(24 * 60)
df
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.decomposition import PCA | |
# The number of dimensions targeted here is 2, which the original author
# states is 1 less than the original dataset (df is not visible here).
pca = PCA(n_components=2)
# We run the dimension reduction on df.
pca.fit(df)
# pca.explained_variance_ratio_ holds the share of variance explained by
# each component; it is reported below as a percentage rounded to 1 decimal.
print(f"The variance from the original dataset explained thanks to the first vector is: {round(100 * pca.explained_variance_ratio_[0], 1)}%")
print(f"The variance from the original dataset explained thanks to the second vector is: {round(100 * pca.explained_variance_ratio_[1], 1)}%")