Skip to content

Instantly share code, notes, and snippets.

@pierrelouisbescond
Last active May 31, 2020 15:44
Show Gist options
  • Save pierrelouisbescond/a8f8a130b40ba720635164ae72e59695 to your computer and use it in GitHub Desktop.
Save pierrelouisbescond/a8f8a130b40ba720635164ae72e59695 to your computer and use it in GitHub Desktop.
from sklearn.decomposition import PCA

# Reduce to 2 dimensions, 1 less than the original 3-column dataset (X, y, z).
pca = PCA(n_components=2)

# Fit the decomposition on df once; the fitted model is reused below.
# NOTE: the original code called pca.fit_transform(df) later, which silently
# re-fits the model — redundant work, so we fit here and only transform later.
pca.fit(df)

# pca.explained_variance_ratio_ gives the fraction of the original variance
# captured by each principal component, in decreasing order.
for ordinal, ratio in zip(("first", "second"), pca.explained_variance_ratio_):
    print("The variance from the original dataset explained thanks to the "
          f"{ordinal} vector is: {round(100 * ratio, 1)}%")
print(f"The variance explained by the two vectors is {round(100 * pca.explained_variance_ratio_.sum(), 1)}%")

# Show the principal axes (one row per component) transposed so that the
# original features (X, y, z) become rows and the components become columns.
print("\nEigen Vectors Table:")
display(pd.DataFrame(pca.components_, columns=["X", "y", "z"], index=["pc1", "pc2"]).T)

print("\nConverted Dataset:")
# Project df onto the already-fitted components; transform() (not
# fit_transform()) avoids re-running the decomposition a second time.
df_pca = pd.DataFrame(pca.transform(df), columns=["pc1", "pc2"])
display(df_pca)

# Scatter plot of the dataset in the reduced 2-D principal-component space.
fig_pca = go.Figure()
fig_pca.add_trace(go.Scatter(x=df_pca["pc1"], y=df_pca["pc2"], mode='markers'))
fig_pca.update_traces(marker=dict(size=5))
fig_pca.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment