Skip to content

Instantly share code, notes, and snippets.

@pierrelouisbescond
Last active May 31, 2020 15:44
Show Gist options
  • Save pierrelouisbescond/a8f8a130b40ba720635164ae72e59695 to your computer and use it in GitHub Desktop.
Save pierrelouisbescond/a8f8a130b40ba720635164ae72e59695 to your computer and use it in GitHub Desktop.
from sklearn.decomposition import PCA

# Reduce to 2 dimensions, 1 less than the original 3-column dataset (X, y, z).
pca = PCA(n_components=2)

# Fit the decomposition on df once; the fitted model is reused below.
# NOTE: the original code called pca.fit_transform(df) later, which silently
# re-fits the model — redundant work, so we fit here and only transform later.
pca.fit(df)

# pca.explained_variance_ratio_ gives the fraction of the original variance
# captured by each principal component, in decreasing order.
for ordinal, ratio in zip(("first", "second"), pca.explained_variance_ratio_):
    print("The variance from the original dataset explained thanks to the "
          f"{ordinal} vector is: {round(100 * ratio, 1)}%")
print(f"The variance explained by the two vectors is {round(100 * pca.explained_variance_ratio_.sum(), 1)}%")

# Show the principal axes (one row per component) transposed so that the
# original features (X, y, z) become rows and the components become columns.
print("\nEigen Vectors Table:")
display(pd.DataFrame(pca.components_, columns=["X", "y", "z"], index=["pc1", "pc2"]).T)

print("\nConverted Dataset:")
# Project df onto the already-fitted components; transform() (not
# fit_transform()) avoids re-running the decomposition a second time.
df_pca = pd.DataFrame(pca.transform(df), columns=["pc1", "pc2"])
display(df_pca)

# Scatter plot of the dataset in the reduced 2-D principal-component space.
fig_pca = go.Figure()
fig_pca.add_trace(go.Scatter(x=df_pca["pc1"], y=df_pca["pc2"], mode='markers'))
fig_pca.update_traces(marker=dict(size=5))
fig_pca.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment