def get_min_pca(scaled_data, min_var_explained=0.95, verbose=False):
    """
    Decompose `scaled_data` into principal components and keep only as many
    as the `min_var_explained` threshold requires.

    `min_var_explained` is passed to `PCA` as `n_components` together with
    `svd_solver='full'`. NOTE(review): `PCA` is presumably scikit-learn's
    `sklearn.decomposition.PCA`, which treats a fractional `n_components` as
    a target share of explained variance but an integer as a plain component
    count; hence only values strictly between 0 and 1 are accepted here.

    Args
    ----
    scaled_data: numpy array: scaled data
    min_var_explained: float in (0, 1), default 0.95: variance explained
        threshold
    verbose: Bool, default False: print results

    Return
    ------
    Tuple of (pcs, min_var_explained, reduced): number of components kept,
    the threshold as given, and the transformed data.

    Raises
    ------
    ValueError: if `min_var_explained` is not a number strictly between
        0 and 1.
    """
    try:
        is_fraction = 0 < min_var_explained < 1
    except TypeError:
        # Non-numeric input (None, str, ...) cannot be a variance fraction.
        is_fraction = False
    if not is_fraction:
        raise ValueError(
            "min_var_explained must be a float strictly between 0 and 1, "
            f"got {min_var_explained!r}"
        )

    pca_all = PCA(n_components=min_var_explained, svd_solver='full')
    # Fit and project in a single call rather than fit() then transform().
    reduced = pca_all.fit_transform(scaled_data)
    pcs = pca_all.n_components_

    if verbose:
        print(f'Components for {min_var_explained:.0%} of variance explained:\n{pcs}')
    return pcs, min_var_explained, reduced
Last active
December 17, 2021 20:07
-
-
Save CatChenal/c7f71f85c311d22f007200b5f94442c1 to your computer and use it in GitHub Desktop.
Function that reduces the data with PCA decomposition, keeping the minimal number of components needed to reach a given explained-variance threshold.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment