Last active
June 17, 2019 16:03
-
-
Save mw3i/957155e0834adab38cd938b0bbe3a633 to your computer and use it in GitHub Desktop.
PCA in pure Numpy (based on Sebastian Raschka tutorial)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Implementation of PCA with NumPy (via eigendecomposition of the covariance matrix), based on this tutorial by Sebastian Raschka: https://sebastianraschka.com/Articles/2014_pca_step_by_step.html
'''
import numpy as np | |
def get_components(data: np.ndarray) -> np.ndarray:
    '''
    Compute the principal components of `data` from its covariance matrix.

    Parameters
    ----------
    data : np.ndarray
        2-D array with one row per instance and one column per feature.

    Returns
    -------
    np.ndarray
        Real, square (n_features, n_features) array whose columns are the
        unit-length eigenvectors of the covariance matrix, ordered from the
        largest eigenvalue to the smallest. The sign of each column is
        arbitrary (an eigenvector and its negation are equally valid).
    '''
    # np.cov treats rows as variables, hence the transpose. atleast_2d keeps
    # the single-feature case (where np.cov returns a 0-d array) decomposable.
    cov_mat = np.atleast_2d(np.cov(data.T))

    # A covariance matrix is symmetric, so use eigh rather than eig: the
    # result is guaranteed real (eig can leak complex round-off) and the
    # eigenvalues come back already sorted in ascending order.
    _, eig_vec = np.linalg.eigh(cov_mat)

    # reverse the column order: largest eigenvalue first
    return eig_vec[:, ::-1]
## example usage (only runs when this file is executed as a script):
if __name__ == '__main__':

    # random demo data: 100 rows (instances) by 3 columns ("features"),
    # drawn from a normal distribution with mean 0 and standard deviation 1
    data = np.random.normal(loc=0, scale=1, size=[100, 3])

    # eigenvectors of the covariance matrix, one per column, largest first
    components = get_components(data)

    # keep only the leading components and project the data onto them
    num_components = 2
    top_components = components[:, :num_components]
    transformed_data = np.matmul(data, top_components)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment