Created
November 13, 2012 13:10
-
-
Save microo8/4065693 to your computer and use it in GitHub Desktop.
PCA procedure in C using GSL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <gsl/gsl_matrix.h> | |
#include <gsl/gsl_statistics.h> | |
#include <gsl/gsl_eigen.h> | |
#include <gsl/gsl_blas.h> | |
gsl_matrix* pca(const gsl_matrix* data, unsigned int L) | |
{ | |
/* | |
@param data - matrix of data vectors, MxN matrix, each column is a data vector, M - dimension, N - data vector count | |
@param L - dimension reduction | |
*/ | |
assert(data != NULL); | |
assert(L > 0 && L < data->size2); | |
unsigned int i; | |
unsigned int rows = data->size1; | |
unsigned int cols = data->size2; | |
gsl_vector* mean = gsl_vector_alloc(rows); | |
for(i = 0; i < rows; i++) { | |
gsl_vector_set(mean, i, gsl_stats_mean(data->data + i * cols, 1, cols)); | |
} | |
// Get mean-substracted data into matrix mean_substracted_data. | |
gsl_matrix* mean_substracted_data = gsl_matrix_alloc(rows, cols); | |
gsl_matrix_memcpy(mean_substracted_data, data); | |
for(i = 0; i < cols; i++) { | |
gsl_vector_view mean_substracted_point_view = gsl_matrix_column(mean_substracted_data, i); | |
gsl_vector_sub(&mean_substracted_point_view.vector, mean); | |
} | |
gsl_vector_free(mean); | |
// Compute Covariance matrix | |
gsl_matrix* covariance_matrix = gsl_matrix_alloc(rows, rows); | |
gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0 / (double)(cols - 1), mean_substracted_data, mean_substracted_data, 0.0, covariance_matrix); | |
gsl_matrix_free(mean_substracted_data); | |
// Get eigenvectors, sort by eigenvalue. | |
gsl_vector* eigenvalues = gsl_vector_alloc(rows); | |
gsl_matrix* eigenvectors = gsl_matrix_alloc(rows, rows); | |
gsl_eigen_symmv_workspace* workspace = gsl_eigen_symmv_alloc(rows); | |
gsl_eigen_symmv(covariance_matrix, eigenvalues, eigenvectors, workspace); | |
gsl_eigen_symmv_free(workspace); | |
gsl_matrix_free(covariance_matrix); | |
// Sort the eigenvectors | |
gsl_eigen_symmv_sort(eigenvalues, eigenvectors, GSL_EIGEN_SORT_ABS_DESC); | |
gsl_vector_free(eigenvalues); | |
// Project the original dataset | |
gsl_matrix* result = gsl_matrix_alloc(L, cols); | |
gsl_matrix_view L_eigenvectors = gsl_matrix_submatrix(eigenvectors, 0, 0, rows, L); | |
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, &L_eigenvectors.matrix, data, 0.0, result); | |
gsl_matrix_free(eigenvectors); | |
// Result is n LxN matrix, each column is the original data vector with reduced dimension from M to L | |
return result; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment