Created
April 28, 2021 14:24
-
-
Save simon-anders/76495573511916c7adc78a238627aba5 to your computer and use it in GitHub Desktop.
Imputation via PCA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library( irlba ) | |
m <- 10000 # nbr of features (rows) | |
n <- 5000 # nbr of cells (colums) | |
r <- 5 # nbr of latent components | |
## The true latent values | |
# True importance of latent factors | |
true_importance <- c( 1, .8, .4, .2, .1 ) | |
# For each cell, draw its latent variates | |
latent_true <- t( sapply( true_importance, function(s) rnorm( n, sd=s ) ) ) | |
# For each latent variable, draw its influence on the features | |
latent_influence <- matrix( rnorm( m*r ), nrow=m, ncol=r ) | |
## Initial data | |
# From this, we get the expression means. Add some noise to it | |
expr <- latent_influence %*% latent_true + | |
matrix( rnorm( m*n, sd=.2 ), nrow=m, ncol=n ) | |
## Do PCA: | |
#pr <- prcomp( t(expr) ) | |
pr <- prcomp_irlba( t(expr), 5 ) | |
plot( pr$sdev / pr$sdev[1] ); points( 1:5, true_importance / true_importance[1], col="blue", cex=.2 ) | |
plot( pr$x[,1], latent_true[1,] ) | |
plot( pr$rotation[,1], latent_influence[,1] ) | |
# Compare a cell: | |
plot( ( pr$x %*% t(pr$rotation) )[17,], expr[,17] ) | |
## Remove some data | |
expr1 <- expr | |
expr1[ runif(length(expr1)) < .6 ] <- NA | |
## Try to get the data back | |
# Copy to working matrix | |
expr_w <- expr1 | |
# Initialise by setting the missing values to zero | |
expr_w[ is.na(expr1) ] <- 0 | |
# BEGIN LOOP | |
# Plot, to comapre for a cell how good we have reconstructed the data | |
plot( expr[,17], expr_w[,17] ) | |
# Do a PCR obn the working matrix | |
pr <- prcomp_irlba( t(expr_w), 5 ) | |
# Replace missing data with prediction from PCA | |
expr_w[ is.na(expr1) ] <- t( pr$x %*% t(pr$rotation) )[ is.na(expr1) ] | |
# END LOOP |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment