Created
September 5, 2017 20:51
-
-
Save gokceneraslan/3c9b4838bcf0d03211db2e85fb7219b1 to your computer and use it in GitHub Desktop.
Super cool t-kernel and kNN based imputation method for scRNA-seq data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(FNN)
library(parallel)
#' Impute expression values from each cell's nearest neighbours.
#'
#' Cells are embedded with PCA on `log(cnt + 1)`, the `k` nearest neighbours
#' of every cell are looked up in that embedding with `get.knn()` (FNN), and
#' each cell is replaced by a weighted sum of its neighbours' rows of `cnt`.
#' Weights come from a Student-t density with one degree of freedom evaluated
#' at the neighbour distances, normalised to sum to (almost exactly) one.
#'
#' @param cnt Cell x gene matrix (can be raw count or library size
#'   normalized, but not log transformed).
#' @param k Number of nearest neighbours used per cell; must satisfy
#'   `1 <= k < nrow(cnt)`.
#' @param cores Number of worker processes handed to `mclapply()`. Forced to
#'   1 on Windows, where forking is not available.
#' @param pca.dim Number of principal components kept for the neighbour
#'   search (passed to `prcomp()` as `rank.`).
#' @return A numeric matrix with one row per cell and one column per gene,
#'   carrying the row names of `cnt` and the column names produced by
#'   `colSums()`.
impute <- function(cnt, k = 30, cores = 6, pca.dim = 100) {
  n.cells <- nrow(cnt)
  if (is.null(n.cells) || n.cells < 2) {
    stop("`cnt` must be a matrix-like object with at least two rows (cells)",
         call. = FALSE)
  }
  if (length(k) != 1 || is.na(k) || k < 1 || k >= n.cells) {
    stop("`k` must be a single number with 1 <= k < nrow(cnt)", call. = FALSE)
  }

  print(paste0('Starting imputation with ', n.cells, ' cells and ', ncol(cnt), ' genes...'))

  print('Performing PCA...')
  pca <- prcomp(log(cnt + 1), center = TRUE, rank. = pca.dim)$x

  # Weighted sum of the neighbour rows. `count` is a k x gene matrix and
  # `dist` has length k, so `count * probs` recycles down the columns and
  # scales row i by probs[i].
  impute.cell <- function(count, dist) {
    probs <- dt(dist, df = 1)
    # The epsilon guards against division by zero if every density underflows.
    probs <- probs / (sum(probs) + .Machine$double.eps)
    colSums(count * probs)
  }

  print('Finding nns...')
  nn <- get.knn(pca, k = k)

  print('Imputing...')
  # mclapply() relies on forking and rejects mc.cores > 1 on Windows.
  if (.Platform$OS.type == "windows") {
    cores <- 1L
  }
  mat <- mclapply(seq_len(n.cells), function(ix) {
    # drop = FALSE keeps a matrix even for k = 1 or a single gene, which
    # colSums() requires.
    impute.cell(cnt[nn$nn.index[ix, ], , drop = FALSE], nn$nn.dist[ix, ])
  }, mc.cores = cores)

  # Forked workers report failures as "try-error" objects rather than
  # raising; surface them instead of binding them into the result.
  failed <- vapply(mat, inherits, logical(1), what = "try-error")
  if (any(failed)) {
    stop("Imputation failed for ", sum(failed), " cell(s); first error: ",
         as.character(mat[[which(failed)[1]]]), call. = FALSE)
  }

  # rbind() always yields cells x genes, including the single-gene case.
  imputed <- do.call(rbind, mat)
  rownames(imputed) <- rownames(cnt)
  imputed
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment