Last active
May 29, 2024 10:13
-
-
Save ATpoint/b2a87308ba0935296da0c7ba76a83d89 to your computer and use it in GitHub Desktop.
Vectorized solution to convert UCell scoring output to labels, dealing with ambiguous calls.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Convert UCell signature scores to one label per cell
#'
#' Takes the output of `UCell::ScoreSignatures_UCell()` (rows are cells,
#' columns are signatures) and returns, for every row, the name of the
#' signature with the highest score:
#'
#' * exactly one column holds the row maximum: that column's name, with
#'   `suffix` removed from its end;
#' * every non-missing score in the row is zero, or the row has no
#'   non-missing score at all: the string `"NA"`;
#' * two or more columns share a non-zero row maximum: `"ambiguous"`.
#'
#' Missing scores (`NA`/`NaN`) are ignored when looking for the maximum.
#'
#' @param ucell Numeric matrix or data frame of scores. Column names are
#'   required (they become the labels); row names are not.
#' @param suffix Single string removed from the end of a column name before
#'   it is returned as a label. Use `""` to keep column names unchanged.
#' @return An unnamed character vector with one element per row of `ucell`,
#'   in row order.
#' @examples
#' library(UCell)
#' data(sample.matrix)
#' gene.sets <- list(Tcell_signature = c("CD2","CD3E","CD3D"),
#'                   Myeloid_signature = c("SPI1","FCER1G","CSF1R"))
#' scores <- ScoreSignatures_UCell(sample.matrix, features=gene.sets)
#' convertUCell(scores)
#'
convertUCell <- function(ucell, suffix = "_UCell") {
  stopifnot(is.character(suffix), length(suffix) == 1L, !is.na(suffix))

  scores <- as.matrix(ucell)
  n_cells <- nrow(scores)

  # "NA" (the string, not NA_character_) is the default label; rows that carry
  # a signal are overwritten below.
  labels <- rep("NA", n_cells)
  if (n_cells == 0L || ncol(scores) == 0L) {
    return(labels)
  }

  if (!is.numeric(scores)) {
    stop("`ucell` must contain only numeric scores", call. = FALSE)
  }
  if (is.null(colnames(scores))) {
    stop("`ucell` must have column names to derive labels from", call. = FALSE)
  }

  # Cells without any non-zero, non-missing score keep the "NA" label.
  is_missing <- is.na(scores)
  no_signal <- rowSums(!is_missing & scores != 0) == 0

  # Replace missing scores by -Inf so they can never be picked as the maximum.
  filled <- scores
  filled[is_missing] <- -Inf

  # Column index of the first maximum per row, and the maximum itself.
  # Everything is positional, so missing or duplicated row names are harmless.
  top_col <- max.col(filled, ties.method = "first")
  row_max <- filled[cbind(seq_len(n_cells), top_col)]

  # Number of columns tied at the maximum. `row_max` is recycled down the
  # columns, so row i is compared against row_max[i]. The comparison is exact
  # on purpose: the maximum is one of the compared values.
  n_top <- rowSums(filled == row_max)

  labels[!no_signal & n_top > 1L] <- "ambiguous"

  is_unique <- !no_signal & n_top == 1L
  winners <- colnames(scores)[top_col[is_unique]]

  # Strip the suffix only where it really terminates the name, so a signature
  # that merely contains the suffix text elsewhere is left intact.
  has_suffix <- nzchar(suffix) & endsWith(winners, suffix)
  winners[has_suffix] <- substr(
    winners[has_suffix],
    1L,
    nchar(winners[has_suffix]) - nchar(suffix)
  )
  labels[is_unique] <- winners

  labels
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment