Created
March 10, 2021 03:22
-
-
Save cbrown5/876d8e6d6102ca53c71d0e8c06006172 to your computer and use it in GitHub Desktop.
Turn a data frame that indicates which cluster each sample belongs to into a distance matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert a data frame of cluster identities into a square matrix
# representation of the clusters. Rows/columns are samples (IDs); a cell is
# 1 when the two samples are in the same cluster and 0 when they are not.
# (Despite the name, this is a co-membership indicator: same cluster == 1.)
#
# Arguments:
#   dtemp  data frame with (at least) two columns:
#            ID       a unique cell/sample ID
#            Cluster  the cluster the cell belongs to (any label type;
#                     labels need not be the integers 1..n)
#          Row/column order of the result follows the order in which IDs
#          first appear in dtemp, so arrange dtemp beforehand (e.g. in
#          ascending ID order) if a particular order is required.
#
# Returns:
#   A numeric nID x nID matrix of 0s and 1s, where nID is the number of
#   distinct IDs in dtemp. Samples whose Cluster is NA are not paired with
#   anything (their row and column stay 0, including the diagonal).
as_dist_mat <- function(dtemp) {
  stopifnot(
    is.data.frame(dtemp),
    all(c("ID", "Cluster") %in% names(dtemp))
  )

  # Everything is derived from dtemp itself rather than from variables in
  # the calling environment.
  IDs <- unique(dtemp[["ID"]])
  nID <- length(IDs)
  dmat <- matrix(0, nrow = nID, ncol = nID)

  # Row/column position of every sample in the output matrix.
  pos <- match(dtemp[["ID"]], IDs)

  # Group the positions by cluster label. split() drops NA labels and
  # yields empty groups for unused factor levels, both of which are no-ops
  # in the loop below.
  members <- split(pos, dtemp[["Cluster"]])

  for (idx in members) {
    # All pairs (i, j) within one cluster are marked as "same cluster".
    dmat[idx, idx] <- 1
  }

  dmat
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment