Created
March 24, 2022 21:55
-
-
Save JakeConway/5af0c7de38ce0a27d6ca6116b04e1ca7 to your computer and use it in GitHub Desktop.
CCF inference
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
purity.check <- function(maf) { | |
maf[which(is.na(maf$purity)), ]$purity <- 0 | |
samples <- length(unique(maf[which(maf$purity == 0), ]$Tumor_Sample_Barcode)) | |
rows <- nrow(maf[which(maf$purity == 0), ]) | |
maf <- maf[which(maf$purity > 0), ] | |
cat("Removed", rows, "rows from", samples, "samples with purity values of 0 or NA\n") | |
return(maf) | |
} | |
tcn.check <- function(maf) { | |
rows <- nrow(maf[which(is.na(maf$tcn)), ]) | |
maf <- maf[which(!is.na(maf$tcn)), ] | |
cat("Removed", rows, "rows with NA tcn values\n") | |
rows <- nrow(maf[which(maf$tcn == 0), ]) | |
maf[which(maf$tcn == 0), ]$tcn <- 1 | |
cat(rows, "rows with tcn of 0 were set to tcn of 1\n") | |
return(maf) | |
} | |
format.maf <- function(maf) { | |
# format chromosomes | |
maf$Chromosome <- as.character(maf$Chromosome) | |
maf$Chromosome <- as.character(maf$Chromosome) | |
if(c("X") %in% maf$Chromosome) { | |
maf[which(maf$Chromosome == "X"), ]$Chromosome <- "23" | |
} | |
if(c("Y") %in% maf$Chromosome) { | |
maf[which(maf$Chromosome == "Y"), ]$Chromosome <- "24" | |
} | |
maf$Chromosome <- as.numeric(maf$Chromosome) | |
# check gender | |
if(!"gender" %in% names(maf)) { | |
maf$gender <- NA | |
} | |
# format copy number columns | |
maf$absCN <- maf$tcn | |
maf$major_CN <- maf$absCN - maf$lcn | |
maf$minor_CN <- maf$lcn | |
maf$one_copy_CN <- 1 | |
maf <- maf[which(!is.na(maf$Chromosome)), ] | |
return(maf) | |
} | |
multiplicity.cn <- function(maf) { | |
vaf <- maf$t_alt_count/maf$depth | |
m <- vaf * (1/maf$purity) * (maf$purity * maf$absCN + (1 - maf$purity) * 2) | |
m[which(m < 1)] <- 1 | |
m <- round(m) | |
maf$multiplicity_CN <- m | |
return(maf) | |
} | |
ccf.likelihood <- function(purity, | |
absCN, | |
alt_allele, | |
coverage, | |
copies, | |
chromosome, | |
gender) { | |
#From McGranahan_and_Swanton_2015 | |
CCFs <- seq(0.00, 1.00, 0.01) | |
ref.copies <- 2 | |
if(chromosome == "24") { | |
ref.copies <- 1 | |
} | |
# Female and NA genders will have ref.copies of 2 | |
# Prior studies have kept ref.copies as 2 despite gender | |
if(chromosome == "23") { | |
if(gender == "Male") { | |
ref.copies <- 1 | |
} else { | |
ref.copies <- 2 | |
} | |
} | |
vac.ccf <- function(CCF, purity, absCN) { | |
purity * CCF * copies / (ref.copies * (1 - purity) + purity * absCN) | |
} | |
probs <- sapply(CCFs, function(c) { | |
dbinom(alt_allele, coverage, vac.ccf(c, purity, absCN)) | |
}) | |
probs <- probs / sum(probs) | |
ccf.max <- which.max(probs) | |
max.prob <- max(probs) | |
ccf.gt.half.max <- which(probs > max(probs) / 2) | |
ccf.lower <- | |
max(ccf.gt.half.max[1] - 1, 1) ### closest ccf value before half-max range (within 0-1 range) | |
ccf.upper <- | |
min(ccf.gt.half.max[length(ccf.gt.half.max)] + 1, length(CCFs)) ### closest ccf value after half-max range (within 0-1 range) | |
if (is.na(purity)) { | |
ccf.upper <- NA | |
} | |
ccf.max <- ccf.max / length(CCFs) | |
ccf.lower <- ccf.lower / length(CCFs) | |
ccf.upper <- ccf.upper / length(CCFs) | |
#if(is.na(purity)){ccf.upper<-NA} | |
p100 <- sum(probs[101]) | |
p80 <- sum(probs[81:101]) | |
p50 <- sum(probs[51:101]) | |
as.list(c(probs, ccf.max, max.prob, p100, p80, p50)) | |
} | |
determineMaxLikelihood <- function(major_CCF, | |
minor_CCF, | |
multiplicity_CCF, | |
one_copy_CCF, | |
major_max_prob, | |
minor_max_prob, | |
multiplicity_max_prob, | |
one_copy_max_prob) { | |
copy.number.options <- c("major", "minor", "multiplicity", "one_copy") | |
max.probs <- c(major_max_prob, minor_max_prob, multiplicity_max_prob, one_copy_max_prob) | |
max.index <- which(max.probs == max(max.probs, na.rm = T))[1] | |
ccfs <- c(major_CCF, minor_CCF, multiplicity_CCF, one_copy_CCF) | |
mle.copy.number <- copy.number.options[max.index] | |
as.list(c(ccfs[max.index], paste0(mle.copy.number, "_CN"))) | |
} | |
ccfAnnotate <- function(maf) { | |
maf <- maf[order(maf$Tumor_Sample_Barcode), ] | |
maf <- purity.check(maf) | |
maf <- tcn.check(maf) | |
maf <- format.maf(maf) | |
maf <- multiplicity.cn(maf) | |
# maf <- as.data.frame(maf) | |
maf <- maf[which(!is.na(maf$absCN)), ] | |
maf <- maf[which(!is.na(maf$major_CN)), ] | |
print("Determining likelihoods for major CN") | |
maf[, c(paste0("major_ccf_", seq(0, 1, 0.01)), "major_CCF", "major_max_prob", "major_100", "major_80", "major_50") := | |
ccf.likelihood(purity, | |
absCN, | |
t_alt_count, | |
(t_alt_count + t_ref_count), | |
copies = major_CN, | |
chromosome = Chromosome, | |
gender), by = 1:nrow(maf)] | |
print("Determining likelihoods for minor CN") | |
maf[, c(paste0("minor_ccf_", seq(0, 1, 0.01)), "minor_CCF", "minor_max_prob", "minor_100", "minor_80", "minor_50") := | |
ccf.likelihood(purity, | |
absCN, | |
t_alt_count, | |
(t_alt_count + t_ref_count), | |
copies = minor_CN, | |
chromosome = Chromosome, | |
gender), by = 1:nrow(maf)] | |
print("Determining likelihoods for expected multiplicity CN") | |
maf[, c(paste0("multiplicity_ccf_", seq(0, 1, 0.01)), "multiplicity_CCF", "multiplicity_max_prob", "multiplicity_100", "multiplicity_80", "multiplicity_50") := | |
ccf.likelihood(purity, | |
absCN, | |
t_alt_count, | |
(t_alt_count + t_ref_count), | |
copies = multiplicity_CN, | |
chromosome = Chromosome, | |
gender), by = 1:nrow(maf)] | |
print("Determining likelihoods for mutation on only one copy (after CNA event)") | |
maf[, c(paste0("one_copy_ccf_", seq(0, 1, 0.01)), "one_copy_CCF", "one_copy_max_prob", "one_copy_100", "one_copy_80", "one_copy_50") := | |
ccf.likelihood(purity, | |
absCN, | |
t_alt_count, | |
(t_alt_count + t_ref_count), | |
copies = one_copy_CN, | |
chromosome = Chromosome, | |
gender), by = 1:nrow(maf)] | |
print("Determining MLE of CCF across CNs") | |
maf[ ,c("CCF", "mle_CN") := determineMaxLikelihood(major_CCF, | |
minor_CCF, | |
multiplicity_CCF, | |
one_copy_CCF, | |
major_max_prob, | |
minor_max_prob, | |
multiplicity_max_prob, | |
one_copy_max_prob), by = 1:nrow(maf)] | |
return(maf) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment