Skip to content

Instantly share code, notes, and snippets.

@JakeConway
Created March 24, 2022 21:55
Show Gist options
  • Save JakeConway/5af0c7de38ce0a27d6ca6116b04e1ca7 to your computer and use it in GitHub Desktop.
Save JakeConway/5af0c7de38ce0a27d6ca6116b04e1ca7 to your computer and use it in GitHub Desktop.
CCF inference
purity.check <- function(maf) {
maf[which(is.na(maf$purity)), ]$purity <- 0
samples <- length(unique(maf[which(maf$purity == 0), ]$Tumor_Sample_Barcode))
rows <- nrow(maf[which(maf$purity == 0), ])
maf <- maf[which(maf$purity > 0), ]
cat("Removed", rows, "rows from", samples, "samples with purity values of 0 or NA\n")
return(maf)
}
tcn.check <- function(maf) {
rows <- nrow(maf[which(is.na(maf$tcn)), ])
maf <- maf[which(!is.na(maf$tcn)), ]
cat("Removed", rows, "rows with NA tcn values\n")
rows <- nrow(maf[which(maf$tcn == 0), ])
maf[which(maf$tcn == 0), ]$tcn <- 1
cat(rows, "rows with tcn of 0 were set to tcn of 1\n")
return(maf)
}
format.maf <- function(maf) {
# format chromosomes
maf$Chromosome <- as.character(maf$Chromosome)
maf$Chromosome <- as.character(maf$Chromosome)
if(c("X") %in% maf$Chromosome) {
maf[which(maf$Chromosome == "X"), ]$Chromosome <- "23"
}
if(c("Y") %in% maf$Chromosome) {
maf[which(maf$Chromosome == "Y"), ]$Chromosome <- "24"
}
maf$Chromosome <- as.numeric(maf$Chromosome)
# check gender
if(!"gender" %in% names(maf)) {
maf$gender <- NA
}
# format copy number columns
maf$absCN <- maf$tcn
maf$major_CN <- maf$absCN - maf$lcn
maf$minor_CN <- maf$lcn
maf$one_copy_CN <- 1
maf <- maf[which(!is.na(maf$Chromosome)), ]
return(maf)
}
multiplicity.cn <- function(maf) {
vaf <- maf$t_alt_count/maf$depth
m <- vaf * (1/maf$purity) * (maf$purity * maf$absCN + (1 - maf$purity) * 2)
m[which(m < 1)] <- 1
m <- round(m)
maf$multiplicity_CN <- m
return(maf)
}
ccf.likelihood <- function(purity,
absCN,
alt_allele,
coverage,
copies,
chromosome,
gender) {
#From McGranahan_and_Swanton_2015
CCFs <- seq(0.00, 1.00, 0.01)
ref.copies <- 2
if(chromosome == "24") {
ref.copies <- 1
}
# Female and NA genders will have ref.copies of 2
# Prior studies have kept ref.copies as 2 despite gender
if(chromosome == "23") {
if(gender == "Male") {
ref.copies <- 1
} else {
ref.copies <- 2
}
}
vac.ccf <- function(CCF, purity, absCN) {
purity * CCF * copies / (ref.copies * (1 - purity) + purity * absCN)
}
probs <- sapply(CCFs, function(c) {
dbinom(alt_allele, coverage, vac.ccf(c, purity, absCN))
})
probs <- probs / sum(probs)
ccf.max <- which.max(probs)
max.prob <- max(probs)
ccf.gt.half.max <- which(probs > max(probs) / 2)
ccf.lower <-
max(ccf.gt.half.max[1] - 1, 1) ### closest ccf value before half-max range (within 0-1 range)
ccf.upper <-
min(ccf.gt.half.max[length(ccf.gt.half.max)] + 1, length(CCFs)) ### closest ccf value after half-max range (within 0-1 range)
if (is.na(purity)) {
ccf.upper <- NA
}
ccf.max <- ccf.max / length(CCFs)
ccf.lower <- ccf.lower / length(CCFs)
ccf.upper <- ccf.upper / length(CCFs)
#if(is.na(purity)){ccf.upper<-NA}
p100 <- sum(probs[101])
p80 <- sum(probs[81:101])
p50 <- sum(probs[51:101])
as.list(c(probs, ccf.max, max.prob, p100, p80, p50))
}
determineMaxLikelihood <- function(major_CCF,
minor_CCF,
multiplicity_CCF,
one_copy_CCF,
major_max_prob,
minor_max_prob,
multiplicity_max_prob,
one_copy_max_prob) {
copy.number.options <- c("major", "minor", "multiplicity", "one_copy")
max.probs <- c(major_max_prob, minor_max_prob, multiplicity_max_prob, one_copy_max_prob)
max.index <- which(max.probs == max(max.probs, na.rm = T))[1]
ccfs <- c(major_CCF, minor_CCF, multiplicity_CCF, one_copy_CCF)
mle.copy.number <- copy.number.options[max.index]
as.list(c(ccfs[max.index], paste0(mle.copy.number, "_CN")))
}
ccfAnnotate <- function(maf) {
maf <- maf[order(maf$Tumor_Sample_Barcode), ]
maf <- purity.check(maf)
maf <- tcn.check(maf)
maf <- format.maf(maf)
maf <- multiplicity.cn(maf)
# maf <- as.data.frame(maf)
maf <- maf[which(!is.na(maf$absCN)), ]
maf <- maf[which(!is.na(maf$major_CN)), ]
print("Determining likelihoods for major CN")
maf[, c(paste0("major_ccf_", seq(0, 1, 0.01)), "major_CCF", "major_max_prob", "major_100", "major_80", "major_50") :=
ccf.likelihood(purity,
absCN,
t_alt_count,
(t_alt_count + t_ref_count),
copies = major_CN,
chromosome = Chromosome,
gender), by = 1:nrow(maf)]
print("Determining likelihoods for minor CN")
maf[, c(paste0("minor_ccf_", seq(0, 1, 0.01)), "minor_CCF", "minor_max_prob", "minor_100", "minor_80", "minor_50") :=
ccf.likelihood(purity,
absCN,
t_alt_count,
(t_alt_count + t_ref_count),
copies = minor_CN,
chromosome = Chromosome,
gender), by = 1:nrow(maf)]
print("Determining likelihoods for expected multiplicity CN")
maf[, c(paste0("multiplicity_ccf_", seq(0, 1, 0.01)), "multiplicity_CCF", "multiplicity_max_prob", "multiplicity_100", "multiplicity_80", "multiplicity_50") :=
ccf.likelihood(purity,
absCN,
t_alt_count,
(t_alt_count + t_ref_count),
copies = multiplicity_CN,
chromosome = Chromosome,
gender), by = 1:nrow(maf)]
print("Determining likelihoods for mutation on only one copy (after CNA event)")
maf[, c(paste0("one_copy_ccf_", seq(0, 1, 0.01)), "one_copy_CCF", "one_copy_max_prob", "one_copy_100", "one_copy_80", "one_copy_50") :=
ccf.likelihood(purity,
absCN,
t_alt_count,
(t_alt_count + t_ref_count),
copies = one_copy_CN,
chromosome = Chromosome,
gender), by = 1:nrow(maf)]
print("Determining MLE of CCF across CNs")
maf[ ,c("CCF", "mle_CN") := determineMaxLikelihood(major_CCF,
minor_CCF,
multiplicity_CCF,
one_copy_CCF,
major_max_prob,
minor_max_prob,
multiplicity_max_prob,
one_copy_max_prob), by = 1:nrow(maf)]
return(maf)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment