Skip to content

Instantly share code, notes, and snippets.

@jnpaulson
Last active November 5, 2019 08:01
Show Gist options
  • Save jnpaulson/8c2ccfb0185dc490ff72e51aef86678c to your computer and use it in GitHub Desktop.
Save jnpaulson/8c2ccfb0185dc490ff72e51aef86678c to your computer and use it in GitHub Desktop.
library(yarn)
library(dplyr)
library(rafalib)
obj = downloadGTEx()
lowSampleSizes = c("Bladder","Cells - Leukemia cell line (CML)","Cervix - Ectocervix","Cervix - Endocervix","Fallopian Tube")
obj = filterSamples(obj,lowSampleSizes,"SMTSD") %>%
filterMissingGenes
our_subtypes = gsub(" - "," ",as.character(pData(obj)$SMTSD))
our_subtypes = gsub(" ","_",our_subtypes)
our_subtypes = tolower(our_subtypes)
brain_0 = which(pData(obj)$SMTSD == "Brain - Hypothalamus")
brain_01 = which(pData(obj)$SMTSD == "Brain - Amygdala")
brain_02 = which(pData(obj)$SMTSD == "Brain - Anterior cingulate cortex (BA24)")
brain_03 = which(pData(obj)$SMTSD == "Brain - Hippocampus")
brain_04 = which(pData(obj)$SMTSD == "Brain - Cortex")
brain_05 = which(pData(obj)$SMTSD == "Brain - Substantia nigra")
brain_06 = which(pData(obj)$SMTSD == "Brain - Spinal cord (cervical c-1)")
brain_07 = which(pData(obj)$SMTSD == "Brain - Frontal Cortex (BA9)")
brain_0 = union(brain_0,c(brain_01,brain_02,brain_03,brain_04,brain_05,brain_06,brain_07))
our_subtypes[brain_0] = "Brain-0"
brain_1 = c(which(pData(obj)$SMTSD=="Brain - Cerebellum"),which(pData(obj)$SMTSD=="Brain - Cerebellar Hemisphere"))
our_subtypes[brain_1] = "Brain-1"
brain_2 = c(which(pData(obj)$SMTSD=="Brain - Caudate (basal ganglia)"),which(pData(obj)$SMTSD=="Brain - Nucleus accumbens (basal ganglia)"),which(pData(obj)$SMTSD=="Brain - Putamen (basal ganglia)"))
our_subtypes[brain_2] = "Brain-2"
skin = grep("sun",our_subtypes)
our_subtypes[skin] = "skin"
our_subtypes = factor(tolower(our_subtypes))
pData(obj)$our_subtypes = our_subtypes
message("smallest sample size is:")
show(min(table(pData(obj)$our_subtypes)))
obj = filterLowGenes(obj,"our_subtypes")
### Normalize using qsmooth
obj = normalizeTissueAware(obj,"our_subtypes")
saveRDS(obj,file=("gtex_portal_normalized.rds")
#' Compute voom weights using customized log2 counts
#' Modified from: https://raw.githubusercontent.com/jhsiao999/Humanzee/916ee1b3e0e213839c17a99280211d68edaca161/R/voomWeightsCustomized.r
#' and of course the voom function
#'
#' @param log2counts counts on log2 scale
#' @param design Experimental design of the data. Required to be an R
#' design.matrix object
#' @param lib.size Library size.
#' @param is.cpm if the data is CPM normalized.
#'
#' @export
#'
# example:
# gender = pData(objsubset)$gender
# batch = factor(as.character(pData(objsubset)$SMNABTCHT))
# design = model.matrix(~batch+gender)
# transformedCounts = assayData(objsubset)[["qsmooth"]]
# voomOutput = voomWeightsCustomized(transformedCounts,design)
# fitGood = lmFit(voomOutput,design)
# fitGood = eBayes(fitGood)
# gl = fData(objsubset)$geneNames
# tt = topTable(fit,number=Inf,genelist=gl,coef="genderMALE")
voomWeightsCustomized <- function(log2counts,design,lib.size = NULL,is.cpm = FALSE) {
if (is.null(design)) {
design <- model.matrix(~ 1 )
}
out<-list()
if (is.cpm == TRUE) {
fit <- lmFit(log2counts, design)
xx <- fit$Amean + mean(log2(lib.size + 1)) - log2(1e+06)
yy <- sqrt(fit$sigma)
l <- lowess(xx, yy, f = .5)
f <- approxfun(l, rule = 2)
fitted.values <- fit$coef %*% t(fit$design)
fitted.cpm <- 2^fitted.values
fitted.count <- 1e-06 * t(t(fitted.cpm) * (lib.size + 1))
fitted.logcount <- log2(fitted.count)
w <- 1/f(fitted.logcount)^4
dim(w) <- dim(fitted.logcount)
}
if (is.cpm == FALSE) {
fit <- lmFit(log2counts, design)
xx <- fit$Amean
yy <- sqrt(fit$sigma)
l <- lowess(xx, yy, f = .5)
f <- approxfun(l, rule = 2)
fitted.values <- fit$coef %*% t(fit$design)
fitted.logcount <- log2(fitted.values)
w <- 1/f(fitted.logcount)^4
dim(w) <- dim(fitted.logcount)
}
out$E = log2counts
out$weights = w
out$design = design
new("EList",out)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment