Skip to content

Instantly share code, notes, and snippets.

View lwaldron's full-sized avatar

Levi Waldron lwaldron

View GitHub Profile
# I create and discuss this code at https://youtu.be/nU_GEPKVXU8
# Purpose: fetch one MetaPhlAn "bugs list" .rda file from a Google Cloud
# Storage bucket and inspect its row names for suspicious entries.
library(dplyr)
# The `#>` lines below are captured console output from attaching dplyr
# (the masking message appears truncated in this copy).
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
# Copy the .rda file from the bucket into the current working directory.
# NOTE(review): requires the `gsutil` CLI on the PATH; the exit status of
# system() is not checked, so a failed copy only surfaces at load() below.
system("gsutil cp gs://biocbbs_2020a/cmd3out/uploads/GuptaA_2019.metaphlan_bugs_list.stool.rda .")
# Restore the saved object(s) into the workspace; the following lines assume
# this defines `GuptaA_2019.metaphlan_bugs_list.stool`.
load("GuptaA_2019.metaphlan_bugs_list.stool.rda")
# Show the first six row names.
head(rownames(GuptaA_2019.metaphlan_bugs_list.stool)) #first 3 look wrong
# grep() returns the indices of the row names that contain "CIBIO".
grep("CIBIO", rownames(GuptaA_2019.metaphlan_bugs_list.stool)) #there are 60 with CIBIO in the rowname
@lwaldron
lwaldron / cBioPortal tests
Last active September 1, 2020 22:16
Download of full ACC and BRCA datasets, GBM IMPACT341
# Run from the command line with Docker, on the stock Bioconductor image:
# docker run -it bioconductor/bioconductor_docker:latest R
BiocManager::install(pkgs = "cBioPortalData")
library(cBioPortalData)

# Full "acc_tcga" data pack: time the call, then display the result.
system.time({
  accpack <- cBioDataPack("acc_tcga")
}) # ~10 seconds
accpack
@lwaldron
lwaldron / acc_tcga IMPACT341
Created September 1, 2020 19:41
Quick test for download/construction time of acc_tcga IMPACT341 panel
# docker run -it bioconductor/bioconductor_docker:latest R
BiocManager::install(pkgs = "cBioPortalData")
library(cBioPortalData)

# API handle passed to cBioPortalData() below.
cBio <- cBioPortal()

# Time the download/construction of the acc_tcga study for the IMPACT341
# gene panel, then display the resulting object.
system.time({
  acc <- cBioPortalData(
    cBio,
    studyId = "acc_tcga",
    genePanelId = "IMPACT341"
  )
})
acc
@lwaldron
lwaldron / tcga_gbm_IMPACT341
Last active September 1, 2020 20:58
Quick test for download/construction time of gbm_tcga IMPACT341 panel
# docker run -it bioconductor/bioconductor_docker:latest R
BiocManager::install(pkgs = "cBioPortalData")
library(cBioPortalData)

# API handle passed to cBioPortalData() below.
cBio <- cBioPortal()

# First run: time the download/construction of the gbm_tcga study for the
# IMPACT341 gene panel, then display the resulting object.
system.time({
  gbm <- cBioPortalData(
    cBio,
    studyId = "gbm_tcga",
    genePanelId = "IMPACT341"
  )
})
gbm

# Second run of the identical call, timed again for comparison with the first.
system.time({
  gbm <- cBioPortalData(
    cBio,
    studyId = "gbm_tcga",
    genePanelId = "IMPACT341"
  )
})
# Load the bugSigSimple helper functions straight from GitHub; the next line
# relies on this to define readCurationSheet().
source("https://raw.githubusercontent.com/waldronlab/bugSigSimple/master/R/simple.R")

# Read the curated microbial-signatures sheet (TSV hosted in the same repo).
x <- readCurationSheet("https://github.com/waldronlab/bugSigSimple/blob/master/inst/extdata/Microbial%20signatures%20curation%20-%20signatures.tsv?raw=true")

# Distinct PMIDs in the sheet: print how many there are, then write one per
# line to a text file.
pmids <- unique(x$PMID)
length(pmids)

# Pass the file name directly instead of file("..."): writeLines() then opens
# and closes the file itself, so no connection object is left behind to
# trigger a "closing unused connection" warning at garbage collection.
writeLines(pmids, "signaturesPMID.txt")
@lwaldron
lwaldron / checkBiocInstallation
Last active January 24, 2019 11:29
Check which Bioconductor packages you can and can't install, with log
library(BiocManager)

# Prepare the working area: create the package-file directory in the home
# directory and remove any results log left over from a previous run.
dir.create("~/packagefiles")
unlink("installationresults.txt")

# Names of all packages available in the Bioconductor software repository.
pkgs <- rownames(
  available.packages(
    contrib.url(BiocManager::repositories()["BioCsoft"])
  )
)

# Shuffle the package names with a fixed seed so the order is reproducible.
set.seed(1)
pkgs <- sample(pkgs)

# Optional (disabled): drop the packages that are already installed.
## pkgs <- pkgs[!pkgs %in% installed.packages()]
@lwaldron
lwaldron / methbenchmark.R
Last active January 14, 2019 10:39
simple DelayedMatrix benchmark showing access time of n rows growing as O(n^3)
# Make sure curatedTCGAData >= 1.5.6 is available, installing the GitHub
# version otherwise.
#
# The comparison uses utils::packageVersion(), which returns a
# `package_version` object and therefore compares numerically, component by
# component. Comparing plain version strings sorts lexicographically
# (e.g. "1.10.0" < "1.5.6" is TRUE), which would trigger a needless reinstall.
# system.file() returns "" for a package that is not installed, so that case
# is handled too instead of erroring inside packageVersion().
if (!nzchar(system.file(package = "curatedTCGAData")) ||
    utils::packageVersion("curatedTCGAData") < "1.5.6") {
  BiocManager::install("waldronlab/curatedTCGAData")
}

# BiocManager::version() is a `package_version`, so this is a numeric check.
stopifnot(BiocManager::version() >= "3.9")

library(curatedTCGAData) # requires >= 1.5.6 and bioc-devel

# Fetch the UCEC methylation (methyl27) data as `mae`.
mae <- curatedTCGAData("UCEC", "Methylation_methyl27", dry.run = FALSE) # ~2 seconds from cache

# First assay of `mae`; presumably the DelayedMatrix that is benchmarked.
dm <- assay(mae, 1)
# benchmarking showing cubic increase with # rows
@lwaldron
lwaldron / GMQLusecase
Created December 19, 2018 20:14
GMQL use case (from Masseroli et al 2018, Bioinformatics bty688)
# Masseroli et al 2018, https://doi.org/10.1093/bioinformatics/bty688
# "In TCGA data of BRCA patients, find the DNA somatic mutations
# within the first 2000 bp outside of the genes that are both
# expressed with FPKM > 3 and have at least a methylation in the same patient
# biospecimen, and extract these mutations of the top 5% patients
# with the highest number of such mutations."
#
# NOTE(review): the quoted use case refers to BRCA, but the call below
# downloads the "ACC" cohort -- confirm whether that is intentional.
library(curatedTCGAData)

# Time the retrieval of the mutation, RNA-seq and methylation assays.
system.time({
  mae <- curatedTCGAData(
    "ACC",
    c("Mutation", "RNASeq2GeneNorm", "Methylation"),
    dry.run = FALSE
  )
})
@lwaldron
lwaldron / REexample.R
Created July 11, 2018 11:37
Example of RaggedExperiment::qreduceAssay
## ------------------------------------------------------------------------
library(GenomicRanges)
library(RaggedExperiment)

# Toy sample 1: three named, stranded ranges (A, B, C) with scores 3, 4, 5.
sample1 <- GRanges(
  c(
    A = "chr1:1-10:-",
    B = "chr1:8-14:+",
    C = "chr2:15-18:+"
  ),
  score = 3:5
)

# Toy sample 2: two named, stranded ranges (D, E) with scores 1, 2.
sample2 <- GRanges(
  c(
    D = "chr1:1-10:-",
    E = "chr2:11-18:+"
  ),
  score = 1:2
)

# Table with an `id` column holding 1 and 2.
colDat <- DataFrame(id = 1:2)