Skip to content

Instantly share code, notes, and snippets.

@alyssafrazee
Created August 8, 2014 19:49
Show Gist options
  • Save alyssafrazee/e3d4627b77c6f4c8820f to your computer and use it in GitHub Desktop.
Save alyssafrazee/e3d4627b77c6f4c8820f to your computer and use it in GitHub Desktop.
code to create publicly-available Ballgown objects from GEUVADIS data
## create ballgown objects with GEUVADIS data
# NOTE(review): biocLite() is the legacy Bioconductor installer; recent
# R/Bioconductor releases install packages with BiocManager::install('ballgown')
# instead -- confirm which one applies to the R version in use.
source("http://bioconductor.org/biocLite.R")
biocLite('ballgown')
library(ballgown)
# The .rda files produced by this script are saved under 'small_objects/'
# (relative to the working directory), so that is the directory that must
# exist. dir.create() is used rather than shelling out to `mkdir -p` so the
# script also runs where no POSIX shell is available; showWarnings=FALSE keeps
# it quiet when the directory is already there.
dir.create('small_objects', showWarnings=FALSE, recursive=TRUE)
## make phenotype table:
## builds `pd`, one row per sample directory found in dataDir, in the same
## order as `sampnames`
dataDir = 'Ballgown/' #tablemaker output lives here
sampnames = list.files(dataDir, pattern = 'H|N')
info = read.table('../pop_data_annot_whole.txt')
#^ file available at https://github.com/alyssafrazee/ballgown_code/blob/master/GEUVADIS_preprocessing/pop_data_annot_whole.txt
pheno = read.table("../GD667.QCstats.masterfile.txt", sep='\t', header=TRUE)
#^ file available at https://github.com/alyssafrazee/ballgown_code/blob/master/GEUVADIS_preprocessing/GD667.QCstats.masterfile.txt
m = read.delim("pop_data_withuniqueid.txt", as.is=TRUE)
#^ file available at https://github.com/alyssafrazee/ballgown_code/blob/master/GEUVADIS_preprocessing/pop_data_withuniqueid.txt
pd = data.frame(dirname=info$V2, population=info$V3)
# reorder/subset the annotation so row i describes sampnames[i]
sampleRow = match(sampnames, pd$dirname)
if (any(is.na(sampleRow))) {
    # unmatched directories end up as all-NA rows; make that visible
    warning(sum(is.na(sampleRow)),
        ' sample directories have no entry in pop_data_annot_whole.txt',
        call.=FALSE)
}
pd = pd[sampleRow,]
pd$dirname = as.character(pd$dirname)
# individual ID = the part of the directory name before the first underscore
# (base sub() instead of the unexported helper ballgown:::ss)
pd$IndividualID = sub('_.*$', '', pd$dirname)
pd$SampleID = m$sample_id[match(pd$dirname, m$folder_id)]
# look up each sample's row in the QC table once and reuse it for both columns
# NOTE(review): this relies on read.table() having put the sample IDs into
# rownames(pheno) -- confirm against the layout of the master file.
phenoRow = match(pd$SampleID, rownames(pheno))
pd$UseThisDup = pheno$UseThisDuplicate[phenoRow]
pd$RIN = pheno$RIN[phenoRow]
## make ballgown objects with various expression measurements
## this takes several hours (compressing this much is pretty slow)
##
## For every measurement a ballgown object is built from the tablemaker output
## in `dataDir`, bound to a variable named after the measurement (lower-cased:
## fpkm, cov, rcount, ...), and saved to small_objects/<name>.rda under that
## same variable name.
outDir = 'small_objects'
# make sure the output directory exists before starting a multi-hour build
dir.create(outDir, showWarnings=FALSE, recursive=TRUE)
measurements = c('FPKM', 'cov', 'rcount', 'ucount', 'mrcount', 'cov_sd',
    'mcov', 'mcov_sd')
for (meas in measurements) {
    objName = tolower(meas)
    # assign() keeps the object available under its usual name afterwards,
    # exactly as the one-statement-per-measurement version did
    assign(objName,
        ballgown(dataDir=dataDir, samplePattern='H|N', pData=pd, meas=meas))
    # save(list=) derives the stored object from the same name as the file, so
    # the two cannot get out of sync (cov_sd.rda used to be written with the
    # `cov` object instead of `cov_sd`)
    save(list=objName, file=file.path(outDir, paste0(objName, '.rda')),
        compress='xz')
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment