Skip to content

Instantly share code, notes, and snippets.

View katwre's full-sized avatar

Katarzyna Wreczycka katwre

View GitHub Profile
@katwre
katwre / first_script.py
Last active August 29, 2015 14:18
First Gist
print "Hello world"
@katwre
katwre / comp_foverlaps_findoverlaps.R
Last active August 29, 2015 14:18
Comparison the execution time of foverlaps and findOverlaps functions
library(rbenchmark)
library(GenomicRanges)
library(data.table)
# create sample dataset
start <- seq(1L, 20e8L, 1000L)
end <- start + 3000L
chrom <- "chr1"
DT.big <- data.table(chrom, start, end)
plot(t1, type="l", col="red",
xlab="", ylab="time [sec]",
xaxt = "n", ylim=c(0, max(t1, t2)))
lines(t2, type="l", col="blue")
title(xlab = "number of rows of bigger dataset", line = 5)
par(las=2)
options(scipen=999) #disable scientific notation
axis(1, at=seq(10,200, 10),
labels=nmb.rows[seq(10, length(nmb.rows), 10)])
legend("bottomright", c("findOverlaps","foverlaps"),
@katwre
katwre / getData.R
Last active August 29, 2015 14:19
Using genomation to analyze methylation profiles from Roadmap epigenomics and ENCODE
# RRBS from ENCODE
rrbs.IMR90.path <- "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeHaibMethylRrbs/wgEncodeHaibMethylRrbsImr90UwSitesRep1.bed.gz"
rrbs.H1.path <- "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeHaibMethylRrbs/wgEncodeHaibMethylRrbsH1hescHaibSitesRep1.bed.gz"
# read the RRBS data to GRanges
library(genomation)
rrbs.IMR90<-readGeneric(rrbs.IMR90.path, chr = 1, start = 2, end = 3, strand = 6,
meta.cols = list(score=11), keep.all.metadata = FALSE, zero.based = T, skip = 1)
rrbs.H1<-readGeneric(rrbs.H1.path, chr = 1, start = 2, end = 3, strand = 6,
@katwre
katwre / show_ranges.R
Last active August 29, 2015 14:20
a way to show two granges objects on a plot. from http://www.sthda.com/english/wiki/iranges
target.paired.end = GRanges(rep(c(1),each=7),
IRanges(c(7,7,8,9,9,24,25), width=39),
strand=rep(c("-"), times=7))
windows.paired.end = GRanges(rep(c(1),each=3), IRanges(c(7,8,24), width=10),
strand=rep("-", times=3))
plotir <- function(ir,i, col) { arrows(start(ir)-.5,i,end(ir)+.5,i,code=3,angle=90,lwd=3, col=col) }
plot(0,0,xlim=c(0,65),ylim=c(0,11),type="n",xlab="",ylab="",xaxt="n")
axis(1,0:65)
abline(v=0:65 + .5,col=rgb(0,0,0,.5))
destdir = "~/" #directory where fastq files will be stored
nmbr_of_cores = 20
SRAdb_path = '~/SRAmetadb.sqlite'
exp_ids=c("ERX620541")
require(SRAdb)
sqlfile <- SRAdb_path
if(!file.exists(SRAdb_path)) sqlfile <- getSRAdbFile()
sra_con <- dbConnect(SQLite(),sqlfile) #conection to SRA database
@katwre
katwre / count_total_number_of_reads.R
Last active August 27, 2015 12:22
count total number of reads
bampath = "example.bam"
command=paste0("samtools idxstats ", bampath," | awk 'BEGIN {a=0} {a += $3 } END{print a }'")
tot.mapped=as.numeric(try(system(command,intern=TRUE)))
@katwre
katwre / median.R
Created October 18, 2015 17:44
runmed faster than roll_median
library(RcppRoll)
library(microbenchmark)
x <- runif(1000,0.5,1.5)
microbenchmark(
runmed(x = x, k = 3),
roll_median(x, 3),
times=1000L
)
@katwre
katwre / ucsctable2gr.R
Created March 3, 2016 22:39
UCSC table to GRanges object (only chrom, start and end)
uniform_tfbs = data.frame(cell.line=c("GM12878" ,"H1-hESC" ,"K562", "HeLa-S3", "HepG2", "HUVEC", "A549", "IMR-90", "MCF-7"),
schema=c("wgEncodeAwgTfbsUtaGm12878CtcfUniPk", "wgEncodeAwgTfbsUtaH1hescCtcfUniPk", "wgEncodeAwgTfbsUtaK562CtcfUniPk", "wgEncodeAwgTfbsUtaHelas3CtcfUniPk",
"wgEncodeAwgTfbsUtaHepg2CtcfUniPk", "wgEncodeAwgTfbsUtaHuvecCtcfUniPk", "wgEncodeAwgTfbsHaibA549Ctcfsc5916Pcr1xDex100nmUniPk",
"wgEncodeAwgTfbsSydhImr90CtcfbIggrabUniPk", "wgEncodeAwgTfbsUtaMcf7CtcfSerumstimUniPk"),
stringsAsFactors = FALSE)
#' UCSC table to GRanges object (only chrom, start and end)
#'
#' @param table UCSC table
# http://www.vincebuffalo.com/blog/2012/03/12/using-bioconductor-to-analyze-your-23andme-data.html
library(gwascat)
data(ebicat37)
library(GenomicRanges)
data="~/projects/23andme/genome_v4.txt"