Skip to content

Instantly share code, notes, and snippets.

View mikelove's full-sized avatar

Michael Love mikelove

View GitHub Profile
@mikelove
mikelove / null42.R
Created March 30, 2016 21:26
null comparisons 42 WT
# List the entries of the "files" directory whose names match "gbgout".
files <- list.files(path = "files", pattern = "gbgout")

# Outputs to leave out of the comparison, named after their BAM files.
exclude <- paste0(
  c(
    "WT_rep21_MID62_allLanes_tophat2.0.5.bam",
    "WT_rep22_MID67_allLanes_tophat2.0.5.bam",
    "WT_rep25_MID61_allLanes_tophat2.0.5.bam",
    "WT_rep28_MID64_allLanes_tophat2.0.5.bam",
    "WT_rep34_MID76_allLanes_tophat2.0.5.bam",
    "WT_rep36_MID63_allLanes_tophat2.0.5.bam"
  ),
  ".gbgout"
)

# Drop the excluded outputs and report how many files remain.
files <- setdiff(files, exclude)
length(files)
@mikelove
mikelove / branching.R
Last active April 15, 2016 19:16
branching
# Simulate a branching process for a fixed number of rounds.
#
# In every round a single draw from Binomial(y, p) is added to the current
# count, so each of the `y` items is duplicated with probability `p`.
# (The name suggests PCR amplification cycles -- NOTE(review): confirm.)
#
# Args:
#   y:      starting count (non-negative whole number).
#   p:      per-item success probability used for each binomial draw.
#   rounds: number of rounds to simulate; 0 returns `y` unchanged.
#
# Returns:
#   The count after `rounds` rounds.
pcr <- function(y, p, rounds) {
  # seq_len() gives an empty sequence for rounds == 0, whereas 1:rounds
  # would iterate over c(1, 0) and silently run two extra rounds.
  for (i in seq_len(rounds)) {
    y <- y + rbinom(1, y, p)
  }
  y
}
prob <- function(s,n,p) {
if (s == 1) {
if (n == 1) return(1)
@mikelove
mikelove / bioc_gc.R
Last active April 16, 2016 15:40
getting GC content
# Compute the GC fraction of every exon of a single gene (hg19).
library(Homo.sapiens)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(BSgenome.Hsapiens.UCSC.hg19)

# Exons of all known genes, grouped by gene ID.
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
e <- exonsBy(txdb, by = "gene")

# Print the Entrez ID for the gene symbol AFTPH.
# NOTE(review): presumably this is where the hard-coded "54812" below comes
# from -- confirm.
mapIds(Homo.sapiens, keys = "AFTPH", column = "ENTREZID", keytype = "SYMBOL")

# Pull the exons of gene "54812" and fetch their DNA sequences.
ee <- e[["54812"]]
dna <- getSeq(Hsapiens, ee)

# Store the per-exon proportion of G or C letters as a metadata column.
mcols(ee)$gc <- letterFrequency(dna, letters = "GC", as.prob = TRUE)
---
title: "Package-specific methods"
author: "Mike Love"
date: "April 18, 2016"
output: html_document
---
**Intro**: The Bioconductor project contains analysis
packages which often depend on a number of core packages,
which contain core classes (S4 classes like *eSet*, *SummarizedExperiment*, etc.)
# Print alternating groups of four lines from the file `test`: lines 1-4 are
# kept, 5-8 dropped, 9-12 kept, and so on ((NR - 1)/4 modulo 2 is below 1 only
# for the even-numbered groups, counting from zero).
# NOTE(review): presumably meant for 4-line FASTQ records, keeping every other
# read -- confirm.
awk '(NR - 1)/4 % 2 < 1' test
@mikelove
mikelove / build_R.md
Last active February 20, 2017 16:43
build R on cluster
@mikelove
mikelove / lambdahatij_test.R
Last active May 1, 2016 19:48
lambdahatij branch test code
# Set-up for testing the lambdahatij branch of the alpine package.

# Single BAM file, named by its sample ID.
bamfiles <- c(ERR188436 = "ERR188436.Aligned.sortedByCoord.out.bam")

# Restore the objects saved in fitpar_gc_str.rda into the workspace.
load("fitpar_gc_str.rda")

# Load the development copy of alpine from the local checkout.
# NOTE(review): load_all() is not attached by anything visible here
# (presumably devtools is loaded in the session or .Rprofile) -- confirm.
load_all("~/proj/alpine/")

# Annotation and alignment packages.
library(GenomicAlignments)
library(GenomicFeatures)
library(BSgenome.Hsapiens.UCSC.hg19)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)

txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
# Total transcript abundance, broken down by how many isoforms the parent
# gene has (1 to 10), for the first of two Salmon quantifications.
library(tximport)

# Transcript-level import (txOut = TRUE: no summarization to genes).
txi <- tximport(c("LA1.sf", "LB1.sf"), type = "salmon", txOut = TRUE)

library(Homo.sapiens)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene

# One row per (gene, transcript) pair, then attach a RefSeq ID per transcript.
txdf <- select(txdb, keys(txdb, "GENEID"), "TXID", "GENEID")
txdf$REFSEQ <- mapIds(Homo.sapiens, as.character(txdf$TXID), "REFSEQ", "TXID")

# Number of transcripts listed for each gene, looked up per row by gene ID.
# as.integer() stores a plain integer column instead of a `table` object.
tab <- table(txdf$GENEID)
txdf$numiso <- as.integer(tab[txdf$GENEID])

# Keep only transcripts that were quantified.
# NOTE(review): assumes the quantification row names are RefSeq IDs -- confirm.
txdf2 <- txdf[txdf$REFSEQ %in% rownames(txi$abundance), ]

# Sum the first sample's abundance over transcripts from genes with exactly
# i isoforms; vapply() guarantees a numeric vector of length 10.
a <- vapply(seq_len(10), function(i) {
  sum(txi$abundance[txdf2$REFSEQ[txdf2$numiso == i], 1])
}, numeric(1))
@mikelove
mikelove / tsne.R
Last active April 6, 2024 01:11
Exploring behavior of t-SNE on linear data
# Simulate one-dimensional data and plot it, coloured by position.
library(rafalib)
library(RColorBrewer)

n <- 200
m <- 40

# n reproducible uniform draws on [-1, 1].
set.seed(1)
x <- runif(n, min = -1, max = 1)

# 2 x 2 grid of panels.
bigpar(2, 2, mar = c(3, 3, 3, 1))

# Bin x into 11 equal-width intervals and give each bin a "Spectral" colour.
cols <- brewer.pal(n = 11, name = "Spectral")[as.integer(cut(x, breaks = 11))]

# Draw the points along a horizontal line at y = 0.
plot(
  x = x, y = numeric(n),
  ylim = c(-1, 1), yaxt = "n", xlab = "", ylab = "",
  col = cols, pch = 20, main = "underlying data"
)
@mikelove
mikelove / human_mouse_gc.R
Created June 8, 2016 19:36
human and mouse GC distn
# Extract human (hg19) and mouse (mm10) transcript sequences and keep those
# longer than 100 bases.
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(BSgenome.Hsapiens.UCSC.hg19)
library(BSgenome.Mmusculus.UCSC.mm10)

# Short handles for the two transcript databases.
hs <- TxDb.Hsapiens.UCSC.hg19.knownGene
mm <- TxDb.Mmusculus.UCSC.mm10.knownGene

# Transcript sequences assembled from the exons of each transcript.
hstx <- extractTranscriptSeqs(x = Hsapiens, transcripts = exonsBy(hs, by = "tx"))
mmtx <- extractTranscriptSeqs(x = Mmusculus, transcripts = exonsBy(mm, by = "tx"))

# Discard transcripts of 100 bases or fewer.
hstx2 <- hstx[width(hstx) > 100]
mmtx2 <- mmtx[width(mmtx) > 100]