Skip to content

Instantly share code, notes, and snippets.

View mikelove's full-sized avatar

Michael Love mikelove

View GitHub Profile
@mikelove
mikelove / interactions.R
Created September 14, 2015 01:01
interactions ggplot
# Simulate negative binomial counts for a 2 (condition) x 3 (genotype) design
# with `npg` samples in each of the six cells.
npg <- 20
# One mean per cell, in the order X:A, X:B, Y:A, Y:B, Z:A, Z:B.
mu <- c(200, 400, 400, 800, 800, 1600)
cond <- rep(c("A", "B"), each = npg, times = 3)
geno <- rep(c("X", "Y", "Z"), each = 2 * npg)
# Sanity check: every condition/genotype cell should hold `npg` samples.
table(cond, geno)
counts <- rnbinom(n = 6 * npg, size = 1 / 0.01, mu = rep(mu, each = npg))
d <- data.frame(counts, cond, geno)
library(ggplot2)
plotit <- function(d) {
ggplot(d, aes(x=cond, y=counts, group=geno)) +
@mikelove
mikelove / my_test.R
Last active November 2, 2016 17:45
testthat with filename information
# Run each testthat file one at a time, announcing its position and file name
# first so that output and failures can be traced back to a specific file.
#
# Looks for files named `test_<name>.R` (or `.r`) in "tests/testthat",
# relative to the current working directory, and calls
# `test(filter = "<name>")` for each of them in turn.
# NOTE(review): `test()` is not defined here -- presumably devtools::test();
# confirm it is attached before calling.
#
# Returns NULL invisibly; called for its side effects only.
my_test <- function() {
  # Anchor the pattern and escape the dot so that only real test scripts are
  # picked up (not e.g. editor backups such as "test_foo.R~"), and so that
  # the extension is always stripped cleanly from the filter.
  test_file_pattern <- "^test_(.*)\\.[Rr]$"
  tests <- list.files("tests/testthat", pattern = test_file_pattern)
  for (i in seq_along(tests)) {
    message(i, " / ", length(tests), ": ", tests[i])
    test(filter = sub(test_file_pattern, "\\1", tests[i]))
  }
  invisible(NULL)
}
# Fit mpg ~ wt separately within each level of 'cyl' and collect the R^2 of
# every fit into a numeric vector (one entry per level).
library(purrr)
mtcars %>%
  split(.$cyl) %>%
  map(function(df) lm(mpg ~ wt, data = df)) %>%
  map(summary) %>%
  map_dbl(function(s) s[["r.squared"]])
# in base R this might look like
mtcars$cyl <- factor(mtcars$cyl)
library(purrr)
library(dplyr)
# some functions
# just a convenience function, gives back random assignments
# conceptually like: sample(labels, size=n, replace=TRUE, prob=prob)
random_group <- function(n, probs) {
probs <- probs / sum(probs)
g <- findInterval(seq(0, 1, length = n), c(0, cumsum(probs)),
# Goal: compare the mean and the median as estimators of the location of a
# distribution, using 100 replicates for each sample size and for each type
# of distribution (normal vs t).
nrep <- 100
# Build the (n, type) grid and repeat every row `nrep` times, so that each
# row of `d` describes one simulated data set.
d <- local({
  grid <- expand.grid(n = c(3, 5, 10, 20), type = c("normal", "t"))
  grid[rep(seq_len(nrow(grid)), each = nrep), ]
})
res <- lapply(seq_len(nrow(d)), function(i) {
if (d$type[i] == "normal") {
# Number of replicates per (sample size, distribution type) combination.
nrep <- 100
# All combinations of sample size and distribution type ...
d <- expand.grid(
  n = c(3, 5, 10, 20),
  type = c("normal", "t")
)
# ... with each combination repeated `nrep` times (one row per replicate).
d <- d[rep(seq_len(nrow(d)), each = nrep), , drop = FALSE]
simulate <- function(n, type) {
if (type == "normal") {
dat <- rnorm(n)
} else {
dat <- rt(n, df=3)
}
# Grid of sample sizes crossed with distribution types.
d <- expand.grid(n = c(3, 5, 10, 20), type = c("normal", "t"))
# Repeat every grid row `nrep` times so each row is one replicate.
# NOTE(review): `nrep` is not assigned in this snippet -- confirm it is
# defined before this point.
d <- d[rep(seq_len(nrow(d)), each = nrep), ]
simulate <- function(n, type) {
if (type == "normal") {
dat <- rnorm(n)
} else {
dat <- rt(n, df=3)
}
dat
@mikelove
mikelove / genelength.R
Last active December 3, 2015 15:10
gene length factor
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
# Short alias for the transcript database object.
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
# All gene IDs, plus the lookup table of transcript IDs for those genes.
g <- keys(txdb, keytype = "GENEID")
df <- select(txdb, keys = g, columns = "TXID", keytype = "GENEID")
# Exons grouped by transcript.
ebt <- exonsBy(txdb, by = "tx")
# Fix the seed so that the same 500 genes are drawn on every run.
set.seed(1)
random.genes <- sample(g, size = 500, replace = FALSE)
res <- sapply(random.genes, function(gene) {
txs <- df$TXID[df$GENEID == gene]
if (length(txs) == 1) return(NA)
@mikelove
mikelove / gist:1e80ffb5b14b02bc288f
Created November 7, 2015 17:40
ubuntu remember brightness in /etc/rc.local
# Write the value 22 to the brightness file of the acpi_video0 backlight.
echo 22 > /sys/class/backlight/acpi_video0/brightness
@mikelove
mikelove / dplyr_vs_by.R
Last active November 25, 2015 15:44
dplyr's summarize_each much faster than by()
# Benchmark: per-group column sums of a matrix, base by() versus dplyr.
n <- 50
# 50000 x n integer matrix; `f` puts consecutive pairs of rows in one group.
m <- matrix(seq_len(50000 * n), ncol = n)
f <- factor(rep(seq_len(25000), each = 2))
# Base R: apply colSums() to each group's rows via by(), then stack the
# per-group results with rbind().
system.time({ z <- do.call(rbind, by(m, f, colSums)) })
# 16.3 seconds
library(dplyr)
# Put the grouping variable and the matrix columns into one data frame.
d <- as.data.frame(cbind(f, m))
# summarise_all(sum) replaces the deprecated summarize_each(funs(sum)); both
# sum every non-grouping column within each group.
system.time({ d %>% group_by(f) %>% summarise_all(sum) })
# 0.137 seconds (timing recorded with the original summarize_each() call)