Michael Love mikelove

Genetics & Biostatistics at UNC-Chapel Hill. R/Bioconductor developer

mikelove / interactions.R

Created September 14, 2015 01:01

interactions ggplot

	# Simulated counts for a 2-condition x 3-genotype layout, npg samples per cell.
	npg <- 20
	# Expected count per cell, in the order X/A, X/B, Y/A, Y/B, Z/A, Z/B.
	mu <- c(200, 400, 400, 800, 800, 1600)
	# Genotype changes every 2 * npg samples; condition alternates A/B within it.
	geno <- rep(c("X", "Y", "Z"), each = 2 * npg)
	cond <- rep(c("A", "B"), each = npg, times = 3)
	# Cross-tabulate the design: every cond/geno cell holds npg samples.
	table(cond, geno)
	# Negative binomial draws; with size = 1 / 0.01 the variance is
	# mu + 0.01 * mu^2.
	counts <- rnbinom(length(cond), mu = rep(mu, each = npg), size = 1 / 0.01)
	d <- data.frame(counts = counts, cond = cond, geno = geno)
	library(ggplot2)
	plotit <- function(d) {
	ggplot(d, aes(x=cond, y=counts, group=geno)) +

mikelove / my_test.R

Last active November 2, 2016 17:45

testthat with filename information

	# Run every testthat file one at a time, printing "<index> / <total>: <file>"
	# before each run so that output can be traced back to the file that
	# produced it.
	#
	# Takes no arguments and is called for its side effects. Test files are
	# looked up in "tests/testthat" relative to the working directory.
	# NOTE(review): `test()` is not defined here -- presumably devtools::test();
	# it must be available on the search path when this runs.
	my_test <- function() {
	  # Anchored pattern: only files named test_<name>.R / test_<name>.r qualify,
	  # so helpers such as "helper_test_x.R" or "test_x.Rmd" are not picked up.
	  tests <- list.files("tests/testthat", pattern = "^test_.*\\.[rR]$")
	  for (i in seq_along(tests)) {
	    message(paste0(i, " / ", length(tests), ": ", tests[i]))
	    # Drop the "test_" prefix and the literal ".R" extension; what remains
	    # is handed to test() as its filter.
	    test(filter = sub("^test_(.*)\\.[rR]$", "\\1", tests[i]))
	  }
	}

mikelove / purrr_example.R

Last active September 29, 2015 01:08

	# Fit mpg ~ wt separately within each level of 'cyl' and collect the R^2 values
	library(purrr)
	mtcars %>%
	  split(.$cyl) %>%
	  map(function(cars) lm(mpg ~ wt, data = cars)) %>%
	  map(summary) %>%
	  map_dbl(function(fit_summary) fit_summary[["r.squared"]])

	# a base R version of the same analysis might start like this
	mtcars[["cyl"]] <- factor(mtcars[["cyl"]])

mikelove / purrr_example2.R

Created September 29, 2015 01:54

	library(purrr)
	library(dplyr)

	# some functions

	# just a convenience function, gives back random assignments
	# conceptually like: sample(labels, size=n, replace=TRUE, prob=prob)
	random_group <- function(n, probs) {
	probs <- probs / sum(probs)
	g <- findInterval(seq(0, 1, length = n), c(0, cumsum(probs)),

mikelove / naive_example3.R

Last active October 9, 2015 14:54

	# Goal: compare the mean and the median as estimators of a distribution's
	# location, using 100 replicates for every sample size and for every
	# distribution type (normal vs. t).

	nrep <- 100
	# one row per (sample size, distribution type) combination ...
	d <- expand.grid(n = c(3, 5, 10, 20), type = c("normal", "t"))
	# ... then each row repeated nrep times in a row
	d <- d[rep(seq_len(nrow(d)), each = nrep), , drop = FALSE]

	res <- lapply(seq_len(nrow(d)), function(i) {
	if (d$type[i] == "normal") {

mikelove / purrr_example3.R

Last active October 9, 2015 15:09

	# Simulation grid: every sample size crossed with every distribution type,
	# with each combination repeated nrep times.
	nrep <- 100
	d <- expand.grid(n = c(3, 5, 10, 20), type = c("normal", "t"))
	d <- d[rep(seq_len(nrow(d)), times = rep(nrep, nrow(d))), ]

	simulate <- function(n, type) {
	if (type == "normal") {
	dat <- rnorm(n)
	} else {
	dat <- rt(n, df=3)
	}

mikelove / plyr_example3.R

Last active October 9, 2015 14:53

	# Number of replicates per (n, type) combination. The companion scripts set
	# this to 100; define it here too, unless the caller already did, so that
	# this script also runs on its own.
	if (!exists("nrep")) nrep <- 100
	# one row per (sample size, distribution type) combination ...
	d <- expand.grid(n = c(3, 5, 10, 20), type = c("normal", "t"))
	# ... then each row repeated nrep times in a row
	d <- d[rep(seq_len(nrow(d)), each = nrep), ]

	simulate <- function(n, type) {
	if (type == "normal") {
	dat <- rnorm(n)
	} else {
	dat <- rt(n, df=3)
	}
	dat

mikelove / genelength.R

Last active December 3, 2015 15:10

gene length factor

	library(TxDb.Hsapiens.UCSC.hg19.knownGene)
	# Shorthand handle for the hg19 knownGene transcript database.
	txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
	# Every GENEID key stored in the database.
	g <- keys(txdb, keytype = "GENEID")
	# Gene-to-transcript lookup table (columns GENEID and TXID).
	df <- select(txdb, keys = g, columns = "TXID", keytype = "GENEID")
	# Exons grouped by transcript.
	ebt <- exonsBy(txdb, by = "tx")
	# Fixed seed so the same 500 genes are drawn on every run.
	set.seed(1)
	random.genes <- sample(g, size = 500)
	res <- sapply(random.genes, function(gene) {
	txs <- df$TXID[df$GENEID == gene]
	if (length(txs) == 1) return(NA)

mikelove / gist:1e80ffb5b14b02bc288f

Created November 7, 2015 17:40

ubuntu remember brightness in /etc/rc.local

# Set the acpi_video0 backlight brightness to 22 by writing to its sysfs file.
# Per the gist title, this line is meant to be placed in /etc/rc.local.
echo 22 > /sys/class/backlight/acpi_video0/brightness

mikelove / dplyr_vs_by.R

Last active November 25, 2015 15:44

dplyr's summarize_each much faster than by()

	# Benchmark: per-group column sums of a 50000 x n integer matrix,
	# computed first with base R by() and then with dplyr.
	n <- 50
	m <- matrix(1:(50000*n),ncol=n)
	# 25000 groups, each covering two consecutive rows of m
	f <- factor(rep(1:25000,each=2))
	# base R: by() splits m by f, colSums runs per group, rbind stacks the results
	system.time({ z <- do.call(rbind, by(m, f, colSums)) })
	# timing noted by the author: 16.3 seconds

	library(dplyr)
	# cbind() stores the factor as its integer codes in a column named "f"
	d <- as.data.frame(cbind(f,m))
	# NOTE(review): summarize_each() and funs() are deprecated in later dplyr
	# releases, with across() as the documented replacement -- confirm against
	# the installed version before reusing this line.
	system.time({ d %>% group_by(f) %>% summarize_each(funs(sum)) })
	# timing noted by the author: 0.137 seconds