Skip to content

Instantly share code, notes, and snippets.

@mikelove
Created January 8, 2015 15:29
Show Gist options
  • Save mikelove/cdc92bec094131627698 to your computer and use it in GitHub Desktop.
Save mikelove/cdc92bec094131627698 to your computer and use it in GitHub Desktop.
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
t <- transcriptsBy(txdb, by="gene")
e <- exonsBy(txdb, by="tx")
t2 <- unlist(t)
gene2t <- data.frame(gene=names(t2), tx=t2$tx_id)
g <- unique(names(t2))
set.seed(1)
random.genes <- sample(g, 2000, replace=FALSE)
set.seed(1)
length.res <- sapply(random.genes, function(gene) {
txs <- gene2t$tx[gene2t$gene == gene]
if (length(txs) == 1) return(1)
lengths <- sum(width(e[txs]))
two <- sample(lengths, 2, replace=FALSE)
two[1]/two[2]
})
set.seed(1)
shared.res <- sapply(random.genes, function(gene) {
txs <- gene2t$tx[gene2t$gene == gene]
if (length(txs) == 1) return(1)
two <- sample(e[txs], 2, replace=FALSE)
longer <- max(sum(width(two)))
intersection <- sum(width(intersect(two[[1]], two[[2]])))
intersection/longer
})
round(quantile(length.res,c(.05,.1,.25,.5,.75,.9,.95)),3)
round(median(shared.res),3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment