bobthecat’s gists

bobthecat / Dice_Rcpp.Rmd

Created September 10, 2013 13:22

Rcpp implementation of the Dice coefficient

	---
	title: Dice coefficient with RcppEigen
	author: David Ruau
	license: GPL (>= 2)
	tags: Rcpp RcppEigen
	summary: Compute the Dice coefficient (1945) between column of a matrix.
	---

	The Dice coefficient is a simple measure of similarity / dissimilarity (depending how you take it). It is intended to compare asymmetric binary vectors, meaning one of the combination (usually 0-0) is not important and agreement (1-1 pairs) have more weight than disagreement (1-0 or 0-1 pairs). Imagine the following contingency table:

bobthecat / connect.r

Created March 19, 2013 10:28

	library(RMySQL)
	con <- dbConnect(MySQL(), group='toto', dbname="user_profile")
	m <- dbGetQuery(con, "SELECT DISTINCT user_id FROM demographics WHERE gender='0'")
	dbDisconnect(con)

bobthecat / benchmark.r

Last active December 14, 2015 03:49

	R <- c(2000, 5000, 10000, 20000, 40000)
	## I hit the limit at ~50000 the ff function refuse to create the matrix.
	# Error in if (length < 0 \|\| length > .Machine$integer.max) stop("length must be between 1 and .Machine$integer.max") :
	# missing value where TRUE/FALSE needed
	# http://www.bytemining.com/2010/05/hitting-the-big-data-ceiling-in-r/
	normal <- numeric(length=length(R))
	for(i in 1:length(R)){
	split <- ifelse(R[i]<=20000, 10, 20)
	MAT <- matrix(rnorm(R[i] * 10), nrow = 10)
	normal[i] <- system.time(res <- bigcor(MAT, nblocks = split, verbose=FALSE))[3]

bobthecat / bigcorPar.r

Last active April 5, 2023 02:32

	bigcorPar <- function(x, nblocks = 10, verbose = TRUE, ncore="all", ...){
	library(ff, quietly = TRUE)
	require(doMC)
	if(ncore=="all"){
	ncore = multicore:::detectCores()
	registerDoMC(cores = ncore)
	} else{
	registerDoMC(cores = ncore)
	}

bobthecat / empirical_FDR.r

Last active December 9, 2015 23:58

	## LOADING LIBRARIES FOR PARALLEL PROCESSING
	library(doMC)
	ncore = multicore:::detectCores()
	registerDoMC(cores = ncore)

	## COMPUTING THE RANDOM P-VALUE DISTRIBUTION
	# How many random sampling
	R=100
	# Shuffling the sample labels and recomputing the p-value each time
	p.rand <- foreach(i = 1:dim(e)[1], .combine=rbind) %dopar% {

bobthecat / empirical_FDR.r

Last active December 9, 2015 23:58

	## EXTRACTING CLASS LABELS
	classLabel <- sub("^ER(.*) breast cancer", "\\1", grep("ER", phenoData$specimen, value=T))
	classLabel
	[1] "-" "-" "-" "-" "-" "-" "-" "-" "-" "+" "+" "+" "+" "+" "+" "+" "+" "+"

	## COMPUTING P-VALUE DISTRIBUTION
	minus = which(classLabel=="-")
	plus = which(classLabel=="+")
	p <- apply(e, 1, function(x){t.test(as.numeric(x[minus]), as.numeric(x[plus]))$p.value})

bobthecat / getting_some_data.r

Created December 20, 2012 18:19

	library(GEOquery)
	## Download the data from GEO
	GDS3716 <- getGEO('GDS3716')
	# transform the GDS to and expressionSet
	eset <- GDS2eSet(GDS3716,do.log2=TRUE)
	phenoData <- pData(eset)
	# keep only the ER+ and ER-
	samples <- phenoData$sample[grep("ER", phenoData$specimen)]
	# subsetting the expressionSet
	eset <- eset[,samples]

bobthecat / religion_index.r

Created September 21, 2012 18:37

	library(xlsx)
	library(googleVis)
	# I downloaded the Excel file, cleaned the headers and worked a bit
	# the column title.
	da <- read.xlsx("~/Downloads/religion.xlsx", sheetName=1)
	rownames(da) <- da$COUNTRY.
	da <- da[,-1]
	religion <- data.frame(country=rep(rownames(da), 3),
	year=c(rep(2007, dim(da)[1]), rep(2009, dim(da)[1]), rep(2010, dim(da)[1])),
	GRI=c(da$GRI_2007, da$GRI_2009, da$GRI_2010),

bobthecat / R_tut_2.r

Created August 16, 2012 01:40

R tutorial part 2

	### R code from vignette source 'Presentation_2.Rnw'
	### Encoding: UTF-8

	###################################################
	### code chunk number 1: init
	###################################################
	options(width=60)


	###################################################

bobthecat / r-tutorial.r

Created August 15, 2012 15:41

R tutorial

	### R code from vignette source 'Presentation.Rnw'
	### Encoding: UTF-8

	###################################################
	### code chunk number 1: init
	###################################################
	options(width=60)


	###################################################

David Ruau bobthecat