This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Originally posted at http://gettinggeneticsdone.blogspot.com/2010/07/qq-plots-of-p-values-in-r-using-base.html | |
# Define the function | |
ggd.qqplot = function(pvector, main=NULL, ...) { | |
o = -log10(sort(pvector,decreasing=F)) | |
e = -log10( 1:length(o)/length(o) ) | |
plot(e,o,pch=19,cex=1, main=main, ..., | |
xlab=expression(Expected~~-log[10](italic(p))), | |
ylab=expression(Observed~~-log[10](italic(p))), | |
xlim=c(0,max(e)), ylim=c(0,max(o))) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the inverse-logit (logistic) curve over [-5, 5] and annotate its centre.
x <- seq(-5, 5, 0.01)

# Inverse logit: maps any real number into (0, 1).
# plogis() evaluates 1 / (1 + exp(-x)), so very large positive inputs return 1;
# the form exp(x) / (1 + exp(x)) becomes Inf / Inf = NaN once exp(x) overflows.
invlogit <- function(x) plogis(x)

y <- invlogit(x)
plot(x, y, pch = 16, ylab = expression(paste(logit^{-1}, (x))))

# Reference lines crossing at (0, 0.5).
abline(v = 0)
abline(h = 0.5)

# Label placed just above and to the right of the crossing point.
text(0.55, 0.55, expression(paste("Slope is ", beta / 4)), adj = c(0, 0))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract the overall F-test (ANOVA) p-value from a fitted linear model.
#
# Args:
#   modelobject: a fitted model object inheriting from class "lm".
# Returns:
#   The upper-tail p-value of the model's F statistic, as a bare numeric
#   scalar (names and all other attributes removed).
# Raises:
#   An error if `modelobject` does not inherit from "lm", or if its summary
#   carries no F statistic (for example an intercept-only fit).
lmp <- function (modelobject) {
  # inherits() copes with multi-element class vectors; comparing class() with
  # != produces one logical per class name, which if() cannot use.
  if (!inherits(modelobject, "lm")) stop("Not an object of class 'lm' ")
  f <- summary(modelobject)$fstatistic
  # Fail with a clear message instead of letting pf() choke on NULL inputs.
  if (is.null(f)) stop("Model summary has no F-statistic")
  p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  # pf() keeps the "value" name from the fstatistic vector; strip it.
  attributes(p) <- NULL
  return(p)
}
# simulate some data |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# d is a data frame with 4 columns | |
# d$x gives variable names | |
# d$y gives center point | |
# d$ylo gives lower limits | |
# d$yhi gives upper limits | |
forestplot <- function(d, xlab="Odds Ratio", ylab="Study"){ | |
require(ggplot2) | |
p <- ggplot(d, aes(x=x, y=y, ymin=ylo, ymax=yhi)) + | |
geom_pointrange() + | |
coord_flip() + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): this removes every object (hidden ones included) from the
# workspace the script is run in; kept because the script has always done so,
# but consider dropping it and running in a fresh session instead.
rm(list = ls(all.names = TRUE))
library(randomForest)

# ---- Classification ----
data(iris)
head(iris)

# Forest using every other column as a predictor of Species.
iris.rf <- randomForest(Species ~ ., data = iris,
                        importance = TRUE, proximity = TRUE)

# Same formula, with predictors limited by passing only columns 1-3 and 5.
iris.rf.subset <- randomForest(Species ~ ., data = iris[c(1:3, 5)],
                               importance = TRUE, proximity = TRUE)

# Predictors limited through the formula: both Petal.* terms are removed.
iris.rf.subset2 <- randomForest(Species ~ . - Petal.Length - Petal.Width,
                                data = iris,
                                importance = TRUE, proximity = TRUE)

print(iris.rf)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(randomForest) | |
############################################################################### | |
############################## load functions ################################# | |
############################################################################### | |
# need to document this! | |
rfr2 = function(randomForestModel) { | |
printoutput = capture.output(print(randomForestModel)) | |
varline = grep("explained",printoutput,value=TRUE) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Permute (shuffle) the values of one column of a data frame.
#
# Args:
#   dataframe: data frame to work on; a modified copy is returned.
#   columnToPermute: name of the column whose values are shuffled.
#   seed: optional RNG seed. When non-NULL, set.seed(seed) is called first,
#     which resets the session's random number stream.
# Returns:
#   `dataframe` with the values of `columnToPermute` in random order and every
#   other column unchanged.
# Raises:
#   An error if `columnToPermute` is not the name of a column in `dataframe`.
permute <- function (dataframe, columnToPermute="column", seed=NULL) {
  if (!is.null(seed)) set.seed(seed)
  colindex <- match(columnToPermute, names(dataframe))
  if (length(colindex) != 1L || is.na(colindex)) {
    stop("Column '", columnToPermute, "' not found in data frame")
  }
  # seq_len() instead of 1:nrow(): for a zero-row data frame 1:0 is c(1, 0),
  # which would index a non-existent element rather than yield an empty
  # permutation.
  neworder <- sample(seq_len(nrow(dataframe)))
  # [[ ]] extracts the column as a vector regardless of data-frame flavour.
  dataframe[[colindex]] <- dataframe[[colindex]][neworder]
  return(dataframe)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bring the built-in iris data set into the workspace.
data("iris")
# Row count (150 for this data set).
nrow(iris)
# Preview the leading rows.
head(iris, n = 6L)
# splitdf function will return a list of training and testing sets |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#splitdf splits a data frame into a training and testing set. | |
#returns a list of two data frames: trainset and testset. | |
#you can optionally apply a random seed. | |
splitdf <- function(dataframe, seed=NULL, trainfrac=0.5) { | |
if (trainfrac<=0 | trainfrac>=1) stop("Training fraction must be between 0 and 1, not inclusive") | |
if (!is.null(seed)) set.seed(seed) | |
index <- 1:nrow(dataframe) | |
trainindex <- sample(index, trunc(length(index)/(1/trainfrac))) | |
trainset <- dataframe[trainindex, ] | |
testset <- dataframe[-trainindex, ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
propmiss <- function(dataframe) { | |
m <- sapply(dataframe, function(x) { | |
data.frame( | |
nmiss=sum(is.na(x)), | |
n=length(x), | |
propmiss=sum(is.na(x))/length(x) | |
) | |
}) | |
d <- data.frame(t(m)) | |
d <- sapply(d, unlist) |