Skip to content

Instantly share code, notes, and snippets.

@tomhopper
Last active June 13, 2019 19:08
Show Gist options
  • Save tomhopper/17450476be3e8703beb2 to your computer and use it in GitHub Desktop.
Save tomhopper/17450476be3e8703beb2 to your computer and use it in GitHub Desktop.
Rprofile file
## For original file showing use of .env to add functions invisibly, see
## \link{http://gettinggeneticsdone.blogspot.com/2013/06/customize-rprofile.html}
## Load packages
#library(BiocInstaller)
## Don't show those silly significanct stars
#options(show.signif.stars=FALSE)
## Do you want to automatically convert strings to factor variables in a data.frame?
## WARNING!!! This makes your code less portable/reproducible.
#options(stringsAsFactors=FALSE)
## Get the sqldf package to play nicely on OSX. No longer necessary with R 3.0.0
## From http://stackoverflow.com/questions/8219747/sqldf-package-in-r-querying-a-data-frame
## options(sqldf.driver="SQLite")
# options(gsubfn.engine = "R")
## Don't ask me for my CRAN mirror every time
#options("repos" = c(CRAN = "http://cran.rstudio.com/"))
## Create a new invisible environment for all the functions to go in so it doesn't clutter your workspace.
.env <- new.env()
## Returns a logical vector TRUE for elements of X not in Y
.env$"%nin%" <- function(x, y) !(x %in% y)
## Returns names(df) in single column, numbered matrix format.
#.env$n <- function(df) matrix(names(df))
## Single character shortcuts for summary() and head().
.env$s <- base::summary
.env$h <- utils::head
## ht==headtail, i.e., show the first and last 10 items of an object
.env$ht <- function(d) rbind(head(d,10),tail(d,10))
## Show the first 5 rows and first 5 columns of a data frame or matrix
.env$hh <- function(d) if(class(d)=="matrix"|class(d)=="data.frame") d[1:5,1:5]
## Read data on clipboard.
#.env$read.cb <- function(...) {
# ismac <- Sys.info()[1]=="Darwin"
# if (!ismac) read.table(file="clipboard", ...)
# else read.table(pipe("pbpaste"), ...)
#}
## Strip row names from a data frame (stolen from plyr)
.env$unrowname <- function(x) {
rownames(x) <- NULL
x
}
## List objects and classes (from @_inundata, mod by ateucher)
.env$lsa <- function() {
obj_type <- function(x) class(get(x, envir = .GlobalEnv)) # define environment
foo = data.frame(sapply(ls(envir = .GlobalEnv), obj_type))
foo$object_name = rownames(foo)
names(foo)[1] = "class"
names(foo)[2] = "object"
return(unrowname(foo))
}
## List all functions in a package (also from @_inundata)
.env$lsp <-function(package, all.names = FALSE, pattern) {
package <- deparse(substitute(package))
ls(
pos = paste("package", package, sep = ":"),
all.names = all.names,
pattern = pattern
)
}
## Open Finder to the current directory on mac
## WARNING: this makes code much less portable
.env$macopen <- function(...) if(Sys.info()[1]=="Darwin") system("open .")
#.env$o <- function(...) if(Sys.info()[1]=="Darwin") system("open .")
#' PRESS
#' @description Calculates the predicted residual sum of squares, or PRESS, statistic.
#' Used primarily to compare alternative models.
#' @param linear.model An \code{lm} or \code{glm} object
#' @return The PRESS statistic
.env$PRESS <- function(linear.model) {
#' calculate the predictive residuals
pr <- residuals(linear.model)/(1-lm.influence(linear.model)$hat)
#' calculate the PRESS
PRESS <- sum(pr^2)
return(PRESS)
}
#' Predicted R squared
#' @description Calculates the predicted r-squared statistic.
#' Used primarily to compare alternative models.
#' @param linear.model An \code{lm} or \code{glm} object
#' @return The predicted r squared value
.env$pred_r_squared <- function(linear.model) {
#' Use anova() to get the sum of squares for the linear model
lm.anova <- anova(linear.model)
#' Calculate the total sum of squares
tss <- sum(lm.anova$'Sum Sq')
# Calculate the predictive R^2
pred.r.squared <- 1-PRESS(linear.model)/(tss)
return(pred.r.squared)
}
#' Model Fit Statistics
#' @description Returns lm model fit statistics R-squared, adjucted R-squared,
#' predicted R-squared and PRESS.
#' Thanks to John Mount for his 6-June-2014 blog post, R style tip: prefer functions that return data frames" for
#' the idea \url{http://www.win-vector.com/blog/2014/06/r-style-tip-prefer-functions-that-return-data-frames}
#' @param ... One or more \code{lm()} models.
#' @return A data frame with rows for R-squared, adjusted R-squared, Predictive R-squared and PRESS statistics, and a column for each model passed to the function.
.env$model_fit_stats <- function(...) {
var_names <- as.character(match.call())[-1]
dots <- list(...)
ndots <- length(dots)
if(all.equal(unlist(lapply(X = dots, FUN = class)), rep("lm", length.out = ndots))){
return.df <- data.frame(parameter = c("r.squared", "adj.r.squared","pred.r.squared","PRESS"))
for(i in 1:ndots) {
r.sqr <- summary(dots[[i]])$r.squared
adj.r.sqr <- summary(dots[[i]])$adj.r.squared
pre.r.sqr <- pred_r_squared(dots[[i]])
PRESS <- PRESS(dots[[i]])
return.df[paste0(var_names[i])] <- matrix(c(r.sqr, adj.r.sqr, pre.r.sqr, PRESS), nrow = 4, byrow = FALSE)
}
# r.squared = r.sqr, adj.r.squared = adj.r.sqr, pred.r.squared = pre.r.sqr, press = PRESS)
return(return.df)
} else {
stop("model_fit_stats only works with objects of class 'lm.'")
}
}
#' Remove rows from data frame containing only NA in pipe-friendly manner
#' @description Accepts a data frame and strips out any rows
#' containing only \code{NA} values, then returns the resulting data frame.
#' @param A data frame
#' @return A data frame
#' @source \url{http://stackoverflow.com/a/6437778}
.env$strip_na_rows <- function(the_df) {
the_df[rowSums(is.na(the_df)) != ncol(the_df),]
return(the_df)
}
#' is.int moved to package numbr
#' is.int
#' @description Determines if a vector fits the IEEE definition of an integer.
#' @param v the vector to check
#' @return Returns TRUE if vector v is an integer, otherwise FALSE.
#' @source \url{http://www.win-vector.com/blog/2015/06/r-in-a-64-bit-world/}
#.env$is.int <- function(v) {
# is.numeric(v) &
# v > -2^53 & v < 2^53 &
# (floor(v)==ceiling(v))
#}
#result <- source("~/Dropbox/R Scripts/myrnorm.r", local = .env)
#if (class(result) != "list" & length(result) != 2) print("\nCustom rnorm functions failed to load.")
result <- source("~/Dropbox/R Scripts/myttest.r", local = .env)
if (class(result) != "list" & length(result) != 2) print("\nCustom t.test function failed to load.")
result <- source("~/Dropbox/R Scripts/mySixSigma.r", local = .env)
if (class(result) != "list" & length(result) != 2) print("\nCustom Six Sigma functions failed to load.")
result <- source("~/Dropbox/R Scripts/AssignWithMetaData.r", local = .env)
if (class(result) != "list" & length(result) != 2) print("\nCustom Assign functions failed to load.")
result <- source("~/Dropbox/R Scripts/string_funcs.r", local = .env)
if (class(result) != "list" & length(result) != 2) print("\nCustom string functions failed to load.")
#result <- source("~/Dropbox/R Scripts/myrunif.r", local = .env)
#if (class(result) != "list" & length(result) != 2) print("\nCustom runif functions failed to load.")
rm(result)
## Attach all the variables and functions above
attach(.env)
print(".env nominally attached; about to run .First")
## .First() run at the start of every R session.
## Use to load commonly used packages?
.First <- function() {
#Sys.setlocale(, "en_US.UTF-8")
#Sys.setenv(R_GSCMD = "/opt/local/bin/gs")
if(require(rmaxmin)) {
print("Successfully loaded rmaxmin from .Rprofile.")
} else {
print("rmaxmin failed to load from .Rprofile.")
}
if(require(numbr)) {
print("Successfully loaded numbr from .Rprofile.")
} else {
print("numbr failed to load from .Rprofile.")
}
if(require(randr)) {
print("Successfully loaded randr from .Rprofile.")
} else {
print("randr failed to load from .Rprofile.")
}
# if(require(ggplot2)) {
# print("Successfully loaded ggplot2 from .Rprofile.")
# } else {
# print("ggplot2 failed to load from .Rprofile.")
# }
# if(require(dplyr)) {
# print("Successfully loaded dplyr from .Rprofile.")
# } else {
# print("dplyr failed to load from .Rprofile.")
# }
# if(require(tidyr)) {
# print("Successfully loaded tidyr from .Rprofile.")
# } else {
# print("tidyr failed to load from .Rprofile.")
# }
cat("\nSuccessfully loaded .Rprofile from home directory at", date(), "\n")
cat("To see functions loaded from .Rprofile, type \"lsf.str(envir = .env)\"\n")
}
## .Last() run at the end of the session
.Last <- function() {
# save command history here?
cat("\nGoodbye at ", date(), "\n")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment