This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
stratified <- function(df, group, size, select = NULL, | |
replace = FALSE, bothSets = FALSE) { | |
if (is.null(select)) { | |
df <- df | |
} else { | |
if (is.null(names(select))) stop("'select' must be a named list") | |
if (!all(names(select) %in% names(df))) | |
stop("Please verify your 'select' argument") | |
temp <- sapply(names(select), | |
function(x) df[[x]] %in% select[[x]]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
padNA <- function (mydata, rowsneeded, first = TRUE) | |
{ | |
temp1 = colnames(mydata) | |
rowsneeded = rowsneeded - nrow(mydata) | |
temp2 = setNames( | |
data.frame(matrix(rep(NA, length(temp1) * rowsneeded), | |
ncol = length(temp1))), temp1) | |
if (isTRUE(first)) rbind(mydata, temp2) | |
else rbind(temp2, mydata) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Split concatenated cells in a \code{data.frame} or a \code{data.table} | |
#' | |
#' A variation of the \code{concat.split} family of functions designed for | |
#' large rectangular datasets. | |
#' | |
#' While the general \code{concat.split} functions are able to handle | |
#' "unbalanced" datasets (for example, where the number of fields in a given | |
#' column might differ from row to row) because of the nature of \code{fread} | |
#' from the "data.table" package, this function does not support such data | |
#' types. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Factor vectors with multiple levels | |
#' | |
#' \code{\link{factor}} does not let you use duplicated levels nicely. It results | |
#' in an ugly warning message and you need to use \code{\link{droplevels}} to get | |
#' the desired output. | |
#' | |
#' The "solution" is to first factor the vector, and then use a named \code{list} | |
#' with the \code{\link{levels}} function. This function is a wrapper around | |
#' those steps. | |
#' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
moveme <- function(invec, movecommand) { | |
movecommand <- lapply(strsplit(strsplit(movecommand, ";")[[1]], ",|\\s+"), | |
function(x) x[x != ""]) | |
movelist <- lapply(movecommand, function(x) { | |
Where <- x[which(x %in% c("before", "after", "first", "last")):length(x)] | |
ToMove <- setdiff(x, Where) | |
list(ToMove, Where) | |
}) | |
myVec <- invec | |
for (i in seq_along(movelist)) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CUT <- function (x, breaks, labels = NULL, include.lowest = FALSE, right = TRUE, | |
dig.lab = 3L, ordered_result = FALSE, ...) | |
{ | |
if (!is.numeric(x)) | |
stop("'x' must be numeric") | |
if (length(breaks) == 1L) { | |
if (is.na(breaks) || breaks < 2L) | |
stop("invalid number of intervals") | |
nb <- as.integer(breaks + 1) | |
dx <- diff(rx <- range(x, na.rm = TRUE)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Interleave <- function(myList, append.source = TRUE, sep = ": ", drop = FALSE) { | |
sources <- myList | |
sources[sapply(sources, is.null)] <- NULL | |
sources <- lapply(sources, function(x) if (is.matrix(x) || | |
is.data.frame(x)) | |
x | |
else t(x)) | |
nrows <- sapply(sources, nrow) | |
mrows <- max(nrows) | |
if (any(nrows != mrows & nrows != 1)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WeekDays <- function(startOn = "Monday", abbreviate = FALSE) { | |
WD <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday") | |
x <- match(startOn, WD) | |
WD <- WD[c(x:7, setdiff(1:7, x:7))] | |
if (isTRUE(abbreviate)) { | |
substring(WD, 0, 3) | |
} else WD | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
helpExtract <- function(Function, section = "Usage", type = "m_code") { | |
A <- deparse(substitute(Function)) | |
x <- capture.output(tools:::Rd2txt(utils:::.getHelpFile(utils::help(A)), | |
options = list(sectionIndent = 0))) | |
B <- grep("^_", x) ## section start lines | |
x <- gsub("_\b", "", x, fixed = TRUE) ## remove "_\b" | |
X <- rep(FALSE, length(x)) | |
X[B] <- 1 | |
out <- split(x, cumsum(X)) | |
out <- out[[which(sapply(out, function(x) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(microbenchmark) | |
## Change "n" to experiment with different sized `data.frame`s | |
set.seed(1) | |
n = 100000 | |
## I couldn't think of other constants off the top of my head | |
## This should give us 26+26+50+50 "dummy variable" columns. | |
example <- data.frame(strcol = sample( |