Skip to content

Instantly share code, notes, and snippets.

View mrdwab's full-sized avatar

Ananda Mahto mrdwab

View GitHub Profile
TrueSeq <- function(inLogi, zero2NA = FALSE) {
x <- rle(cumsum(!inLogi)[inLogi])$lengths
inLogi[inLogi] <- rep(seq_along(x), x)
if (isTRUE(zero2NA)) inLogi[inLogi == 0] <- NA
inLogi
}
#' Interleaves values within matrices or vectors
#'
#' Mimics some of the behavior of the \code{Riffle} function
#' (\url{http://reference.wolfram.com/mathematica/ref/Riffle.html}) in
#' Mathematica. For matrices, it interleaves the columns. For vectors, it
#' interleaves differently according to whether the subsequent values are
#' presented as separate values or whether they are grouped with \code{c()}.
#'
#' It is expected that all matrices to be interleaved would have the same
#' number of rows, though they may have differing numbers of columns. If they
@mrdwab
mrdwab / needleInHaystack.R
Created April 13, 2014 03:30
Find a needle in a haystack
#' Find a needle in a haystack...
#'
#' Find specified search patterns (in any order, not necessarily joined) in another vector of strings.
#'
#' @param findMe What are you looking for? A character vector.
#' @param findIn Where are you looking for it? A character vector.
#' @return A matrix with 1 indicating presence and 0 indicating absence.
#' @author Ananda Mahto
#' @references \url{http://stackoverflow.com/q/22129542/1270695}
#' @examples
@mrdwab
mrdwab / dcastMult.R
Last active August 29, 2015 13:59
dcast with multiple functions
dcastMult <- function(data, formula, value.var = "value",
funs = list("min" = min, "max" = max)) {
require(reshape2)
if (is.null(names(funs)) | any(names(funs) == "")) stop("funs must be named")
Form <- formula(formula)
LHS <- as.character(Form[[2]])
if (length(LHS) > 1) LHS <- LHS[-1]
temp <- lapply(seq_along(funs), function(Z) {
T1 <- dcast(data, Form, value.var = value.var,
fun.aggregate=match.fun(funs[[Z]]), fill = 0)
@mrdwab
mrdwab / cSplit.R
Last active March 14, 2023 05:03
The faster version of `concat.split` that makes use of `data.table` efficiency.
cSplit <- function(indt, splitCols, sep = ",", direction = "wide",
makeEqual = NULL, fixed = TRUE, drop = TRUE,
stripWhite = FALSE) {
message("`cSplit` is now part of the 'splitstackshape' package (V1.4.0)")
## requires data.table >= 1.8.11
require(data.table)
if (!is.data.table(indt)) setDT(indt)
if (is.numeric(splitCols)) splitCols <- names(indt)[splitCols]
if (any(!vapply(indt[, splitCols, with = FALSE],
is.character, logical(1L)))) {
@mrdwab
mrdwab / example.R
Created April 30, 2014 04:23
A custom version of `numMat` that lets the user specify the number of columns they want.
L <- c("1 1:1 2:1 3:1 5:1 6:1 8:1",
"5 1:1 2:1 4:1",
"9 1:1 2:1 7:1 10:1")
M <- replicate(10000, L)
M2 <- strsplit(M, "\\s+|:")
head(numMat(lapply(M2, `[`, -1)))
# 1 2 3 4 5 6 7 8 9 10
# [1,] 1 1 1 0 1 1 0 1 0 0
# [2,] 1 1 0 1 0 0 0 0 0 0
Col1 <- c("a", "b","b",NA)
Col2 <- c(NA, "a", "c", NA)
Col3 <- c(NA,NA,"b", "a")
dat <- data.frame(Col1, Col2, Col3)
fun1 <- function() {
cbind(dat,
apply(table(cbind(rn = 1:nrow(dat),
stack(lapply(dat, as.character)))),
c(1, 2), sum))
@mrdwab
mrdwab / oo_cell_properties_macro.txt
Created July 25, 2014 02:19
Cell-properties as function-results
REM ***** BASIC *****
Function CELL_NOTE(vSheet,lRowIndex&,iColIndex%)
Dim v
v = getSheetCell(vSheet,lRowIndex&,iColIndex%)
if vartype(v) = 9 then
CELL_NOTE = v.Annotation.getText.getString
else
CELL_NOTE = v
endif
End Function
@mrdwab
mrdwab / pdfWatermarkEncrypt.bat
Created August 8, 2014 10:33
Use PDFtk to add a "stamp" or a "background" to a PDF and restrict editing/copying.
@echo off
if [%1]==[] goto :eof
:loop
pdftk %1 stamp back.pdf output "%~dpn1_new%~x1" owner_pw somepasswordyouwant
shift
if not [%1]==[] goto loop
@mrdwab
mrdwab / stratifiedDT.R
Last active April 10, 2019 06:29
Attempt to rewrite stratified for `data.table`. The `data.frame` version can be found at https://gist.github.com/mrdwab/6424112
stratifiedDT <- function(indt, group, size, select = NULL,
replace = FALSE, keep.rownames = FALSE,
bothSets = FALSE) {
if (is.numeric(group)) group <- names(indt)[group]
if (!is.data.table(indt)) indt <- as.data.table(
indt, keep.rownames = keep.rownames)
if (is.null(select)) {
indt <- indt
} else {
if (is.null(names(select))) stop("'select' must be a named list")