This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Columns holding name/contact information. Each of these fields has been
# flagged with 1 or 0 according to whether it contains anything.
# The list runs from 5 to 26 and leaves out column 22.
bio_cols <- as.numeric(c(5:21, 23:26))

# Row numbers of every record whose ID already appeared in an earlier row.
dupe_id_rows <- which(duplicated(big.dataset$ID))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach pacman; if it cannot be attached, install it.
if (!require("pacman")) {
  install.packages("pacman")
}

# Load dplyr, tidyr and wakefield through pacman.
pacman::p_load(dplyr, tidyr, wakefield)

# Fix the RNG seed before generating the data.
set.seed(10)

# Example data set: n = 10000 with the race, age and sex variables.
dat <- r_data_frame(n = 10000, race, age, sex)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pander.CrossTable <- function(x, ...) { | |
pandoc.table(x$t, caption = 'Observed Count') | |
pandoc.table(x$prop.row, caption = 'Row percentages') | |
pandoc.table(x$prop.col, caption = 'Column percentages') | |
emphasize.strong.cells (which(x$chisq$residuals > 2,arr.ind=T)) | |
emphasize.strong.cells (which(x$chisq$residuals < -2,arr.ind=T)) | |
pandoc.table(x$chisq$residuals, caption = 'Residuals of the Chi-sqaured test') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
coalesce<-function(...) { | |
x<-lapply(list(...), function(z) {if (is.factor(z)) as.character(z) else z}) | |
m<-is.na(x[[1]]) | |
i<-2 | |
while(any(m) & i<=length(x)) { | |
if ( length(x[[i]])==length(x[[1]])) { | |
x[[1]][m]<-x[[i]][m] | |
} else if (length(x[[i]])==1) { | |
x[[1]][m]<-x[[i]] | |
} else { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# version 1.8.11 (commit 1048)
# library() is used rather than require() so that a missing data.table
# installation stops the script here instead of returning FALSE and
# failing later at the first data.table call.
library(data.table)
# Startup message printed at the time:
# data.table 1.8.11 For help type: help("data.table")

## create a huge data.table:
## -------------------------
set.seed(1) # fix the RNG seed
N <- 2e7 # size of DT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://stackoverflow.com/a/30781090/2725969 | |
sample0110b <- function(size, n) { | |
size <- as.integer(size) | |
n <- as.integer(n) | |
if(size > 25 || size < 3L) stop("Size out of valid range") | |
# Generate integer pool and weights |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
regcapturedmatches<-function(x,m) { | |
if (length(x) != length(m)) | |
stop(gettextf("%s and %s must have the same length", | |
sQuote("x"), sQuote("m")), domain = NA) | |
ili <- is.list(m) | |
useBytes <- if (ili) { | |
any(unlist(lapply(m, attr, "useBytes"))) | |
} else { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################## | |
# title : InstallPackages.R; | |
# purpose : install R packages commonly used by Adam H. Sparks when upgrading or installing R; | |
# producer : prepared by A. H. Sparks; | |
# last update : in Los Baños, Laguna, PHL, May 2015; | |
# inputs : none; | |
# outputs : none; | |
# remarks 1 : in order to download any packages, you need to be on-line, of course; | |
# remarks 2 : for country outlines and the like see http://www.gadm.org/ to download Rdata packages; | |
############################################################################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fingerprint <- function(x, y) { | |
if (!inherits(x, "character") | !inherits(y, "character")) { | |
stop("x and y must be character strings") | |
} | |
x1 <- strsplit(x, "")[[1]] | |
y1 <- strsplit(y, "")[[1]] | |
f <- c(x1, y1) | |
final <- paste(f[order(f)], collapse = "") | |
return(final) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(Rserve) | |
library(data.table) | |
# We have 4 ports designed to run the cluster, so up to 4 nodes.
port <- 9411:9414

# Simulated input data from which partitions are derived dynamically by year.
dt <- data.table(time_year = 2012:2014)
partitioning <- quote(time_year)

# Extract all distinct partition values from the reference dataset and
# name each value after itself.
partitions <- dt[, unique(eval(as.name(partitioning)))]
names(partitions) <- partitions

# Keep one port per partition, named after the partition it is paired with.
port <- port[seq_along(partitions)]
names(port) <- as.character(partitions)