Last active
May 14, 2016 17:39
-
-
Save Vessy/225797327570867ecd23894de126bee9 to your computer and use it in GitHub Desktop.
Using eval to create a function that will be able to produce a data summary (using the plyr::ddply function) based on the user specified data frame, variables (that correspond to table column names) over which the data should be split and summarized (character vector), and a two-column table (data frame) which contains the specified name of the …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Using eval to create a function that will be able to produce a data summary (using the plyr::ddply function) | |
# based on the user specified data frame, variables (that correspond to table column names) over which they | |
# want to split and summarize the data (character vector), and a two-column table (data frame) which contains | |
# the user specified name of the function used for summary (first column) and the function used for summary (second column) | |
# | |
# For more info, see http://www.vesnam.com/Rblog/one-function-to-run-them-all/ | |
# | |
# Data, variable, and formulas used for data summary | |
ex1_df <- datasets::mtcars | |
ex1_vars <- "gear" | |
ex1_formulas <- data.frame(labs=c("cust_val", "total_hp", "avg_mpg"), ress=c("mpg+2*carb", "sum(hp)", "mean(mpg)"), stringsAsFactors = FALSE) | |
# This is the expected result | |
ex1_trueResult <- plyr::ddply(ex1_df, .variable=c("gear"), function(x) | |
data.frame(cust_val = x$mpg+2*x$carb, | |
total_hp = sum(x$hp), | |
avg_mpg = mean(x$mpg))) | |
# Testing version #! | |
# It passes an expression vector to eval and only the last element is evaluated. | |
ex1_res01 <- oneForMany_first(ex1_df, ex1_vars, ex1_formulas) | |
# Testing version #2 | |
# Using sapply to overcome the eval expression vector issue | |
# Everything is calculated, but data is not in desired format (due to column names mismatches) | |
# and one would need to parse it | |
ex1_res02 <- oneForMany_secpmd(ex1_df, ex1_vars, ex1_formulas) | |
# Testing version #3 | |
# Using a list instead of vector; everything is calculated, but data is not in desired format | |
# and one would need to parse it | |
ex1_res03 <- oneForMany_third(ex1_df, ex1_vars, ex1_formulas) | |
# Testing version #4 | |
# Using eval to create a function that returns a data frame with desired columns (user specified functions) | |
ex1_res04 <- oneForMany_fourth(ex1_df, ex1_vars, ex1_formulas) | |
# We can test to confirm that the results are OK | |
testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res04)) | |
# Testing version #5 | |
# Using do.call to call plyer | |
ex1_res05 <- oneForMany_fifth(ex1_df, ex1_vars, ex1_formulas) | |
# We can test to confirm that the results are OK | |
testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res05)) | |
# Different data set | |
ex2_df <- datasets::quakes | |
ex2_vars <- c("lat", "long") | |
ex2_formulas <- data.frame(what=c("l1", "l2", "l3", "l4"), | |
how=c("lat+2*long", "depth^2/log(mag+1)", "(lat+long)/(stations*0.5)", "mean(mag)"), stringsAsFactors = FALSE) | |
ex2_trueResult <- plyr::ddply(ex2_df, .variable=c("lat", "long"), function(x) | |
data.frame(l1 = x$lat+2*x$long, | |
l2 = x$depth^2/log(x$mag+1), | |
l3 = (x$lat+x$long)/(x$stations*0.5), | |
l4 = mean(x$mag))) | |
ex2_res04 <- oneForMany_fourth(ex2_df, ex2_vars, ex2_formulas) | |
testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res04)) | |
ex2_res05 <- oneForMany_fifth(ex2_df, ex2_vars, ex2_formulas) | |
testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res05)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
oneForMany_fifth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){ | |
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops))) | |
stop("You did not specify all arguments!") | |
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops))) | |
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!") | |
if (ncol(cols_ops)>2) | |
stop("Data frame defined by third argument should have only two columns!") | |
func_list <- eval(parse(text= | |
paste("as.list(c(", | |
paste("parse(text=\"", as.character(cols_ops[,2]), sep="", collapse = "\"), "), | |
"\")))", sep=""))) | |
func_list <- setNames(func_list, as.character(cols_ops[,1])) | |
result_mutate <- do.call(plyr::ddply, c(list(.data = df_to_use, .variables = cols_fix, .fun = plyr::mutate), func_list)) | |
result_mutate[, c(cols_fix, as.character(cols_ops[,1]))] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
oneForMany_first <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){ | |
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops))) | |
stop("You did not specify all arguments!") | |
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops))) | |
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!") | |
if (ncol(cols_ops)>2) | |
stop("Data frame defined by third argument should have only two columns!") | |
# Transform assignments into appropriate format | |
col_names.sorted <- sort(colnames(df_to_use)) | |
col_names.length <- length(col_names.sorted) | |
# DF needs to be in stringsAsFactors=FALSE mode | |
for (j in 1:nrow(cols_ops)){ | |
for (i in col_names.length:1){ | |
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep="")) | |
} | |
for (i in 1:col_names.length){ | |
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep="")) | |
} | |
} | |
# Create a vector of functions | |
func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep="")) | |
# Run plyer | |
plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list))) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
oneForMany_fourth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){ | |
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops))) | |
stop("You did not specify all arguments!") | |
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops))) | |
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!") | |
if (ncol(cols_ops)>2) | |
stop("Data frame defined by third argument should have only two columns!") | |
# Transform assignments into appropriate format | |
col_names.sorted <- sort(colnames(df_to_use)) | |
col_names.length <- length(col_names.sorted) | |
# DF needs to be in stringsAsFactors=FALSE mode | |
for (j in 1:nrow(cols_ops)){ | |
for (i in col_names.length:1){ | |
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep="")) | |
} | |
for (i in 1:col_names.length){ | |
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep="")) | |
} | |
} | |
func_list <- function(x){ | |
eval(parse(text = paste(as.character(cols_ops[,1]), " <- ", cols_ops[,2], sep=""))) | |
eval(parse(text = paste("data.frame(", paste(as.character(cols_ops[,1]), sep="", collapse=", "), ")", sep=""))) | |
} | |
plyr::ddply(df_to_use, .variable=cols_fix,function(x) func_list(x)) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
oneForMany_second <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){ | |
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops))) | |
stop("You did not specify all arguments!") | |
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops))) | |
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!") | |
if (ncol(cols_ops)>2) | |
stop("Data frame defined by third argument should have only two columns!") | |
# Transform assignments into appropriate format | |
col_names.sorted <- sort(colnames(df_to_use)) | |
col_names.length <- length(col_names.sorted) | |
# DF needs to be in stringsAsFactors=FALSE mode | |
for (j in 1:nrow(cols_ops)){ | |
for (i in col_names.length:1){ | |
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep="")) | |
} | |
for (i in 1:col_names.length){ | |
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep="")) | |
} | |
} | |
# Create a vector of functions | |
func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep="")) | |
# Run plyer | |
plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(sapply(func_list, function(y) eval(y)))) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
oneForMany_third <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){ | |
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops))) | |
stop("You did not specify all arguments!") | |
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops))) | |
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!") | |
if (ncol(cols_ops)>2) | |
stop("Data frame defined by third argument should have only two columns!") | |
# Transform assignments into appropriate format | |
col_names.sorted <- sort(colnames(df_to_use)) | |
col_names.length <- length(col_names.sorted) | |
# DF needs to be in stringsAsFactors=FALSE mode | |
for (j in 1:nrow(cols_ops)){ | |
for (i in col_names.length:1){ | |
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep="")) | |
} | |
for (i in 1:col_names.length){ | |
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep="")) | |
} | |
} | |
# Create a vector of functions | |
func_list <- parse(text = paste("as.list(c(", paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep="", collapse=", "), "))", sep="")) | |
# Run plyer | |
plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list))) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment