Vessy · May 14, 2016 17:39
diff --git a/oneForMany.R b/oneForMany.R
 # Using eval to create a function that will be able to produce a data summary (using the plyr::ddply function)
 # based on the user specified data frame, variables (that correspond to table column names) over which they 
 # want to split and summarize the data (character vector), and a two-column table (data frame) which contains 
 # the user specified name of the function used for summary (first column) and the function used for summary (second column)
 #
 # For more info, see http://www.vesnam.com/Rblog/one-function-to-run-them-all/
 #

 # Data, variable, and formulas used for data summary
 ex1_df <- datasets::mtcars
 ex1_vars <- "gear"
 ex1_formulas <- data.frame(labs=c("cust_val", "total_hp", "avg_mpg"), ress=c("mpg+2*carb", "sum(hp)", "mean(mpg)"), stringsAsFactors = FALSE)

 # This is the expected result
 ex1_trueResult <- plyr::ddply(ex1_df, .variable=c("gear"), function(x) 
  data.frame(cust_val = x$mpg+2*x$carb, 
             total_hp = sum(x$hp),
             avg_mpg = mean(x$mpg)))

 # Testing version #!
 # It passes an expression vector to eval and only the last element is evaluated.
 ex1_res01 <- oneForMany_first(ex1_df, ex1_vars, ex1_formulas)

 # Testing version #2
 # Using sapply to overcome the eval expression vector issue
 # Everything is calculated, but data is not in desired format (due to column names mismatches)
 # and one would need to parse it
 ex1_res02 <- oneForMany_secpmd(ex1_df, ex1_vars, ex1_formulas)

 # Testing version #3
 # Using a list instead of vector; everything is calculated, but data is not in desired format
 # and one would need to parse it
 ex1_res03 <- oneForMany_third(ex1_df, ex1_vars, ex1_formulas)

 # Testing version #4
 # Using eval to create a function that returns a data frame with desired columns (user specified functions)
 ex1_res04 <- oneForMany_fourth(ex1_df, ex1_vars, ex1_formulas)
 # We can test to confirm that the results are OK
 testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res04))

 # Testing version #5
 # Using do.call to call plyer
 ex1_res05 <- oneForMany_fifth(ex1_df, ex1_vars, ex1_formulas)
 # We can test to confirm that the results are OK
 testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res05))

 # Different data set
 ex2_df <- datasets::quakes
 ex2_vars <- c("lat", "long")
 ex2_formulas <-  data.frame(what=c("l1", "l2", "l3", "l4"), 
                      how=c("lat+2*long", "depth^2/log(mag+1)", "(lat+long)/(stations*0.5)", "mean(mag)"), stringsAsFactors = FALSE)

 ex2_trueResult <- plyr::ddply(ex2_df, .variable=c("lat", "long"), function(x) 
  data.frame(l1 = x$lat+2*x$long, 
             l2 = x$depth^2/log(x$mag+1), 
             l3 = (x$lat+x$long)/(x$stations*0.5),
             l4 = mean(x$mag)))

 ex2_res04 <- oneForMany_fourth(ex2_df, ex2_vars, ex2_formulas)
 testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res04))

 ex2_res05 <- oneForMany_fifth(ex2_df, ex2_vars, ex2_formulas)
 testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res05))
diff --git a/oneForMany_fifth.R b/oneForMany_fifth.R
 oneForMany_fifth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
  
  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")
  
  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
  
  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")
  
  func_list <- eval(parse(text=
                            paste("as.list(c(",
                                  paste("parse(text=\"", as.character(cols_ops[,2]), sep="", collapse = "\"), "),
                                  "\")))", sep="")))
  
  func_list <- setNames(func_list, as.character(cols_ops[,1]))
  
  result_mutate <- do.call(plyr::ddply, c(list(.data = df_to_use, .variables = cols_fix,  .fun = plyr::mutate), func_list))

  result_mutate[, c(cols_fix, as.character(cols_ops[,1]))]
 }
diff --git a/oneForMany_first.R b/oneForMany_first.R
 oneForMany_first <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
  
  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")
  
  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
  
  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")
  
  # Transform assignments into appropriate format
  col_names.sorted <- sort(colnames(df_to_use))
  col_names.length <- length(col_names.sorted)
  
  # DF needs to be in stringsAsFactors=FALSE mode
  for (j in 1:nrow(cols_ops)){
    
    for (i in col_names.length:1){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
    }
    
    for (i in 1:col_names.length){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]),  paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
    }
  }
  
  # Create a vector of functions
  func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))
  
  # Run plyer
  plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
 }
diff --git a/oneForMany_fourth.R b/oneForMany_fourth.R
 oneForMany_fourth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
  
  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")
  
  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
  
  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")
  
  # Transform assignments into appropriate format
  col_names.sorted <- sort(colnames(df_to_use))
  col_names.length <- length(col_names.sorted)
  
  # DF needs to be in stringsAsFactors=FALSE mode
  for (j in 1:nrow(cols_ops)){
    
    for (i in col_names.length:1){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
    }
    
    for (i in 1:col_names.length){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]),  paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
    }
  }
  
  func_list  <- function(x){
    eval(parse(text = paste(as.character(cols_ops[,1]), " <- ", cols_ops[,2], sep="")))
    eval(parse(text = paste("data.frame(", paste(as.character(cols_ops[,1]), sep="", collapse=", "), ")", sep="")))
  }
  
  
  plyr::ddply(df_to_use, .variable=cols_fix,function(x) func_list(x))
 }
diff --git a/oneForMany_second.R b/oneForMany_second.R
 oneForMany_second <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
  
  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")
  
  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
  
  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")
  
  # Transform assignments into appropriate format
  col_names.sorted <- sort(colnames(df_to_use))
  col_names.length <- length(col_names.sorted)
  
  # DF needs to be in stringsAsFactors=FALSE mode
  for (j in 1:nrow(cols_ops)){
    
    for (i in col_names.length:1){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
    }
    
    for (i in 1:col_names.length){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]),  paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
    }
  }
  
  # Create a vector of functions
  func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))
  
  # Run plyer
  plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(sapply(func_list, function(y) eval(y))))
 }
diff --git a/oneForMany_third.R b/oneForMany_third.R
 oneForMany_third <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
  
  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")
  
  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
  
  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")
  
  # Transform assignments into appropriate format
  col_names.sorted <- sort(colnames(df_to_use))
  col_names.length <- length(col_names.sorted)
  
  # DF needs to be in stringsAsFactors=FALSE mode
  for (j in 1:nrow(cols_ops)){
    
    for (i in col_names.length:1){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
    }
    
    for (i in 1:col_names.length){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]),  paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
    }
  }
  
  # Create a vector of functions
  func_list <- parse(text = paste("as.list(c(", paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep="", collapse=", "), "))", sep=""))
  
  # Run plyer
  plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
 }
	# Using eval to create a function that will be able to produce a data summary (using the plyr::ddply function)
	# based on the user specified data frame, variables (that correspond to table column names) over which they
	# want to split and summarize the data (character vector), and a two-column table (data frame) which contains
	# the user specified name of the function used for summary (first column) and the function used for summary (second column)
	#
	# For more info, see http://www.vesnam.com/Rblog/one-function-to-run-them-all/
	#

	# Data, variable, and formulas used for data summary
	ex1_df <- datasets::mtcars
	ex1_vars <- "gear"
	ex1_formulas <- data.frame(labs=c("cust_val", "total_hp", "avg_mpg"), ress=c("mpg+2*carb", "sum(hp)", "mean(mpg)"), stringsAsFactors = FALSE)

	# This is the expected result
	ex1_trueResult <- plyr::ddply(ex1_df, .variable=c("gear"), function(x)
	data.frame(cust_val = x$mpg+2*x$carb,
	total_hp = sum(x$hp),
	avg_mpg = mean(x$mpg)))

	# Testing version #!
	# It passes an expression vector to eval and only the last element is evaluated.
	ex1_res01 <- oneForMany_first(ex1_df, ex1_vars, ex1_formulas)

	# Testing version #2
	# Using sapply to overcome the eval expression vector issue
	# Everything is calculated, but data is not in desired format (due to column names mismatches)
	# and one would need to parse it
	ex1_res02 <- oneForMany_secpmd(ex1_df, ex1_vars, ex1_formulas)

	# Testing version #3
	# Using a list instead of vector; everything is calculated, but data is not in desired format
	# and one would need to parse it
	ex1_res03 <- oneForMany_third(ex1_df, ex1_vars, ex1_formulas)

	# Testing version #4
	# Using eval to create a function that returns a data frame with desired columns (user specified functions)
	ex1_res04 <- oneForMany_fourth(ex1_df, ex1_vars, ex1_formulas)
	# We can test to confirm that the results are OK
	testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res04))

	# Testing version #5
	# Using do.call to call plyer
	ex1_res05 <- oneForMany_fifth(ex1_df, ex1_vars, ex1_formulas)
	# We can test to confirm that the results are OK
	testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res05))

	# Different data set
	ex2_df <- datasets::quakes
	ex2_vars <- c("lat", "long")
	ex2_formulas <- data.frame(what=c("l1", "l2", "l3", "l4"),
	how=c("lat+2long", "depth^2/log(mag+1)", "(lat+long)/(stations0.5)", "mean(mag)"), stringsAsFactors = FALSE)

	ex2_trueResult <- plyr::ddply(ex2_df, .variable=c("lat", "long"), function(x)
	data.frame(l1 = x$lat+2*x$long,
	l2 = x$depth^2/log(x$mag+1),
	l3 = (x$lat+x$long)/(x$stations*0.5),
	l4 = mean(x$mag)))

	ex2_res04 <- oneForMany_fourth(ex2_df, ex2_vars, ex2_formulas)
	testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res04))

	ex2_res05 <- oneForMany_fifth(ex2_df, ex2_vars, ex2_formulas)
	testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res05))
	oneForMany_fifth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	func_list <- eval(parse(text=
	paste("as.list(c(",
	paste("parse(text=\"", as.character(cols_ops[,2]), sep="", collapse = "\"), "),
	"\")))", sep="")))

	func_list <- setNames(func_list, as.character(cols_ops[,1]))

	result_mutate <- do.call(plyr::ddply, c(list(.data = df_to_use, .variables = cols_fix, .fun = plyr::mutate), func_list))

	result_mutate[, c(cols_fix, as.character(cols_ops[,1]))]
	}
	oneForMany_first <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	# Transform assignments into appropriate format
	col_names.sorted <- sort(colnames(df_to_use))
	col_names.length <- length(col_names.sorted)

	# DF needs to be in stringsAsFactors=FALSE mode
	for (j in 1:nrow(cols_ops)){

	for (i in col_names.length:1){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
	}

	for (i in 1:col_names.length){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
	}
	}

	# Create a vector of functions
	func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))

	# Run plyer
	plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
	}
	oneForMany_fourth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	# Transform assignments into appropriate format
	col_names.sorted <- sort(colnames(df_to_use))
	col_names.length <- length(col_names.sorted)

	# DF needs to be in stringsAsFactors=FALSE mode
	for (j in 1:nrow(cols_ops)){

	for (i in col_names.length:1){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
	}

	for (i in 1:col_names.length){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
	}
	}

	func_list <- function(x){
	eval(parse(text = paste(as.character(cols_ops[,1]), " <- ", cols_ops[,2], sep="")))
	eval(parse(text = paste("data.frame(", paste(as.character(cols_ops[,1]), sep="", collapse=", "), ")", sep="")))
	}


	plyr::ddply(df_to_use, .variable=cols_fix,function(x) func_list(x))
	}
	oneForMany_second <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	# Transform assignments into appropriate format
	col_names.sorted <- sort(colnames(df_to_use))
	col_names.length <- length(col_names.sorted)

	# DF needs to be in stringsAsFactors=FALSE mode
	for (j in 1:nrow(cols_ops)){

	for (i in col_names.length:1){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
	}

	for (i in 1:col_names.length){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
	}
	}

	# Create a vector of functions
	func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))

	# Run plyer
	plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(sapply(func_list, function(y) eval(y))))
	}
	oneForMany_third <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	# Transform assignments into appropriate format
	col_names.sorted <- sort(colnames(df_to_use))
	col_names.length <- length(col_names.sorted)

	# DF needs to be in stringsAsFactors=FALSE mode
	for (j in 1:nrow(cols_ops)){

	for (i in col_names.length:1){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
	}

	for (i in 1:col_names.length){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
	}
	}

	# Create a vector of functions
	func_list <- parse(text = paste("as.list(c(", paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep="", collapse=", "), "))", sep=""))

	# Run plyer
	plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
	}