-
-
Save skranz/9681509 to your computer and use it in GitHub Desktop.
# Helper functions that allow string arguments for dplyr's data modification functions like arrange, select etc.
# Author: Sebastian Kranz
# Examples are below
#' Modified version of dplyr's filter that uses string arguments
#'
#' @param .data The data object handed on as first argument of `filter`.
#' @param ... Character strings (or character vectors) containing the filter
#'   conditions as R code; forwarded unchanged to `eval.string.dplyr`.
#' @return The value of the generated `filter` call.
#' @export
s_filter = function(.data, ...) {
  eval.string.dplyr(.data, "filter", ...)
}
#' Modified version of dplyr's select that uses string arguments
#'
#' @param .data The data object handed on as first argument of `select`.
#' @param ... Character strings (or character vectors) containing the column
#'   selections as R code; forwarded unchanged to `eval.string.dplyr`.
#' @return The value of the generated `select` call.
#' @export
s_select = function(.data, ...) {
  eval.string.dplyr(.data, "select", ...)
}
#' Modified version of dplyr's arrange that uses string arguments
#'
#' @param .data The data object handed on as first argument of `arrange`.
#' @param ... Character strings (or character vectors) containing the sort
#'   expressions as R code; forwarded unchanged to `eval.string.dplyr`.
#' @return The value of the generated `arrange` call.
#' @export
s_arrange = function(.data, ...) {
  eval.string.dplyr(.data, "arrange", ...)
}
#' Modified version of dplyr's mutate that uses string arguments
#'
#' @param .data The data object handed on as first argument of `mutate`.
#' @param ... Character strings (or character vectors) containing the column
#'   definitions as R code; forwarded unchanged to `eval.string.dplyr`.
#' @return The value of the generated `mutate` call.
#' @export
s_mutate = function(.data, ...) {
  eval.string.dplyr(.data, "mutate", ...)
}
#' Modified version of dplyr's summarise that uses string arguments
#'
#' @param .data The data object handed on as first argument of `summarise`.
#' @param ... Character strings (or character vectors) containing the summary
#'   expressions as R code; forwarded unchanged to `eval.string.dplyr`.
#' @return The value of the generated `summarise` call.
#' @export
s_summarise = function(.data, ...) {
  eval.string.dplyr(.data, "summarise", ...)
}
#' Modified version of dplyr's group_by that uses string arguments
#'
#' @param .data The data object handed on as first argument of `group_by`.
#' @param ... Character strings (or character vectors) containing the grouping
#'   variables as R code; forwarded unchanged to `eval.string.dplyr`.
#' @return The value of the generated `group_by` call.
#' @export
s_group_by = function(.data, ...) {
  eval.string.dplyr(.data, "group_by", ...)
}
#' Internal function used by s_filter, s_select etc.
#'
#' Builds the source text of the call `<.fun.name>(.data,<arg1>,<arg2>,...)`
#' from the strings in `...`, parses it and evaluates it.
#'
#' @param .data The data object; it is referenced by the name `.data` as the
#'   first argument of the generated call.
#' @param .fun.name Name of the function to call, given as a single string.
#' @param ... Character strings or character vectors (also nested in lists)
#'   containing R code. All elements are flattened and joined with ",", so a
#'   single string may itself hold several comma separated arguments.
#' @return The value of the evaluated call.
#' @note The strings are parsed and evaluated as R code. Never pass text from
#'   an untrusted source.
eval.string.dplyr = function(.data, .fun.name, ...) {
  stopifnot(is.character(.fun.name), length(.fun.name) == 1L)

  args = unlist(list(...))

  # Join `.data` and the string arguments in one step so that a call without
  # any string argument becomes "fun(.data)" instead of "fun(.data,)", which
  # would pass a dangling empty argument.
  code = paste0(.fun.name, "(", paste0(c(".data", args), collapse = ","), ")")

  # Evaluate in a fresh environment that holds nothing but `.data` and whose
  # parent is this function's enclosing environment. Name lookup therefore
  # works as if the call were made from inside this function, but the locals
  # `args`, `code` and `.fun.name` cannot shadow objects of the same name
  # that the supplied strings refer to.
  eval.env = new.env(parent = parent.env(environment()))
  assign(".data", .data, envir = eval.env)

  result = eval(parse(text = code, srcfile = NULL), envir = eval.env)
  result
}
# Examples
library(dplyr)

# The examples chain with %>%. The `%.%` operator used by early dplyr
# versions was deprecated in favour of %>% and is no longer available.

# Original usage of dplyr
mtcars %>%
  filter(gear == 3, cyl == 8) %>%
  select(mpg, cyl, hp:vs)

# Select user specified cols.
# Note that you can have a vector of strings
# or a single string separated by ',' or a mixture of both
cols = c("mpg", "cyl, hp:vs")
mtcars %>%
  filter(gear == 3, cyl == 8) %>%
  s_select(cols)

# Filter using a string
col = "gear"
mtcars %>%
  s_filter(paste0(col, "==3"), "cyl==8") %>%
  select(mpg, cyl, hp:vs)

# Arrange without using a pipe
s_arrange(mtcars, "-mpg, gear, carb")

# group_by and summarise with strings
mtcars %>%
  s_group_by("cyl") %>%
  s_summarise("mean(disp), max(disp)")
Sebastian. Thanks so much. This is just what I was looking for!
Very useful for loops. Thanks !
This is great! Thanks for this... I have been banging my head against the wall for 2 weeks trying to get to a much less elegant solution than the one you have contributed here.
I found this gist while looking for a way to use strings of variable names (or even a vector of variable names as strings) with dplyr functions, but it turns out that a lot of this has since been incorporated directly into dplyr. See the "non-standard evaluation" vignette in recent versions:
vignette("nse", package="dplyr")
I agree that this was much-needed functionality, and these are some great little utility functions: thanks for sharing.
I like it. It inspired me to try my hand at standard evaluation adapters for dplyr `0.7.0` using the new `rlang` / `tidyeval` notation. Mostly I am trying to keep to column names, so deliberately I have only made `arrange_se()` nearly as flexible as your `s_arrange()`. I finally found my notes linking to the page and belatedly added a credit to my project (sorry about the delay).
I like this solution. Where do I need to paste this code to get this functionality?