Created November 22, 2020 21:12
Extracting column names with tidyselect: fastest implementation?
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>     filter, lag
#> The following objects are masked from 'package:base':
#>     intersect, setdiff, setequal, union

# Return the names of the columns of `df` that start with the prefix `m`.
#
# The selection helper is called directly on the character vector of column
# names (via its `vars` argument) instead of going through a full tidyselect
# evaluation of the data frame.
#
# df: a data frame (anything `colnames()` works on).
# m:  prefix(es) to match, passed as-is to `starts_with()`.
# Returns a character vector of matching column names.
get_cols <- function(df, m) {
  cols <- colnames(df)
  # starts_with() yields integer positions into `cols`; index to get the names.
  cols[tidyselect::starts_with(m, vars = cols)]
}

# Return the names of the columns of `df` that start with the prefix `m`,
# resolved through tidyselect's `eval_select()`.
#
# df: a data frame.
# m:  prefix(es) to match, passed as-is to `starts_with()`.
# Returns a character vector of matching column names.
get_cols2 <- function(df, m) {
  pos <- tidyselect::eval_select(tidyselect::starts_with(m), df)
  # `pos` holds the selected positions, named by column; the names are the
  # result the examples in this file expect.
  names(pos)
}

# Return the names of the columns of `df` picked by an arbitrary tidyselect
# expression, e.g. `starts_with("Sepal")` or `c(starts_with("a"), contains("b"))`.
#
# df:   a data frame.
# expr: a tidyselect selection, written inline by the caller.
# Returns a character vector of selected column names.
get_cols3 <- function(df, expr) {
  # NOTE(review): `expr` is handed over without being defused (no enquo()), so
  # this presumably relies on the argument being evaluated lazily inside
  # eval_select() -- confirm against the installed tidyselect version.
  pos <- tidyselect::eval_select(expr, df)
  names(pos)
}

# Return the names of the columns of `df` picked by one or more tidyselect
# expressions supplied through `...`.
#
# df:  a data frame.
# ...: tidyselect selections; they are combined as `c(...)`.
# Returns a character vector of selected column names.
get_cols4 <- function(df, ...) {
  # The selection is captured as the expression `c(...)` and handed to
  # eval_select(), which receives it together with the data frame.
  pos <- tidyselect::eval_select(rlang::expr(c(...)), df)
  names(pos)
}

# Baseline implementation: subset the data frame with `select()` and read the
# column names off the result.
#
# df:  a data frame.
# ...: tidyselect selections forwarded to `select()`.
# Returns a character vector of selected column names.
get_cols_full <- function(df, ...) {
  df %>%
    select(...) %>%
    colnames()
}

# Sanity checks on the built-in `iris` data: each implementation is called with
# a "Sepal" prefix selection (plus one combined selection for get_cols3).
# Lines starting with `#>` are the output recorded by reprex.
get_cols(iris, "Sepal")
#> [1] "Sepal.Length" "Sepal.Width"
get_cols2(iris, "Sepal")
#> [1] "Sepal.Length" "Sepal.Width"
get_cols3(iris, starts_with("Sepal"))
#> [1] "Sepal.Length" "Sepal.Width"
get_cols3(iris, c(starts_with("Sepal"), contains("Species")))
#> [1] "Sepal.Length" "Sepal.Width"  "Species"
get_cols4(iris, starts_with("Sepal"))
#> [1] "Sepal.Length" "Sepal.Width"
get_cols_full(iris, starts_with("Sepal"))
#> [1] "Sepal.Length" "Sepal.Width"

# Time the five implementations on the same selection. `check = "equal"` makes
# microbenchmark verify that all expressions return equal results.
microbenchmark::microbenchmark(
  a = get_cols(iris, "Sepal"),
  a2 = get_cols2(iris, "Sepal"),
  a3 = get_cols3(iris, starts_with("Sepal")),
  a4 = get_cols4(iris, starts_with("Sepal")),
  b = get_cols_full(iris, starts_with("Sepal")),
  check = "equal"
) %>%
  summary()
# NOTE: the table below was recorded while `a2` still called get_cols() (not
# get_cols2()), so its `a2` row repeats `a`; re-run to get get_cols2() timings.
#> expr min lq mean median uq max neval cld
#> a 37.546 47.6365 57.24979 53.0745 58.9625 220.526 100 a
#> a2 37.685 49.7940 56.36087 53.3735 59.7245 88.708 100 a
#> a3 284.418 295.4355 351.84057 311.8375 334.2105 1923.586 100 b
#> a4 844.777 880.8530 1019.41088 957.0495 1069.0475 2666.677 100 c
#> b 1052.824 1119.2700 1293.29644 1209.8315 1364.9575 2796.941 100 d

Created on 2020-11-22 by the reprex package (v0.3.0)

