Skip to content

Instantly share code, notes, and snippets.

@dirkschumacher
Created February 22, 2020 21:23
Show Gist options
  • Save dirkschumacher/2dfb689e4f6fafbd7bc5f317e08ba916 to your computer and use it in GitHub Desktop.
Save dirkschumacher/2dfb689e4f6fafbd7bc5f317e08ba916 to your computer and use it in GitHub Desktop.
local({
  # a quick, just for fun base R implementation of group_by/summarise/`%>%`
  # many edge cases not covered
  # also group_by does not produce a data.frame with the same shape
  # as the input
  group_by <- function(data, ...) {
    exprs <- substitute(list(...))
    grouping_cols <- vapply(exprs[-1], as.character, character(1))
    grouping_formula <- as.formula(
      paste0(". ~", paste0(grouping_cols, collapse = "+"))
    )
    df <- aggregate(grouping_formula, data, identity)
    class(df) <- c("grouped_df", class(df))
    attr(df, "grouping_cols") <- grouping_cols
    df
  }
  
  summarise <- function(data, ..., envir = parent.frame()) {
    stopifnot(inherits(data, "grouped_df"))
    exprs <- substitute(list(...))
    grouping_cols <- attr(data, "grouping_cols")
    results <- apply(data, 1, function(row) {
      eval_envir <- as.environment(row)
      parent.env(eval_envir) <- envir
      group_len <- length(row[[setdiff(names(row), grouping_cols)[[1]]]])
      eval_envir$n <- function() group_len
      eval(exprs, envir = eval_envir)
    })
    cbind(
      data[, grouping_cols, drop = FALSE],
      do.call(rbind, lapply(results, data.frame))
    )
  }
  
  `%>%` <- function(lhs, rhs) {
    lhs <- substitute(lhs)
    rhs <- substitute(rhs)
    eval(
      as.call(c(rhs[[1]], lhs, as.list(rhs[-1]))),
      envir = parent.frame()
    )
  }
  
  mtcars %>% 
    group_by(cyl) %>% 
    summarise(mean = mean(disp), n = n())
})
#>   cyl     mean  n
#> 1   4 105.1364 11
#> 2   6 183.3143  7
#> 3   8 353.1000 14

withr::with_package("dplyr", {
  mtcars %>% 
    group_by(cyl) %>% 
    summarise(mean = mean(disp), n = n())
})
#> # A tibble: 3 x 3
#>     cyl  mean     n
#>   <dbl> <dbl> <int>
#> 1     4  105.    11
#> 2     6  183.     7
#> 3     8  353.    14

Created on 2020-02-22 by the reprex package (v0.3.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment