Created
November 10, 2015 23:55
-
-
Save dgrtwo/7d175bcc213aceaefd4f to your computer and use it in GitHub Desktop.
Tidy distributed models on a partitioned data frame from dplyr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Helper and setup functions
# Build a tidying method for partitioned data frames.
#
# Arguments:
#   func  the function to apply to the model held in each group (for
#         example a tidy, augment or glance function). It is not
#         evaluated until the returned method is first called.
#
# Returns a function(x, object, ...) where
#   x       is the partitioned data frame; its `groups` element supplies
#           the grouping variables,
#   object  is the bare name of the column holding the models,
#   ...     are extra arguments passed on to `func`.
# The returned method groups `x` by `x$groups` and runs `func` on the
# first element of the `object` column within each group.
wrap_party_df <- function(func) {
  function(x, object, ...) {
    # `object` arrives unevaluated; col_name() (defined elsewhere) is
    # presumably what turns the captured expression into a column name
    # -- TODO confirm against its definition
    n <- col_name(substitute(object))
    # have to create an expression since we cannot rely on local
    # variables: substitute() inlines the values of func, colname and
    # args so the expression carries no references to this frame
    args <- list(...)
    expr <- substitute(
      do.call(func, c(list(.[[colname]][[1]]), args)),
      list(func = func, colname = n, args = args)
    )
    x %>%
      group_by_(.dots = x$groups) %>%
      do_(expr)
  }
}
#' Tidying methods for partitioned data frames (party_dfs) from dplyr
#'
#' These \code{tidy}, \code{augment} and \code{glance} methods are for
#' performing tidying on each group of a partitioned data frame.
#'
#' @param x a party_df
#' @param object the bare column name of the column containing the models to
#' be tidied
#' @param ... additional arguments to pass on to the respective tidying method
#'
#' @return A \code{"party_df"}, where tidying has been done for each result.
#' Use \code{collect} to retrieve it.
#'
#' @seealso \code{\link{rowwise_df_tidiers}}
#'
#' @examples
#'
#' library(dplyr)
#' library(multidplyr)
#' library(nycflights13)
#'
#' # fit a linear model for each partition
#'
#' lms <- flights %>%
#'   filter(!is.na(dep_delay), !is.na(arr_delay)) %>%
#'   group_by(flight) %>%
#'   filter(n() > 10) %>%
#'   partition(flight) %>%
#'   do(mod = lm(arr_delay ~ dep_delay, .))
#'
#' lms %>% tidy(mod)
#' lms %>% tidy(mod, conf.int = TRUE)
#' lms %>% tidy(mod) %>% collect()
#'
#' lms %>% glance(mod)
#' lms %>% glance(mod) %>% collect()
#'
#' @name party_df_tidiers
NULL

# Each method below is the closure returned by wrap_party_df() for the
# matching broom generic, so all three share the (x, object, ...) signature.

#' @rdname party_df_tidiers
#' @export
tidy.party_df <- wrap_party_df(broom::tidy)

#' @rdname party_df_tidiers
#' @export
augment.party_df <- wrap_party_df(broom::augment)

#' @rdname party_df_tidiers
#' @export
glance.party_df <- wrap_party_df(broom::glance)
### To-do: handle tidy_, augment_, glance_ (standard-evaluation methods).
### To-do: handle the data argument as a column for augment.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment