Digest link header in paginated GitHub API request
## Needed for the pipe and the str_*() helpers (plyr and dplyr are called
## with `::` below, so they only need to be installed):
library(magrittr)
library(stringr)

#' Digest the Link header in a paginated result.
#'
#' Converts the Link header from a monolithic string to a usable data.frame.
#'
#' The GitHub API automatically paginates when the number of requested items
#' exceeds the number of items per page. When this occurs, the result returned
#' by the server will include a Link header that provides the URLs for other
#' pages of results, such as the next page and the last page. These assorted
#' URLs are catenated in a single string and this function converts that
#' information into a data.frame that is useful for traversing the pages.
#'
#' @param x Output of a function that gets potentially paginated results, e.g.,
#'   \code{get.*.repositories()}
#'
#' @return A data.frame, one row per URL = page. Maximum number of rows is four:
#'   one each for the "next", "last", "first", and "prev" page, indicated by the
#'   \code{rel} variable. The \code{per_page} variable will be constant across
#'   all rows and gives the number of items per page. If the header contains no
#'   links at all, the return value is NULL and a message is given.
#'
#' @references
#' \url{https://developer.github.com/guides/traversing-with-pagination/}
#' \url{https://developer.github.com/v3/#pagination}
#'
#' @examples
#' repos <- get.organization.repositories(org = "STAT545-UBC", per_page = 1)
#' digest_header_links(repos)
digest_header_links <- function(x) {
  y <- x$headers$link
  if (is.null(y)) {
    message("No links found in header.")
    return(NULL)
  }
  y %>%
    str_split(", ") %>% unlist %>%   # split into e.g. next, last, first, prev
    str_split_fixed("; ", 2) %>%     # separate URL from the relation
    plyr::alply(2) %>%               # workaround: make into a list
    dplyr::as_data_frame %>%         # convert to data.frame, no factors!
    setNames(c("URL", "rel")) %>%    # sane names
    dplyr::mutate_(rel = ~ str_match(rel, "next|last|first|prev"),
                   per_page = ~ str_match(URL, "per_page=([0-9]+)") %>%
                     `[`( , 2) %>% as.integer,
                   page = ~ str_match(URL, "&page=([0-9]+)") %>%
                     `[`( , 2) %>% as.integer,
                   URL = ~ str_replace_all(URL, "<|>", ""))
}
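The roxygen notes above say the result is "useful for traversing the pages"; here is a minimal sketch of both halves of that. First, what the function produces for a hand-built Link header in the format the referenced GitHub docs describe, and then a loop that follows "next" links until they run out. It assumes httr for the raw requests; the mock response, the gh_all_pages() name, and the example URLs are illustrative, not part of the gist.

library(httr)

## A mock response whose Link header follows the documented format
## (URLs made up for illustration):
fake <- list(headers = list(link = paste0(
  '<https://api.github.com/orgs/STAT545-UBC/repos?per_page=1&page=2>; rel="next", ',
  '<https://api.github.com/orgs/STAT545-UBC/repos?per_page=1&page=30>; rel="last"'
)))
digest_header_links(fake)
## roughly: 2 rows, rel = "next"/"last", per_page = 1, page = 2/30

## Hypothetical traversal: keep GETting the "next" URL until there isn't one.
gh_all_pages <- function(url) {
  results <- list()
  while (!is.null(url)) {
    resp <- GET(url)
    results <- c(results, content(resp))
    links <- suppressMessages(digest_header_links(resp))
    nxt <- if (is.null(links)) character(0) else links$URL[links$rel == "next"]
    url <- if (length(nxt) == 1) nxt else NULL
  }
  results
}

## e.g. all repositories of an organization, one item per page:
## repos <- gh_all_pages("https://api.github.com/orgs/STAT545-UBC/repos?per_page=1")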
Thanks @jennybc! I was able to adapt this gist for my purposes: https://gist.github.com/aronlindberg/2a9e9802579b2d239655

I modified the original function to use as.data.frame() because dplyr::as_data_frame could not be found. I also made sure that it returns 0 instead of a message when there are no headers (necessary for my subsequent iteration function to work).
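For anyone adapting the gist similarly: one way to get that 0-instead-of-message behavior without editing the original function is a small wrapper. This is a sketch only; the function name is made up, and the real adaptation lives in the gist linked above.

digest_header_links_or_zero <- function(x) {
  links <- suppressMessages(digest_header_links(x))
  if (is.null(links)) 0 else links
}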
Excellent! I know you probably won't see this, @aronlindberg, because Gist comments don't trigger notifications, but I'll still reply in case you do: the dplyr::as_data_frame() thing is probably because I've installed dplyr from GitHub and you from CRAN (or from GitHub, but not as recent a version).
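If anyone else hits that could-not-be-found error, a version-agnostic shim is one possible workaround. This is a sketch under the assumption that the installed dplyr may or may not export as_data_frame; it falls back to base R otherwise, and the as_df name is illustrative.

as_df <- function(x) {
  if ("as_data_frame" %in% getNamespaceExports("dplyr")) {
    dplyr::as_data_frame(x)
  } else {
    as.data.frame(x, stringsAsFactors = FALSE)
  }
}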