sbfnk · November 26, 2021 17:02
diff --git a/get_covid19_nowcasts_all_times.r b/get_covid19_nowcasts_all_times.r
 ##' Get estimates shown at \url{https://epiforecasts.io/covid} for all times
 ##'
 ##' Nowcasts at \url{https://epiforecasts.io/covid} and the related github
 ##' repository at \url{https://github.com/epiforecasts/covid-rt-estimates} only
 ##' cover the last 3 months. This function downloads every committed version
 ##' of the requested summary file and takes, for each date and each provided
 ##' quantile, the median across all downloaded versions, to provide an
 ##' estimate of a time series covering all times available.
 ##'
 ##' @param dataset character; data set corresponding to directories at
 ##' \url{https://github.com/epiforecasts/covid-rt-estimates}. Default is
 ##' \code{"national/cases"}, other examples of possible values are:
 ##' \code{"subnational/brazil/cases"} or
 ##' \code{"subnational/united-kingdom/admission"}
 ##' @param variable character; quantity to consider; by default: "rt", but
 ##' could also be, e.g. "growth_rate", or "cases_by_infection"; corresponds to
 ##' the name of \code{.csv} files in the summary directories at
 ##' \url{https://github.com/epiforecasts/covid-rt-estimates}
 ##' @param earliest_date character; the earliest date to consider, sent as the
 ##' \code{since} parameter of the GitHub commits query; by default:
 ##' \code{"2020-08-30"} (the day after estimates started being produced using
 ##' EpiNow2). It is not recommended to set this to an earlier date as it would
 ##' lead to a mixture of methods. If \code{NULL}, no \code{since} filter is
 ##' added to the query.
 ##' @return a data frame with one row per combination of the first column of
 ##' the downloaded files and \code{date}, and one column per quantity
 ##' (\code{median}, \code{lower_*}, \code{upper_*}) holding the median of that
 ##' quantity across all downloaded versions. An error is raised if no commit
 ##' matches the query.
 ##' @importFrom gh gh
 ##' @importFrom readr read_csv
 ##' @importFrom dplyr bind_rows filter select group_by across all_of summarise
 ##' @importFrom tidyr pivot_longer pivot_wider starts_with
 ##' @importFrom stats median
 ##' @author Sebastian Funk
 get_covid19_nowcasts_all_times <-
   function(dataset = "national/cases", variable = "rt",
            earliest_date = "2020-08-30") {
   owner <- "epiforecasts"
   repo <- "covid-rt-estimates"
   path <- paste(dataset, "summary", paste0(variable, ".csv"), sep = "/")

   ## list every commit that touched the summary file
   query <- "/repos/{owner}/{repo}/commits?path={path}"
   if (!is.null(earliest_date)) {
     query <- paste0(query, "&since={date}")
   }

   commits <-
     gh::gh(query,
            owner = owner,
            repo = repo,
            path = path,
            date = earliest_date,
            .limit = Inf)

   shas <- vapply(commits, function(commit) commit[["sha"]], character(1))

   ## fail early with a clear message instead of an obscure "object 'type' not
   ## found" error further down when nothing matches the query
   if (length(shas) == 0) {
     stop("No commits found for '", path, "' in ", owner, "/", repo,
          "; check 'dataset', 'variable' and 'earliest_date'.",
          call. = FALSE)
   }

   ## download the file as it was at each of these commits
   estimates <-
     lapply(
       shas,
       function(sha) {
         readr::read_csv(
           paste("https://raw.githubusercontent.com", owner, repo,
                 sha, path, sep = "/"))
       }
     )

   ## all verbs are namespace-qualified so that the function does not depend
   ## on dplyr/tidyr being attached by the caller
   all_estimates <- dplyr::bind_rows(estimates)
   all_estimates <- dplyr::filter(all_estimates, type == "estimate")
   all_estimates <- dplyr::select(all_estimates, -strat, -type, -mean, -sd)

   long <-
     tidyr::pivot_longer(
       all_estimates,
       c(median, tidyr::starts_with("lower_"), tidyr::starts_with("upper_"))
     )

   ## group by the first column of the files, the date and the quantity name
   group_cols <- c(names(long)[1], "date", "name")
   grouped <- dplyr::group_by(long, dplyr::across(dplyr::all_of(group_cols)))

   ## median of each quantity across all downloaded versions
   medians <-
     dplyr::summarise(grouped, value = stats::median(value), .groups = "drop")

   tidyr::pivot_wider(medians, names_from = "name", values_from = "value")
 }

 library("dplyr")
 library("readr")
 library("tidyr")
 library("gh")

 ## example: Rt estimates over all available times for the Brazilian
 ## subnational case data
 rt_brazil <-
   get_covid19_nowcasts_all_times(dataset = "subnational/brazil/cases")
	##' Get estimates shown at \url{https://epiforecasts.io/covid} for all times
	##'
	##' Nowcasts at \url{https://epiforecasts.io/covid} and the related github
	##' repository at \url{https://github.com/epiforecasts/covid-rt-estimates} only
	##' cover the last 3 months. This function downloads every committed version
	##' of the requested summary file and takes, for each date and each provided
	##' quantile, the median across all downloaded versions, to provide an
	##' estimate of a time series covering all times available.
	##'
	##' @param dataset character; data set corresponding to directories at
	##' \url{https://github.com/epiforecasts/covid-rt-estimates}. Default is
	##' \code{"national/cases"}, other examples of possible values are:
	##' \code{"subnational/brazil/cases"} or
	##' \code{"subnational/united-kingdom/admission"}
	##' @param variable character; quantity to consider; by default: "rt", but
	##' could also be, e.g. "growth_rate", or "cases_by_infection"; corresponds to
	##' the name of \code{.csv} files in the summary directories at
	##' \url{https://github.com/epiforecasts/covid-rt-estimates}
	##' @param earliest_date character; the earliest date to consider, sent as the
	##' \code{since} parameter of the GitHub commits query; by default:
	##' \code{"2020-08-30"} (the day after estimates started being produced using
	##' EpiNow2). It is not recommended to set this to an earlier date as it would
	##' lead to a mixture of methods. If \code{NULL}, no \code{since} filter is
	##' added to the query.
	##' @return a data frame with one row per combination of the first column of
	##' the downloaded files and \code{date}, and one column per quantity
	##' (\code{median}, \code{lower_*}, \code{upper_*}) holding the median of that
	##' quantity across all downloaded versions. An error is raised if no commit
	##' matches the query.
	##' @importFrom gh gh
	##' @importFrom readr read_csv
	##' @importFrom dplyr bind_rows filter select group_by across all_of summarise
	##' @importFrom tidyr pivot_longer pivot_wider starts_with
	##' @importFrom stats median
	##' @author Sebastian Funk
	get_covid19_nowcasts_all_times <-
	  function(dataset = "national/cases", variable = "rt",
	           earliest_date = "2020-08-30") {
	  owner <- "epiforecasts"
	  repo <- "covid-rt-estimates"
	  path <- paste(dataset, "summary", paste0(variable, ".csv"), sep = "/")

	  ## list every commit that touched the summary file
	  query <- "/repos/{owner}/{repo}/commits?path={path}"
	  if (!is.null(earliest_date)) {
	    query <- paste0(query, "&since={date}")
	  }

	  commits <-
	    gh::gh(query,
	           owner = owner,
	           repo = repo,
	           path = path,
	           date = earliest_date,
	           .limit = Inf)

	  shas <- vapply(commits, function(commit) commit[["sha"]], character(1))

	  ## fail early with a clear message instead of an obscure "object 'type' not
	  ## found" error further down when nothing matches the query
	  if (length(shas) == 0) {
	    stop("No commits found for '", path, "' in ", owner, "/", repo,
	         "; check 'dataset', 'variable' and 'earliest_date'.",
	         call. = FALSE)
	  }

	  ## download the file as it was at each of these commits
	  estimates <-
	    lapply(
	      shas,
	      function(sha) {
	        readr::read_csv(
	          paste("https://raw.githubusercontent.com", owner, repo,
	                sha, path, sep = "/"))
	      }
	    )

	  ## all verbs are namespace-qualified so that the function does not depend
	  ## on dplyr/tidyr being attached by the caller
	  all_estimates <- dplyr::bind_rows(estimates)
	  all_estimates <- dplyr::filter(all_estimates, type == "estimate")
	  all_estimates <- dplyr::select(all_estimates, -strat, -type, -mean, -sd)

	  long <-
	    tidyr::pivot_longer(
	      all_estimates,
	      c(median, tidyr::starts_with("lower_"), tidyr::starts_with("upper_"))
	    )

	  ## group by the first column of the files, the date and the quantity name
	  group_cols <- c(names(long)[1], "date", "name")
	  grouped <- dplyr::group_by(long, dplyr::across(dplyr::all_of(group_cols)))

	  ## median of each quantity across all downloaded versions
	  medians <-
	    dplyr::summarise(grouped, value = stats::median(value), .groups = "drop")

	  tidyr::pivot_wider(medians, names_from = "name", values_from = "value")
	}

	library("dplyr")
	library("readr")
	library("tidyr")
	library("gh")

	## example: Rt estimates over all available times for the Brazilian
	## subnational case data
	rt_brazil <-
	  get_covid19_nowcasts_all_times(dataset = "subnational/brazil/cases")