Skip to content

Instantly share code, notes, and snippets.

@seabbs
Created May 7, 2021 14:50
Show Gist options
  • Save seabbs/284f0b73d54ce3977fbd7d9c202aa428 to your computer and use it in GitHub Desktop.
Save seabbs/284f0b73d54ce3977fbd7d9c202aa428 to your computer and use it in GitHub Desktop.
An overview of the features included in the 0.9.1 release of covidregionaldata. See the release notes for more details: https://github.com/epiforecasts/covidregionaldata/releases/tag/v0.9.1
# Attach covidregionaldata so its functions can be called unqualified in the
# rest of this script.
library(covidregionaldata)
# Set up a data cache before any data are requested. Called with no
# arguments; the message printed below reports where the cache lives
# (a directory under /tmp in this run).
start_using_memoise()
#> Using a cache at: /tmp/RtmphiSeWY
# List the datasets the package supports. The result auto-prints as a tibble
# with one row per data source (18 in this run), giving its origin, class,
# the names of the region levels it provides, its type and its data URLs.
# Besides individual countries, the listing includes multi-country sources
# (ECDC, Google, JHU and WHO).
get_available_datasets()
#> # A tibble: 18 x 8
#> origin class level_1_region level_2_region level_3_region type data_urls
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Belgium Belg… region province <NA> regi… https://ep…
#> 2 Brazil Braz… state city <NA> regi… https://gi…
#> 3 Canada Cana… province <NA> <NA> regi… https://he…
#> 4 Colombia Colo… departamento <NA> <NA> regi… https://ra…
#> 5 Cuba Cuba provincia <NA> <NA> regi… https://co…
#> 6 Europea… ECDC country <NA> <NA> nati… https://op…
#> 7 France Fran… region department <NA> regi… https://ww…
#> 8 Germany Germ… bundesland landkreis <NA> regi… https://op…
#> 9 Google Goog… country subregion subregion2 nati… https://st…
#> 10 India India state <NA> <NA> regi… https://ap…
#> 11 Italy Italy regioni <NA> <NA> regi… https://ra…
#> 12 John Ho… JHU country region <NA> nati… https://ra…
#> 13 Lithuan… Lith… county municipality <NA> regi… https://op…
#> 14 Mexico Mexi… estado municipio <NA> regi… Downloads/…
#> 15 South A… Sout… province <NA> <NA> regi… https://ra…
#> 16 United … UK region authority <NA> regi… https://ww…
#> 17 United … USA state county <NA> regi… https://ra…
#> 18 World H… WHO country <NA> <NA> nati… https://co…
#> # … with 1 more variable: source_data_cols <chr>
# Get national-level data with the WHO as the source. `verbose` is not set
# here, so the download, column-specification, cleaning and processing
# messages are printed ahead of the resulting tibble.
get_national_data(source = "who")
#> Downloading data from https://covid19.who.int/WHO-COVID-19-global-data.csv
#> Rows: 116,255
#> Columns: 8
#> Delimiter: ","
#> chr [3]: Country_code, Country, WHO_region
#> dbl [4]: New_cases, Cumulative_cases, New_deaths, Cumulative_deaths
#> date [1]: Date_reported
#>
#> Use `spec()` to retrieve the guessed column specification
#> Pass a specification to the `col_types` argument to quiet this message
#> Cleaning data
#> Processing data
#> # A tibble: 116,366 x 15
#> date un_region who_region country iso_code cases_new cases_total
#> <date> <chr> <chr> <chr> <chr> <dbl> <dbl>
#> 1 2020-01-03 Asia EMRO Afghanistan AF 0 0
#> 2 2020-01-03 Europe EURO Albania AL 0 0
#> 3 2020-01-03 Africa AFRO Algeria DZ 0 0
#> 4 2020-01-03 Oceania WPRO American Samoa AS 0 0
#> 5 2020-01-03 Europe EURO Andorra AD 0 0
#> 6 2020-01-03 Africa AFRO Angola AO 0 0
#> 7 2020-01-03 Americas AMRO Anguilla AI 0 0
#> 8 2020-01-03 Americas AMRO Antigua & Bar… AG 0 0
#> 9 2020-01-03 Americas AMRO Argentina AR 0 0
#> 10 2020-01-03 Asia EURO Armenia AM 0 0
#> # … with 116,356 more rows, and 8 more variables: deaths_new <dbl>,
#> # deaths_total <dbl>, recovered_new <dbl>, recovered_total <dbl>,
#> # hosp_new <dbl>, hosp_total <dbl>, tested_new <dbl>, tested_total <dbl>
# Get national-level data with Google as the source. `verbose = FALSE`
# means only the resulting tibble is printed, with no progress messages.
# NOTE(review): the first rows printed in this run are dated 2002-10-09 with
# NA counts -- confirm whether downstream use should filter on date.
get_national_data(source = "google", verbose = FALSE)
#> # A tibble: 1,586,497 x 21
#> date country iso_3166_1_alpha… cases_new cases_total deaths_new
#> <date> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 2002-10-09 Afghanistan AFG NA NA NA
#> 2 2002-10-09 Albania ALB NA NA NA
#> 3 2002-10-09 Algeria DZA NA NA NA
#> 4 2002-10-09 American Samoa ASM NA NA NA
#> 5 2002-10-09 Andorra AND NA NA NA
#> 6 2002-10-09 Angola AGO NA NA NA
#> 7 2002-10-09 Anguilla AIA NA NA NA
#> 8 2002-10-09 Antigua and Ba… ATG NA NA NA
#> 9 2002-10-09 Argentina ARG NA NA NA
#> 10 2002-10-09 Armenia ARM NA NA NA
#> # … with 1,586,487 more rows, and 15 more variables: deaths_total <dbl>,
#> # recovered_new <dbl>, recovered_total <dbl>, hosp_new <dbl>,
#> # hosp_total <dbl>, tested_new <dbl>, tested_total <dbl>,
#> # aggregation_level <dbl>, current_hospitalized <dbl>,
#> # new_intensive_care <dbl>, total_intensive_care <dbl>,
#> # current_intensive_care <dbl>, new_ventilator <dbl>, total_ventilator <dbl>,
#> # current_ventilator <dbl>
# Get national-level data with the JHU as the source, again with messages
# switched off. The source name is written in upper case here ("JHU") while
# the other calls in this script use lower case ("who", "google"); both
# spellings ran successfully in this reprex.
get_national_data(source = "JHU", verbose = FALSE)
#> # A tibble: 106,446 x 16
#> date country iso_3166_1_alpha… cases_new cases_total deaths_new
#> <date> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 2020-01-22 Afghanistan AFG 0 0 0
#> 2 2020-01-22 Albania ALB 0 0 0
#> 3 2020-01-22 Algeria DZA 0 0 0
#> 4 2020-01-22 Andorra AND 0 0 0
#> 5 2020-01-22 Angola AGO 0 0 0
#> 6 2020-01-22 Antigua and Ba… ATG 0 0 0
#> 7 2020-01-22 Argentina ARG 0 0 0
#> 8 2020-01-22 Armenia ARM 0 0 0
#> 9 2020-01-22 Australia AUS 0 0 0
#> 10 2020-01-22 Austria AUT 0 0 0
#> # … with 106,436 more rows, and 10 more variables: deaths_total <dbl>,
#> # recovered_new <dbl>, recovered_total <dbl>, hosp_new <dbl>,
#> # hosp_total <dbl>, tested_new <dbl>, tested_total <dbl>,
#> # level_2_region_code <dbl>, Lat <dbl>, Long <dbl>
# Get regional-level data for the UK, passing `nhsregions = TRUE` to request
# the NHS regions data.
# `process_fns = c()` supplies an empty set of processing functions (`c()`
# evaluates to NULL), which turns off all optional processing steps.
# `verbose = FALSE` keeps progress messages out of the output.
get_regional_data("uk", nhsregions = TRUE, process_fns = c(), verbose = FALSE)
#> # A tibble: 5,093 x 16
#> date region region_code cases_new cases_total deaths_new deaths_total
#> <date> <chr> <lgl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-01-30 East of… NA NA NA NA NA
#> 2 2020-01-30 England NA 2 2 NA NA
#> 3 2020-01-30 London NA NA NA NA NA
#> 4 2020-01-30 Midlands NA NA NA NA NA
#> 5 2020-01-30 North E… NA 1 1 NA NA
#> 6 2020-01-30 North W… NA NA NA NA NA
#> 7 2020-01-30 Norther… NA NA NA NA NA
#> 8 2020-01-30 Scotland NA NA NA NA NA
#> 9 2020-01-30 South E… NA NA NA NA NA
#> 10 2020-01-30 South W… NA NA NA NA NA
#> # … with 5,083 more rows, and 9 more variables: recovered_new <dbl>,
#> # recovered_total <dbl>, hosp_new <dbl>, hosp_total <dbl>, tested_new <dbl>,
#> # tested_total <dbl>, hosp_new_first_admissions <dbl>, hosp_new_blend <dbl>,
#> # release_date <date>
# Step-by-step workflow: rather than a single get_*() call, create the data
# source object and call its methods one stage at a time.
# NOTE(review): presumably these are the same stages the get_*() helpers run
# internally -- confirm against the package documentation.
# Initialise the Google data source for level 2 data; `google` is the object
# that every following `google$...()` call operates on.
google <- initialise_dataclass("google", level = 2)
# Download the Google source (here this uses the cache defined earlier in the
# script). The messages below show three CSV files being requested:
# epidemiology, hospitalizations and index.
google$download()
#> Downloading data from https://storage.googleapis.com/covid19-open-data/v2/epidemiology.csv
#> Downloading data from https://storage.googleapis.com/covid19-open-data/v2/hospitalizations.csv
#> Downloading data from https://storage.googleapis.com/covid19-open-data/v2/index.csv
# Clean the downloaded Google data. This is a method call on the `google`
# object created earlier; its return value is not captured, and in this
# workflow it runs after download() and before filter().
google$clean()
#> Cleaning data
# Show the regions for which level 2 data are available. The result
# auto-prints as a character vector of country names (50 in this run); one
# of these names is what gets passed to filter() in the next step.
google$available_regions()
#> [1] "Argentina" "Spain"
#> [3] "Mexico" "France"
#> [5] "Switzerland" "Germany"
#> [7] "Indonesia" "Thailand"
#> [9] "United States of America" "Japan"
#> [11] "South Korea" "Czech Republic"
#> [13] "China" "Ukraine"
#> [15] "Philippines" "Australia"
#> [17] "Canada" "Malaysia"
#> [19] "Taiwan" "United Kingdom"
#> [21] "Sweden" "Estonia"
#> [23] "Italy" "Brazil"
#> [25] "Austria" "Portugal"
#> [27] "Belgium" "Chile"
#> [29] "Colombia" "Peru"
#> [31] "Israel" "Netherlands"
#> [33] "India" "Poland"
#> [35] "Haiti" "Norway"
#> [37] "Afghanistan" "Mozambique"
#> [39] "Russia" "South Africa"
#> [41] "Sierra Leone" "Romania"
#> [43] "Democratic Republic of the Congo" "Venezuela"
#> [45] "Sudan" "Pakistan"
#> [47] "Kenya" "Iraq"
#> [49] "Bangladesh" "Libya"
# Filter to the UK data available from Google. The name is given in lower
# case; the message printed below reports the match as "United Kingdom".
google$filter("united kingdom")
#> Filtering data to: United Kingdom
# Process the UK data. This runs on the `google` object after it has been
# filtered; the only visible output is the "Processing data" message.
google$process()
#> Processing data
# Return the UK data. At top level the result auto-prints as a tibble
# (6,874 x 23 in this run) whose columns include date, country, subregion,
# iso_code and the case/death/hospitalisation/testing counts.
google$return()
#> # A tibble: 6,874 x 23
#> date country iso_3166_1_alph… subregion iso_code cases_new cases_total
#> <date> <chr> <chr> <chr> <chr> <dbl> <dbl>
#> 1 2020-01-01 United … GBR East Mid… UKF NA NA
#> 2 2020-01-01 United … GBR East of … UKH NA NA
#> 3 2020-01-01 United … GBR England ENG NA NA
#> 4 2020-01-01 United … GBR London R… UKI NA NA
#> 5 2020-01-01 United … GBR North Ea… UKC NA NA
#> 6 2020-01-01 United … GBR North We… UKD NA NA
#> 7 2020-01-01 United … GBR Northern… NIR NA NA
#> 8 2020-01-01 United … GBR Scotland SCT NA NA
#> 9 2020-01-01 United … GBR South Ea… UKJ NA NA
#> 10 2020-01-01 United … GBR South We… UKK NA NA
#> # … with 6,864 more rows, and 16 more variables: deaths_new <dbl>,
#> # deaths_total <dbl>, recovered_new <dbl>, recovered_total <dbl>,
#> # hosp_new <dbl>, hosp_total <dbl>, tested_new <dbl>, tested_total <dbl>,
#> # aggregation_level <dbl>, current_hospitalized <dbl>,
#> # new_intensive_care <dbl>, total_intensive_care <dbl>,
#> # current_intensive_care <dbl>, new_ventilator <dbl>, total_ventilator <dbl>,
#> # current_ventilator <dbl>
#' Created on 2021-05-07 by the reprex package v2.0.0 <https://reprex.tidyverse.org>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment