Created
April 5, 2020 03:27
-
-
Save vincentarelbundock/094afde1ae658f29965aac7d08114324 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "COVID-19 Worldometer data"
output: html_notebook
---
This notebook pulls a table with useful information out of the Worldometer coronavirus page (<https://www.worldometers.info/coronavirus/>) and then makes a graph.
```{r}
library(WDI)
library(countrycode)
library(rvest)
library(tidyverse)
library(modelsummary)

# World Bank data: 2018 GDP per capita (indicator NY.GDP.PCAP.CD) for every
# entry WDI returns. `extra = TRUE` requests the additional metadata columns,
# of which `latitude` is used below.
wb <- WDI(
  country = "all",
  indicator = c(gdppc = "NY.GDP.PCAP.CD"),
  start = 2018,
  end = 2018,
  extra = TRUE
)

# Latitude goes through character before numeric so the conversion is also
# correct if the column arrives as a factor. Distance from the equator is the
# absolute latitude times 111 (roughly the number of km per degree).
wb <- wb %>%
  mutate(equator_km = 111 * abs(as.numeric(as.character(latitude)))) %>%
  select(iso3c, gdppc, equator_km, latitude)
# Scrape the per-country table from the Worldometer coronavirus page, convert
# the count columns to numeric, and attach the World Bank covariates in `wb`.
dat <- read_html("https://www.worldometers.info/coronavirus/") %>%
  html_node("#main_table_countries_today") %>%
  html_table() %>%
  rename(
    Test_1m = "Tests/1M pop",
    Country = "Country,Other"
  ) %>%
  # Drop the aggregate rows; only individual countries/territories are kept.
  filter(Country != "World", Country != "Total:") %>%
  # Empty strings become NA. `na_if()` works on vectors, not on a whole data
  # frame, so it is applied column by column to the character columns.
  mutate(across(where(is.character), ~ na_if(.x, ""))) %>%
  select(-contains("New")) %>%
  # Strip thousands separators, then convert every column except the country
  # name to numeric. Entries that are not numbers become NA (with a warning).
  mutate(across(-Country, ~ as.numeric(str_remove_all(.x, ",")))) %>%
  mutate(iso3c = countrycode(Country, "country.name", "iso3c")) %>%
  # Names that countrycode cannot match get an NA code. `na_matches = "never"`
  # keeps those rows from being joined to (and duplicated by) any NA-keyed
  # rows in `wb`.
  left_join(wb, by = "iso3c", na_matches = "never")

dat
```

# Graph of tests vs. cases

```{r}
# Scatter plot of total cases against total tests, with both axes on a log2
# scale and breaks at odd powers of two (2^1, 2^3, ..., 2^19).
tests_plot <- dat %>%
  ggplot(aes(x = TotalTests, y = TotalCases)) +
  geom_point() +
  # geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(trans = "log2", breaks = 2^seq(1, 20, 2)) +
  scale_x_continuous(trans = "log2", breaks = 2^seq(1, 20, 2)) +
  theme_minimal() +
  labs(
    x = "Total Number of Tests",
    y = "Total Number of Cases",
    title = "Cross-national COVID-19 case and test counts correlate closely",
    caption = "Data from Worldometer: https://www.worldometers.info/coronavirus/"
  )

# `ggsave()` is a function call, not a plot component, so it must not be
# chained onto the plot with `+`. Save the plot object explicitly instead.
ggsave("Tests.png", plot = tests_plot, width = 8, height = 5)

# Display the figure in the notebook.
tests_plot
```

# Linear regression models

```{r}
# Two linear models of total cases: tests only, then tests plus log GDP per
# capita and distance from the equator.
models <- list(
  Bivariate = lm(TotalCases ~ TotalTests, data = dat),
  Controls = lm(
    TotalCases ~ TotalTests + I(log(gdppc)) + equator_km,
    data = dat
  )
)

# Display labels for the coefficients, keyed by the term names lm() produces.
coef_labels <- c(
  "TotalTests" = "# of Tests",
  "I(log(gdppc))" = "log(GDP/cap)",
  "equator_km" = "Distance from equator",
  "(Intercept)" = "Constant"
)

# Side-by-side regression table for both models.
msummary(
  models,
  coef_map = coef_labels,
  title = "Dependent variable: Total number of covid-19 cases."
)
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment