Last active
November 27, 2017 05:40
-
-
Save thoughtfulbloke/7076a6753f8778c1314570e8fe5d6673 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(OECD) | |
library(feather) | |
library(dplyr) | |
library(countrycode) | |
library(tidyr) | |
library(purrr) | |
library(broom) | |
library(ggplot2) | |
library(viridis) | |
# create folder for storing data if it does not already exist locally | |
dataFolder= "traffic_data" | |
if(!dir.exists(dataFolder)){ | |
dir.create(dataFolder) | |
} | |
# get road accident data if it does not already exist locally | |
dataset <- "ITF_ROAD_ACCIDENTS" | |
accidentfile <- paste0(dataFolder,"/OECD_",dataset, ".feather") | |
if(!file.exists(accidentfile)){ | |
dstruc <- get_data_structure(dataset) | |
df <- get_dataset(dataset) | |
df2 <- merge(df, dstruc$UNIT, by.x="UNIT", by.y="id") | |
names(df2)[9] <- "UNIT_label" | |
df3 <- merge(df2, dstruc$VARIABLE, by.x="VARIABLE", by.y="id") | |
names(df3)[10] <- "VARIABLE_label" | |
write_feather(df3, path=accidentfile) | |
} | |
# get average work hours data if it does not exist locally | |
dataset <- "ANHRS" | |
workfile <- paste0(dataFolder,"/OECD_",dataset, ".feather") | |
if(!file.exists(workfile)){ | |
dstruc <- get_data_structure(dataset) | |
df <- get_dataset(dataset) | |
df2 <- merge(df, dstruc$EMPSTAT, by.x="EMPSTAT", by.y="id") | |
names(df2)[7] <- "EMPSTAT_label" | |
write_feather(df, path=workfile) | |
} | |
rm(list = setdiff(ls(), c('accidentfile','workfile'))) | |
# read in accident data | |
ITF_ROAD_ACCIDENTS <- read_feather(accidentfile) %>% | |
filter(VARIABLE == "T-ACCI-KIL") %>% | |
mutate(obsTime = as.integer(obsTime)) %>% | |
select(COUNTRY, obsTime, toll = obsValue) | |
# read in work hours | |
OECD <- read_feather(workfile) | |
# combine fatalities from the accident data with the population data | |
combo <- OECD %>% | |
filter(EMPSTAT == "TE") %>% | |
mutate(obsTime = as.integer(obsTime)) %>% | |
select(COUNTRY, obsTime, work = obsValue) %>% | |
inner_join(ITF_ROAD_ACCIDENTS) | |
# make a lot of linear models (one for each country) and get their summaries | |
country_model <- function(df) { | |
lm(dependent ~ independent, data = df) | |
} | |
get_slope <- function(mdL) { | |
if (mdL$coefficients[2] < 0){ | |
return(-1) | |
} else{ | |
return(1) | |
} | |
} | |
comparisons <- combo %>% | |
arrange(COUNTRY, obsTime) %>% group_by(COUNTRY) %>% | |
mutate(independent = work, | |
dependent = toll) %>% | |
nest() %>% | |
mutate(model = map(data, country_model), | |
slope = map(model, get_slope), | |
glance = map(model, broom::glance)) %>% | |
unnest(glance, slope) %>% mutate(significance = (1-p.value)*slope) | |
comparisons %>% | |
mutate(state = if_else(COUNTRY=="NZL", "New Zealand", "Everyone Else")) %>% | |
ggplot(aes(x=slope, y=r.squared, colour=state, shape=state)) + geom_point() + | |
ggtitle("Road Toll ~ Work hours, 1177 obs, 36 countries (data ITF/OECD)") + | |
ylim(0,1) + xlim(-1, 1) + | |
scale_color_viridis(discrete=TRUE, end=0.9) + theme_bw() + | |
xlab("Significance of model") + ylab("Fit of Data (r squared)") + | |
geom_vline(xintercept=0.95, colour="red") + | |
geom_vline(xintercept=-0.95, colour="red") + | |
annotate(geom = "text", x = 0.90, y = 0.5, | |
label = "0.95 Significance, Postive slope", | |
color = "red", angle = -90) + | |
annotate(geom = "text", x = -0.90, y = 0.5, | |
label = "0.95 Significance, Negative slope", | |
color = "red", angle = 90) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment