Created
December 5, 2017 07:56
-
-
Save thoughtfulbloke/1b2eeb054b5fa1e1e16b840e6d01f7a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(OECD) | |
library(feather) | |
library(dplyr) | |
library(tidyr) | |
library(purrr) | |
library(broom) | |
library(ggplot2) | |
library(viridis) | |
dataFolder= "traffic_data" | |
dataset <- "FAMILY" | |
familyfile <- paste0(dataFolder,"/OECD_",dataset, ".feather") | |
if(!dir.exists(dataFolder)){ | |
dir.create(dataFolder) | |
} | |
if(!file.exists(familyfile)){ | |
dstruc <- get_data_structure(dataset) | |
df <- get_dataset(dataset) | |
df2 <- merge(df, dstruc$UNIT, by.x="UNIT", by.y="id") | |
names(df2)[9] <- "UNIT_label" | |
df3 <- merge(df2, dstruc$IND, by.x="IND", by.y="id") | |
names(df3)[10] <- "IND_label" | |
write_feather(df3, path=familyfile) | |
} | |
rm(list = setdiff(ls(), c('familyfile'))) | |
FAM <- read_feather(familyfile) | |
# check indicator names | |
FAM %>% select(IND, IND_label) %>% distinct() %>% View() | |
country_model <- function(df) { | |
lm(dependent ~ independent, data = df) | |
} | |
get_slope <- function(mdL) { | |
mdL$coefficients[2] | |
} | |
FAM %>% filter(IND == "FAM16A" | IND == "FAM4B") %>% | |
mutate(obsTime = as.integer(obsTime)) %>% | |
select(COU, obsTime, IND, obsValue) %>% | |
arrange(COU, obsTime) %>% spread(IND, obsValue) %>% | |
filter(!is.na(FAM16A), !is.na(FAM4B)) %>% glimpse() %>% | |
arrange(COU, obsTime) %>% group_by(COU) %>% | |
mutate(independent = FAM4B, | |
dependent = FAM16A) %>% | |
nest() %>% | |
mutate(model = map(data, country_model), | |
slope = map(model, get_slope), | |
glance = map(model, broom::glance)) %>% | |
unnest(glance, slope) %>% mutate(significance = (1-p.value)*slope) %>% | |
mutate(state = if_else(COU=="NZL", "New Zealand", "Everyone Else")) %>% | |
glimpse() %>% | |
ggplot(aes(x=slope, y=r.squared, colour=state)) + geom_point() + | |
ylim(0,1) + xlab("model slope (further from 0 is more dramatic relationship)") + | |
ylab("r.squared (higher is data fits model better)") + | |
ggtitle("1604 obs from 39 countries (data OECD:FAMILY dataset)\nAs divorce rates increase infant mortality decreases") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment