Last active
May 10, 2018 00:08
-
-
Save benmarwick/682aa297c280bbb5e4a79b01a07d8eab to your computer and use it in GitHub Desktop.
sketch for ffsg change point analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Change point analysis | |
library(tidyverse) | |
# get the data of population per state and fatalities per state | |
source(here::here("/Analysis/Tables/permillcalculation.R")) | |
# compute fatalities per 1000 peple per year per state, using | |
# the custom function in permillcalculation.R | |
ratio_data <- permillcalc() | |
# get one state as a vector to test | |
alabama <- | |
ratio_data %>% | |
filter(state_name == "Alabama") %>% | |
select(p2000:p2017) %>% | |
unlist(., use.names=FALSE) | |
years <- readr::parse_number(names(ratio_data)[-c(1,2,ncol(ratio_data))]) | |
# take a quick look | |
plot(alabama, type = "b") | |
# plain change point analysis | |
library(changepoint) | |
cpt_test <- cpt.mean(alabama, class=FALSE, method="BinSeg") | |
# change points are at: | |
years[cpt_test] | |
# plot time series with change points from changepoint | |
ggplot(data_frame(values = alabama, | |
year = years), | |
aes(year, | |
values)) + | |
geom_line() + | |
geom_vline(xintercept = years[cpt_test], | |
colour = "blue") + | |
theme_minimal() | |
# bayesian change point analysis | |
library(bcp) | |
bcp_out <- bcp(alabama) | |
bcp_out$data | |
post <- bcp_out$posterior.prob | |
plot(bcp_out) | |
# plot time series with change points from bcp | |
ggplot(data_frame(values = alabama, | |
year = years), | |
aes(year, | |
values)) + | |
geom_line() + | |
geom_vline(xintercept = years[which(post > 0.2)], | |
colour = "red") + | |
theme_minimal() | |
# Plot both changepoint and bcp methods | |
ggplot(data_frame(values = alabama, | |
year = years), | |
aes(year, | |
values)) + | |
geom_line() + | |
geom_vline(xintercept = years[which(post > 0.2)], | |
colour = "red") + | |
geom_vline(xintercept = years[cpt_test], | |
colour = "blue") + | |
theme_minimal() | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#----------------------------------------------------------------------------- | |
# time series cluster analysis | |
# https://cran.r-project.org/web/packages/dtwclust/vignettes/dtwclust.pdf | |
library(dtwclust) | |
state_data <- | |
ratio_data %>% | |
select(p2000:p2017) %>% | |
slice(1:51) %>% | |
as.data.frame() | |
row.names(state_data) <- as.character(ratio_data$state_name[-nrow(ratio_data)]) | |
hc_sbd <- tsclust(state_data, type = "h", k = 20L, | |
preproc = zscore, seed = 899, | |
distance = "sbd", centroid = shape_extraction, | |
control = hierarchical_control(method = "average")) | |
# By default, the dendrogram is plotted in hierarchical clustering | |
plot(hc_sbd) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment