Skip to content

Instantly share code, notes, and snippets.

@benmarwick
Last active May 10, 2018 00:08
Show Gist options
  • Save benmarwick/682aa297c280bbb5e4a79b01a07d8eab to your computer and use it in GitHub Desktop.
Save benmarwick/682aa297c280bbb5e4a79b01a07d8eab to your computer and use it in GitHub Desktop.
sketch for ffsg change point analysis
# Change point analysis
library(tidyverse)
# get the data of population per state and fatalities per state
source(here::here("/Analysis/Tables/permillcalculation.R"))
# compute fatalities per 1000 peple per year per state, using
# the custom function in permillcalculation.R
ratio_data <- permillcalc()
# get one state as a vector to test
alabama <-
ratio_data %>%
filter(state_name == "Alabama") %>%
select(p2000:p2017) %>%
unlist(., use.names=FALSE)
years <- readr::parse_number(names(ratio_data)[-c(1,2,ncol(ratio_data))])
# take a quick look
plot(alabama, type = "b")
# plain change point analysis
library(changepoint)
cpt_test <- cpt.mean(alabama, class=FALSE, method="BinSeg")
# change points are at:
years[cpt_test]
# plot time series with change points from changepoint
ggplot(data_frame(values = alabama,
year = years),
aes(year,
values)) +
geom_line() +
geom_vline(xintercept = years[cpt_test],
colour = "blue") +
theme_minimal()
# bayesian change point analysis
library(bcp)
bcp_out <- bcp(alabama)
bcp_out$data
post <- bcp_out$posterior.prob
plot(bcp_out)
# plot time series with change points from bcp
ggplot(data_frame(values = alabama,
year = years),
aes(year,
values)) +
geom_line() +
geom_vline(xintercept = years[which(post > 0.2)],
colour = "red") +
theme_minimal()
# Plot both changepoint and bcp methods
ggplot(data_frame(values = alabama,
year = years),
aes(year,
values)) +
geom_line() +
geom_vline(xintercept = years[which(post > 0.2)],
colour = "red") +
geom_vline(xintercept = years[cpt_test],
colour = "blue") +
theme_minimal()
#-----------------------------------------------------------------------------
# time series cluster analysis
# https://cran.r-project.org/web/packages/dtwclust/vignettes/dtwclust.pdf
library(dtwclust)
state_data <-
ratio_data %>%
select(p2000:p2017) %>%
slice(1:51) %>%
as.data.frame()
row.names(state_data) <- as.character(ratio_data$state_name[-nrow(ratio_data)])
hc_sbd <- tsclust(state_data, type = "h", k = 20L,
preproc = zscore, seed = 899,
distance = "sbd", centroid = shape_extraction,
control = hierarchical_control(method = "average"))
# By default, the dendrogram is plotted in hierarchical clustering
plot(hc_sbd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment