In this gist, we replicate two simulations by (Liu et al. 2013), and
show how the R
-package densityratio
can be used for change-point
detection in time-series data. To do so, we first load the required
packages, and set the future
environment to do parallel processing.
library(furrr)
#> Loading required package: future
library(densityratio)
plan(multisession(workers = 18))
We generate
such that the changepoints become more pronounced as time
progresses. In the second simulation, the residuals are distributed as a
normal
such that again the changepoints become more pronounced as time progresses.
set.seed(1)
y1 <- y2 <- numeric(4999)
mu <- numeric(50)
for (n in 2:50) mu[n] <- mu[n-1] + n/16
for (t in 3:4999) {
mu_t <- mu[t%/%100 + 1]
y1[t] <- 0.6 * y1[t-1] - 0.5 * y1[t-2] + rnorm(1, mu_t, 1.5)
}
sigma <- numeric(50)
for (n in 1:50) {
if (n %% 2 == 0) {
sigma[n] <- log(exp(1) + n/4)
} else {
sigma[n] <- 1
}
}
for (t in 3:4999) {
sigma_t <- sigma[t%/%100 + 1]
y2[t] <- 0.6 * y2[t-1] - 0.5 * y2[t-2] + rnorm(1, 0, sigma_t)
}
plot(4000:4999, y1[4000:4999], type = "l", col = "blue")
abline(v = c(40:50*100))
plot(4000:4999, y2[4000:4999], type = "l", col = "blue")
abline(v = c(40:50*100))
If we plot the distribution of the data sets, we can indeed see a shift in the distribution at the specified timepoints.
The next step is to estimate the density ratio between all neighbouring data sets. That is, we estimated the density ratio between the points
k <- 5
n <- 50
Y1t <- purrr::map(
1:length(y1),
~y1[.x:min(.x+k - 1, length(y1))]
)
curly_Y1t <- purrr::map(
3500:4499,
function(t) {
dat1 <- Y1t[(t-n):(t-1)]
dat2 <- Y1t[(t):(t+n-1)]
names(dat1) <- names(dat2) <- paste0("X", 1:n)
list(data.frame(dat1), data.frame(dat2))
}
)
Y2t <- purrr::map(
1:length(y2),
~y2[max(1, .x-k):.x]
)
curly_Y2t <- purrr::map(
3500:4499,
function(t) {
dat1 <- Y2t[(t-n+1):t]
dat2 <- Y2t[(t+1):(t+n)]
names(dat1) <- names(dat2) <- paste0("X", 1:n)
list(data.frame(dat1), data.frame(dat2))
}
)
fits1 <- furrr::future_map_dbl(
1:(length(curly_Y1t) - 1),
function(t) {
dat <- unlist(curly_Y1t[t], recursive = FALSE)
dat1 <- t(dat[[1]])
dat2 <- t(dat[[2]])
fit <- ulsif(
dat1,
dat2,
sigma_quantile = c(0.3, 0.4, 0.5, 0.6, 0.7),
lambda = 10^{-3:1},
ncenters = 50
)
fit_rec <- ulsif(
dat2,
dat1,
sigma_quantile = c(0.3, 0.4, 0.5, 0.6, 0.7),
lambda = 10^{-3:1},
ncenters = 50
)
PE <- summary(fit)$PE + summary(fit_rec)$PE
PE
}, .progress = TRUE, .options = furrr_options(seed = TRUE))
fits2 <- furrr::future_map_dbl(
1:(length(curly_Y2t) - 1),
function(t) {
dat <- unlist(curly_Y2t[t], recursive = FALSE)
dat1 <- t(dat[[1]])
dat2 <- t(dat[[2]])
fit <- ulsif(
dat1,
dat2,
sigma_quantile = c(0.3, 0.4, 0.5, 0.6, 0.7),
lambda = 10^{-3:1},
ncenters = 50
)
fit_rec <- ulsif(
dat2,
dat1,
sigma_quantile = c(0.3, 0.4, 0.5, 0.6, 0.7),
lambda = 10^{-3:1},
ncenters = 50
)
PE <- summary(fit)$PE + summary(fit_rec)$PE
PE
}, .progress = TRUE, .options = furrr_options(seed = TRUE))
plot(3501:4499, fits1)
abline(v = c(36:44*100))
plot(3501:4499, fits2)
abline(v = c(36:44*100))
Created on 2023-10-30 with reprex v2.0.2
Liu, Song, Makoto Yamada, Nigel Collier, and Masashi Sugiyama. 2013. “Change-Point Detection in Time-Series Data by Relative Density-Ratio Estimation.” Neural Networks 43 (January): 72–83. https://doi.org/10.1016/j.neunet.2013.01.012.