Skip to content

Instantly share code, notes, and snippets.

@Syncrossus
Last active September 5, 2019 08:42
Show Gist options
  • Save Syncrossus/7ca61a35127dfa476d40287a6a3e5d49 to your computer and use it in GitHub Desktop.
Save Syncrossus/7ca61a35127dfa476d40287a6a3e5d49 to your computer and use it in GitHub Desktop.
Fisher's dynamic programming algorithm for time series clustering in R
# Fisher's criterion computation function
# arguments :
# clusters : list as returned by clustfisher (see below)
# return :
# the value of the criterion
fisherCriterion <- function(clusters){
diameterSum <- 0
if(length(clusters$instants)==1){
diameterSum <- sum(clusters$D[clusters$instants[1:length(clusters$instants)-1], clusters$instants[2:length(clusters$instants)]-1])
}
return(clusters$D[1, clusters$instants[1]-1] + diameterSum + clusters$D[clusters$instants[length(clusters$instants)], clusters$n])
}
# Diameter matrix computation function as defined in Fisher's algorithm
# arguments :
# sequence : the dataset
# return :
# The Diameter matrix
diameters <- function(sequence){
n <- nrow(sequence)
D <- matrix(data = 0, nrow = n, ncol = n)
#diameter triangular matrix computation
for(a in 1:n){
for(b in a:n){
if(dim(sequence)[2]==1){ # handling 1 dimension datasets that otherwise crash colSums(sequence[a:b, ])
Mu_ab <- sum(sequence[a:b, ]) / (b - a + 1)
}else if(is.null(dim(sequence[a:b, ]))){ # handling a=b, which otherwise crashes colSums(sequence[a:b, ])
Mu_ab <- sequence[a:b, ]
}else{
Mu_ab <- colSums(sequence[a:b, ]) / (b - a + 1)
}
# - double transposing is required because R is weird with column vectors.
D[a, b] <- sum(rowSums(t(t(sequence[a:b, ]) - Mu_ab)^2))
}
}
return(D)
}
# Fisher's dynamic programming algorithm for time series clustering function
# arguments :
# sequence : the dataset
# segments : the desired number of segments
# return :
# the labels of the points
# the cluster change instants
# the diameter matrix
# the M1 & M2 matrices
# the number of observations in the dataset (useful to compute the criterion)
clustfisher <- function(sequence, segments=2){
# init
K <- segments
n <- nrow(sequence)
p <- ncol(sequence)
M1 <- matrix(data = 0, nrow = n, ncol = K)
M2 <- matrix(data = 0, nrow = n, ncol = K)
t <- rep(0, (K-1))
cluster <- rep(0, (K-1))
D <- diameters(sequence)
# computing optimal criteria
M1[,1] <- D[1,]
for(k in 2:K){
for (i in k:n) {
M1[i, k] <- min(M1[(k-1):(i-1), (k-1)]+D[k:i, i])
# here, we apply which.min on M1[(k-1):(i-1), (k-1)] which is a new matrix with indices that sart over at 1
# there is thus an offset of k-1 in the indices with respect to the original matrix.
M2[i, k] <- which.min(M1[(k-1):(i-1), (k-1)]+D[k:i, i]) + k - 1
}
}
# computing optimal cluster limits in time
k <- (K-1)
m <- n
while(k >= 1){
# m > 0 <=> t[k] - 1 > 0 <=> t[k] > 1 <=> M2[m, (k+1)] > 1 => voir ligne 48
t[k] <- M2[m, (k+1)]
m <- t[k] - 1
k <- k-1
}
# class labels formed from cluster limits
for(i in seq(1, (t[1] - 1))){
cluster[i] <- 1
}
if(K>2){
for(k in seq(2, (K - 1))){
for(i in seq(t[k-1], t[k] - 1)){
cluster[i] <- k
}
}
}
for(i in seq(t[K-1],n)){
cluster[i] <- K
}
return(list("labels" = cluster, "instants" = t, "D" = D, "M1" = M1, "M2" = M2, "n"=n))
}
@Syncrossus
Copy link
Author

This code is released under the WTFPL.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment