Syncrossus · September 5, 2019 08:42 · Syncrossus · Sep 5, 2019
diff --git a/fisher_dynamic_programming.R b/fisher_dynamic_programming.R
 # Fisher's criterion computation function
 # arguments :
 #    clusters : list as returned by clustfisher (see below)
 # return :
 #    the value of the criterion
 fisherCriterion <- function(clusters){
  diameterSum <- 0
  if(length(clusters$instants)==1){
    diameterSum <- sum(clusters$D[clusters$instants[1:length(clusters$instants)-1], clusters$instants[2:length(clusters$instants)]-1])
  }
  return(clusters$D[1, clusters$instants[1]-1] + diameterSum + clusters$D[clusters$instants[length(clusters$instants)], clusters$n])
 }

 # Diameter matrix computation function as defined in Fisher's algorithm
 # arguments :
 #    sequence : the dataset
 # return :
 #    The Diameter matrix
 diameters <- function(sequence){
  n <- nrow(sequence)
  D <- matrix(data = 0, nrow = n, ncol = n)
  
  #diameter triangular matrix computation
  for(a in 1:n){
    for(b in a:n){
      if(dim(sequence)[2]==1){  # handling 1 dimension datasets that otherwise crash colSums(sequence[a:b, ])
        Mu_ab <- sum(sequence[a:b, ]) / (b - a + 1)
      }else if(is.null(dim(sequence[a:b, ]))){  # handling a=b, which otherwise crashes colSums(sequence[a:b, ])
        Mu_ab <- sequence[a:b, ]
      }else{
        Mu_ab <- colSums(sequence[a:b, ]) / (b - a + 1)
      }
      # - double transposing is required because R is weird with column vectors.
      D[a, b] <- sum(rowSums(t(t(sequence[a:b, ]) - Mu_ab)^2))
    }
  }
  return(D)
 }

 # Fisher's dynamic programming algorithm for time series clustering function
 # arguments :
 #    sequence : the dataset
 #    segments : the desired number of segments
 # return :
 #    the labels of the points
 #    the cluster change instants
 #    the diameter matrix
 #    the M1 & M2 matrices
 #    the number of observations in the dataset (useful to compute the criterion)
 clustfisher <- function(sequence, segments=2){
  # init
  K <- segments
  n <- nrow(sequence)
  p <- ncol(sequence)
  M1 <- matrix(data = 0, nrow = n, ncol = K)
  M2 <- matrix(data = 0, nrow = n, ncol = K)
  t <- rep(0, (K-1))
  cluster <- rep(0, (K-1))
  
  D <- diameters(sequence)
  
  # computing optimal criteria
  M1[,1] <- D[1,]
  for(k in 2:K){
    for (i in k:n) {
      M1[i, k] <- min(M1[(k-1):(i-1), (k-1)]+D[k:i, i])
      # here, we apply which.min on M1[(k-1):(i-1), (k-1)] which is a new matrix with indices that sart over at 1
      # there is thus an offset of k-1 in the indices with respect to the original matrix.
      M2[i, k] <- which.min(M1[(k-1):(i-1), (k-1)]+D[k:i, i]) + k - 1
    }
  }
  
  # computing optimal cluster limits in time
  k <- (K-1)
  m <- n
  while(k >= 1){
    # m > 0 <=> t[k] - 1 > 0 <=> t[k] > 1 <=> M2[m, (k+1)] > 1 => voir ligne 48
    t[k] <- M2[m, (k+1)]
    m <- t[k] - 1
    k <- k-1
  }
  
  # class labels formed from cluster limits
  for(i in seq(1, (t[1] - 1))){
    cluster[i] <- 1
  }
  if(K>2){
    for(k in seq(2, (K - 1))){
      for(i in seq(t[k-1], t[k] - 1)){
        cluster[i] <- k
      }
    }
  }
  
  for(i in seq(t[K-1],n)){
    cluster[i] <- K
  }
  return(list("labels" = cluster, "instants" = t, "D" = D, "M1" = M1, "M2" = M2, "n"=n))
 }
	# Fisher's criterion computation function
	# arguments :
	# clusters : list as returned by clustfisher (see below)
	# return :
	# the value of the criterion
	fisherCriterion <- function(clusters){
	diameterSum <- 0
	if(length(clusters$instants)==1){
	diameterSum <- sum(clusters$D[clusters$instants[1:length(clusters$instants)-1], clusters$instants[2:length(clusters$instants)]-1])
	}
	return(clusters$D[1, clusters$instants[1]-1] + diameterSum + clusters$D[clusters$instants[length(clusters$instants)], clusters$n])
	}

	# Diameter matrix computation function as defined in Fisher's algorithm
	# arguments :
	# sequence : the dataset
	# return :
	# The Diameter matrix
	diameters <- function(sequence){
	n <- nrow(sequence)
	D <- matrix(data = 0, nrow = n, ncol = n)

	#diameter triangular matrix computation
	for(a in 1:n){
	for(b in a:n){
	if(dim(sequence)[2]==1){ # handling 1 dimension datasets that otherwise crash colSums(sequence[a:b, ])
	Mu_ab <- sum(sequence[a:b, ]) / (b - a + 1)
	}else if(is.null(dim(sequence[a:b, ]))){ # handling a=b, which otherwise crashes colSums(sequence[a:b, ])
	Mu_ab <- sequence[a:b, ]
	}else{
	Mu_ab <- colSums(sequence[a:b, ]) / (b - a + 1)
	}
	# - double transposing is required because R is weird with column vectors.
	D[a, b] <- sum(rowSums(t(t(sequence[a:b, ]) - Mu_ab)^2))
	}
	}
	return(D)
	}

	# Fisher's dynamic programming algorithm for time series clustering function
	# arguments :
	# sequence : the dataset
	# segments : the desired number of segments
	# return :
	# the labels of the points
	# the cluster change instants
	# the diameter matrix
	# the M1 & M2 matrices
	# the number of observations in the dataset (useful to compute the criterion)
	clustfisher <- function(sequence, segments=2){
	# init
	K <- segments
	n <- nrow(sequence)
	p <- ncol(sequence)
	M1 <- matrix(data = 0, nrow = n, ncol = K)
	M2 <- matrix(data = 0, nrow = n, ncol = K)
	t <- rep(0, (K-1))
	cluster <- rep(0, (K-1))

	D <- diameters(sequence)

	# computing optimal criteria
	M1[,1] <- D[1,]
	for(k in 2:K){
	for (i in k:n) {
	M1[i, k] <- min(M1[(k-1):(i-1), (k-1)]+D[k:i, i])
	# here, we apply which.min on M1[(k-1):(i-1), (k-1)] which is a new matrix with indices that sart over at 1
	# there is thus an offset of k-1 in the indices with respect to the original matrix.
	M2[i, k] <- which.min(M1[(k-1):(i-1), (k-1)]+D[k:i, i]) + k - 1
	}
	}

	# computing optimal cluster limits in time
	k <- (K-1)
	m <- n
	while(k >= 1){
	# m > 0 <=> t[k] - 1 > 0 <=> t[k] > 1 <=> M2[m, (k+1)] > 1 => voir ligne 48
	t[k] <- M2[m, (k+1)]
	m <- t[k] - 1
	k <- k-1
	}

	# class labels formed from cluster limits
	for(i in seq(1, (t[1] - 1))){
	cluster[i] <- 1
	}
	if(K>2){
	for(k in seq(2, (K - 1))){
	for(i in seq(t[k-1], t[k] - 1)){
	cluster[i] <- k
	}
	}
	}

	for(i in seq(t[K-1],n)){
	cluster[i] <- K
	}
	return(list("labels" = cluster, "instants" = t, "D" = D, "M1" = M1, "M2" = M2, "n"=n))
	}