pgilad · December 9, 2016 08:39
diff --git a/optim-log-likelihood.R b/optim-log-likelihood.R
 setwd("~/Documents/MA Statistics/Statistical Models A/targil 4")

 H <- matrix(c(0, 2, 4, 6, 8, 2, 0, 2, 4, 6, 4, 2, 0, 2, 4, 6, 4, 2, 0, 2, 8, 6, 4, 2, 0), byrow = TRUE, nrow = 5)
 X <- as.matrix(read.table("./targ3dat.txt", quote = "\"", comment.char = ""))
 n <- nrow(X)
 d <- ncol(X)
 S <- sum(apply(X, 1, crossprod))

 calculate_V <- function(theta) {
  psi <- theta[1:d]
  gamma <- theta[d + 1]

  # entry wise multiplication
  return (outer(psi, psi) * exp(-gamma * H))
 }

 log_likelihood <- function(theta) {
  V <- calculate_V(theta)
  first_expression <- -n*d/2 * log(2 * pi)
  second_expression <- -n/2 * log(det(V))
  third_expression <- -1/2 * sum(diag(solve(V) * S))

  # use negative l.l because optim tries to minimize the function
  return (-1 * (first_expression + second_expression + third_expression))
 }

 derive_V <- function(V, theta, k) {
  V_temp <- matrix(0, nrow = nrow(V), ncol = ncol(V))
  gamma <- -theta[d + 1]

  for (r in 1:nrow(V)) {
    for (s in 1:ncol(V)) {
      if (k <= d) {
        psi_1 <- ifelse(r == k, theta[s], 0)
        psi_2 <- ifelse(s == k, theta[r], 0)
        V_temp[r, s] <- (psi_1 + psi_2) * exp(-gamma * H[r, s])
      } else {
        V_temp[r, s] <- -theta[s] * theta[r] * H[r, s] * exp(-gamma * H[r, s])
      }
    }
  }

  return (V_temp)
 }

 get_gradient_column <- function(i, V, theta) {
  grad <- derive_V(V, theta, i)
  first_expression <- -n/2 * sum(diag(solve(V) %*% grad))
  second_expression <- -n/2 * sum(diag(solve(V) %*% grad %*% solve(V) * S))

  return (first_expression + second_expression)
 }

 gradient <- function(theta) {
  V <- calculate_V(theta)
  next_gradient <- sapply(1:length(theta), get_gradient_column, V = V, theta = theta)

  return (next_gradient)
 }

 initial_theta <- rep(1, 6)
 sol <- optim(initial_theta, fn = log_likelihood, gr = gradient, method = "BFGS")

 # echo solved theta
 print (sol)
diff --git a/targ3dat.txt b/targ3dat.txt
 1 -1.13 -0.42 -0.51 -0.3
 2 -1.13  0.28 -0.01  1.9
 3 -1.13  0.68  1.09  2.2
 4  0.47  1.28 -0.11 -1.3
 5  2.07 -0.42 -0.51 -0.7
 6 -0.83 -1.32 -0.31  0.4
 7 -0.43  0.28 -0.41 -1.2
 8 -0.03  0.28 -0.51  0.8
 9 -0.13  0.98 -0.81  1.0
 10  0.97  0.68 -0.91 -0.4
 11 -0.43 -0.12  0.19  0.2
 12  3.07  0.28  1.09 -0.8
 13 -0.43 -1.12 -0.31 -0.7
 14  1.07  0.38 -0.01 -1.7
 15 -0.83 -1.12 -2.41 -1.4
 16  0.77 -0.02  0.89 -0.5
 17 -0.43 -1.32 -0.31 -0.2
 18 -0.53 -1.42 -1.21 -2.1
 19  1.47  0.28  0.29 -0.5
 20 -0.43 -0.72  0.49  0.0
 21 -1.33  1.38 -0.01  0.5
 22 -0.83 -1.12 -1.31 -0.2
 23 -0.43 -0.42 -0.31  0.5
 24  1.47  0.38 -0.91  0.1
 25  0.07 -1.02  0.19 -0.5
 26  0.47  0.28  0.89 -0.1
 27 -0.33  0.98 -0.61  1.1
 28  0.47  0.28  0.89  1.1
 29 -1.13  0.68  0.99 -0.8
 30  2.17  0.28  1.79  1.2
 31 -0.43  0.18 -0.41  0.3
 32 -0.43 -0.02  0.49  0.7
 33 -1.23  0.68 -0.41 -0.6
 34  0.77  0.28  0.59  0.2
 35 -0.43  0.28  1.09  1.4
 36 -0.43  0.28  0.59  0.9
 37 -0.43 -0.82  0.69 -0.7
	setwd("~/Documents/MA Statistics/Statistical Models A/targil 4")

	H <- matrix(c(0, 2, 4, 6, 8, 2, 0, 2, 4, 6, 4, 2, 0, 2, 4, 6, 4, 2, 0, 2, 8, 6, 4, 2, 0), byrow = TRUE, nrow = 5)
	X <- as.matrix(read.table("./targ3dat.txt", quote = "\"", comment.char = ""))
	n <- nrow(X)
	d <- ncol(X)
	S <- sum(apply(X, 1, crossprod))

	calculate_V <- function(theta) {
	psi <- theta[1:d]
	gamma <- theta[d + 1]

	# entry wise multiplication
	return (outer(psi, psi) * exp(-gamma * H))
	}

	log_likelihood <- function(theta) {
	V <- calculate_V(theta)
	first_expression <- -nd/2 log(2 * pi)
	second_expression <- -n/2 * log(det(V))
	third_expression <- -1/2 * sum(diag(solve(V) * S))

	# use negative l.l because optim tries to minimize the function
	return (-1 * (first_expression + second_expression + third_expression))
	}

	derive_V <- function(V, theta, k) {
	V_temp <- matrix(0, nrow = nrow(V), ncol = ncol(V))
	gamma <- -theta[d + 1]

	for (r in 1:nrow(V)) {
	for (s in 1:ncol(V)) {
	if (k <= d) {
	psi_1 <- ifelse(r == k, theta[s], 0)
	psi_2 <- ifelse(s == k, theta[r], 0)
	V_temp[r, s] <- (psi_1 + psi_2) * exp(-gamma * H[r, s])
	} else {
	V_temp[r, s] <- -theta[s] * theta[r] * H[r, s] * exp(-gamma * H[r, s])
	}
	}
	}

	return (V_temp)
	}

	get_gradient_column <- function(i, V, theta) {
	grad <- derive_V(V, theta, i)
	first_expression <- -n/2 * sum(diag(solve(V) %*% grad))
	second_expression <- -n/2 * sum(diag(solve(V) %% grad %% solve(V) * S))

	return (first_expression + second_expression)
	}

	gradient <- function(theta) {
	V <- calculate_V(theta)
	next_gradient <- sapply(1:length(theta), get_gradient_column, V = V, theta = theta)

	return (next_gradient)
	}

	initial_theta <- rep(1, 6)
	sol <- optim(initial_theta, fn = log_likelihood, gr = gradient, method = "BFGS")

	# echo solved theta
	print (sol)
	1 -1.13 -0.42 -0.51 -0.3
	2 -1.13 0.28 -0.01 1.9
	3 -1.13 0.68 1.09 2.2
	4 0.47 1.28 -0.11 -1.3
	5 2.07 -0.42 -0.51 -0.7
	6 -0.83 -1.32 -0.31 0.4
	7 -0.43 0.28 -0.41 -1.2
	8 -0.03 0.28 -0.51 0.8
	9 -0.13 0.98 -0.81 1.0
	10 0.97 0.68 -0.91 -0.4
	11 -0.43 -0.12 0.19 0.2
	12 3.07 0.28 1.09 -0.8
	13 -0.43 -1.12 -0.31 -0.7
	14 1.07 0.38 -0.01 -1.7
	15 -0.83 -1.12 -2.41 -1.4
	16 0.77 -0.02 0.89 -0.5
	17 -0.43 -1.32 -0.31 -0.2
	18 -0.53 -1.42 -1.21 -2.1
	19 1.47 0.28 0.29 -0.5
	20 -0.43 -0.72 0.49 0.0
	21 -1.33 1.38 -0.01 0.5
	22 -0.83 -1.12 -1.31 -0.2
	23 -0.43 -0.42 -0.31 0.5
	24 1.47 0.38 -0.91 0.1
	25 0.07 -1.02 0.19 -0.5
	26 0.47 0.28 0.89 -0.1
	27 -0.33 0.98 -0.61 1.1
	28 0.47 0.28 0.89 1.1
	29 -1.13 0.68 0.99 -0.8
	30 2.17 0.28 1.79 1.2
	31 -0.43 0.18 -0.41 0.3
	32 -0.43 -0.02 0.49 0.7
	33 -1.23 0.68 -0.41 -0.6
	34 0.77 0.28 0.59 0.2
	35 -0.43 0.28 1.09 1.4
	36 -0.43 0.28 0.59 0.9
	37 -0.43 -0.82 0.69 -0.7