Created
June 22, 2014 23:55
-
-
Save joelkr/25c143d21cf986c7b3ed to your computer and use it in GitHub Desktop.
R Functions for Collaborative Filtering Cardiac Arrythmia Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cost and gradient functions for optim(). These are a port of Coursera | |
# Machine Learning course topic Collaborative Filtering from MATLAB | |
# to R. | |
# Cost Function | |
coFilterCost <- function (params, npatients, nleads, nreadings,Y, R, lambda) | |
{ | |
# params: X and Theta flattened to vector | |
# npatients, nleads, nreadings: number of patients, leads, and numeric | |
# fields in data | |
# The values in X and Theta are what we are optimizing, so they need to be | |
# flattened into a vector of parameters for optim() | |
# X: estimated lead contributions to each recorded reading | |
# Numeric data fields X 12 leads + Lead[0] = 13 | |
# Theta: estimated Cardiograph lead readings | |
# Number of Patients X 12 leads + Lead[0] = 13 | |
# Y: actual patient data | |
# Number Readings X Patients | |
# R: matrix with 1's in positions of Y with patient data and 0's elsewhere. | |
# Numeric Fields X Patients | |
# lambda: regularization parameter. | |
# Add l[0] term for y-intercept | |
nleads <- nleads + 1 | |
sx <- nreadings * nleads | |
st <- npatients * nleads | |
X <- matrix(params[1:sx], nreadings, nleads) | |
Theta <- matrix(params[(sx + 1):(sx + st)], npatients, nleads) | |
# Must transpose either R and Y or X %*% t(Theta) | |
S <- R * (X %*% t(Theta) - Y) | |
J <- sum(S^2)/2.0 # This is mean squared error | |
# This prevents params growing | |
J <- J + (lambda/2.0)*(sum(Theta^2) + sum(X^2)) | |
return(J) | |
} | |
coFilterGrad <- function(params, npatients, nleads, nreadings, Y, R, lambda) { | |
# Add l[0] term for y-intercept | |
nleads <- nleads + 1 | |
sx <- nreadings * nleads | |
st <- npatients * nleads | |
X <- matrix(params[1:sx], nreadings, nleads) | |
Theta <- matrix(params[(sx + 1):(sx + st)], npatients, nleads) | |
Theta_grad <- matrix(0, npatients, nleads) | |
X_grad <- matrix(0, nreadings, nleads) | |
S <- R * (X %*% t(Theta) - Y) | |
X_grad <- S %*% Theta | |
Theta_grad <- t(S) %*% X | |
X_grad <- X_grad + (lambda * X) | |
Theta_grad <- Theta_grad + (lambda * Theta) | |
# Flatten everything to a vector as in octave. May be wrong idea. | |
grad <- c(as.vector(X_grad), as.vector(Theta)) | |
return(grad) | |
} | |
normalizeY <- function(Y,R) { | |
# Averaging a quantity changing through time is doubtful. Some readings | |
# might possibly be voltages at intervals or something of the sort. | |
# Dividing by max(row value) seemed to cause optim() to zero all values in | |
# the Theta matrix. This might be due to linear dependence, since it is | |
# multiplying all values by a constant, but I am not sure. Subtracting | |
# the average also causes optim() to zero out the Theta matrix as returned | |
# in $par, but I should normalize the Y matrix somehow, not quite sure what | |
# will work. | |
rc <- dim(Y) | |
Ynorm <- matrix(0, rc[1], rc[2]) | |
nrm <- vector(length=rc[1]) | |
# Need average of each feature. Y is nreadings X npatients | |
# so need average across rows | |
for( i in 1:rc[1]){ | |
idx <- which(R[i,] == 1) | |
if(length(idx) == 0) { | |
nrm[i] <- 0 | |
} | |
else { | |
nrm[i] <- mean(Y[i,idx]) | |
} | |
Ynorm[i, idx] <- Y[i, idx] - nrm[i] | |
} | |
yl <- list(Ynorm, nrm) | |
return(yl) | |
} | |
restoreY <- function(Ynorm, nrm) { | |
rc <- dim(Ynorm) | |
Y <- matrix(0, rc[1], rc[2]) | |
for(i in 1:rc[1]) { | |
Y[i,] <- Ynorm[i,] + nrm[i] | |
} | |
return(Y) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment