Skip to content

Instantly share code, notes, and snippets.

@geofferyzh
Created April 29, 2012 02:45
Show Gist options
  • Save geofferyzh/2525736 to your computer and use it in GitHub Desktop.
Save geofferyzh/2525736 to your computer and use it in GitHub Desktop.
CF - SlopeOne Implementation in R
#########################################################################
#-----------------------------------------------------------------------#
# SlopeOne Recommender Implementation in R #
#########################################################################
#####################
# Sample Data
#####################
# sample data 1
# Toy ratings: one vector per movie, one position per user (U1..U5).
# NA marks a movie the user has not rated.
Mov1 <- c(4, 4, 3, 4, 2)
Mov2 <- c(NA, 2, NA, 4, 1)
Mov3 <- c(5, 1, 2, NA, 3)
Mov4 <- c(5, NA, 4, NA, 5)

# Row labels for the users.
username <- paste0("U", 1:5)

# NOTE: this binds the name `t` to a character vector. The call `t(item)`
# below still reaches base::t(), because R skips non-function bindings when
# it resolves the function position of a call.
t <- paste0("Item", 1:7)

# `item`: users in rows, movies in columns. `user`: its transpose.
item <- data.frame(
  Mov1 = Mov1,
  Mov2 = Mov2,
  Mov3 = Mov3,
  Mov4 = Mov4,
  row.names = username
)
user <- t(item)
# sample data 2 (Mahout in Action)
# One rating vector per user, one position per item (Item1..Item7).
# NA marks an item the user has not rated.
U1 <- c(5, 3, 2.5, rep(NA, 4))
U2 <- c(2, 2.5, 5, 2, rep(NA, 3))
U3 <- c(2.5, NA, NA, 4, 4.5, NA, 5)
U4 <- c(5, NA, 3, 4.5, NA, 4, NA)
U5 <- c(4, 3, 2, 4, 3.5, 4, NA)

# Items in rows, users in columns.
data <- data.frame(
  U1 = U1,
  U2 = U2,
  U3 = U3,
  U4 = U4,
  U5 = U5,
  row.names = paste0("Item", 1:7)
)
#####################################
# SlopeOne Difference Calculation
#####################################
# Userdata keeps the ratings as entered: items in rows, users in columns.
Userdata <- data
# Itemdata is the transposed view: users in rows, items in columns.
Itemdata <- t(Userdata)
# Average rating difference between two items over the users who rated both.
#
# Args:
#   x: numeric vector of ratings for the first item; NA means "not rated".
#   y: numeric vector of ratings for the second item, aligned with x.
#
# Returns:
#   mean(x - y) taken over the positions where both x and y are present,
#   or NA when no position has both ratings.
diff_xy <- function(x, y) {
  # Presence is tested with is.na() rather than truthiness: `x & y` would
  # treat a legitimate rating of 0 as "not rated" and undercount co-raters
  # while the numerator still included that pair.
  corated <- !is.na(x) & !is.na(y)
  n_corate <- sum(corated)
  if (n_corate == 0) {
    return(NA_real_)
  }
  sum((x - y)[corated]) / n_corate
}
# Pairwise average rating deviations between the columns of x.
#
# Args:
#   x: matrix or data frame with one column per item and one row per user;
#      NA marks a missing rating.
#
# Returns:
#   A square data frame labelled by colnames(x) on both axes. Entry [i, j]
#   is diff_xy(column i, column j); entry [j, i] is its negation. The
#   diagonal is NA, as is any pair for which diff_xy returns NA.
slopeone_diff <- function(x) {
  n_items <- ncol(x)
  item_names <- colnames(x)
  # Fill a numeric matrix and convert once at the end; writing single cells
  # into a data frame inside a double loop is much slower.
  dev <- matrix(NA_real_, nrow = n_items, ncol = n_items,
                dimnames = list(item_names, item_names))
  # seq_len() keeps both loops empty for zero columns (1:0 would run twice).
  for (i in seq_len(n_items)) {
    # Only the strict upper triangle is computed; dropping the first i
    # indices leaves i + 1, ..., n_items (empty when i is the last column).
    for (j in seq_len(n_items)[-seq_len(i)]) {
      d <- diff_xy(x[, i], x[, j])
      dev[i, j] <- d
      dev[j, i] <- -d
    }
  }
  as.data.frame(dev)
}
# Item-by-item deviation table for sample data 2 (columns of Itemdata).
slopeonediff <- slopeone_diff(x = Itemdata)
#####################################
# Recommendation
#####################################
# Predict the missing ratings of every user from pairwise item deviations.
#
# Args:
#   userdata: matrix or data frame with one row per item and one column per
#             user; NA marks an item the user has not rated.
#   diffdata: square table indexed by item position on both axes, where
#             entry [i, j] is the average of (rating of i - rating of j).
#
# Returns:
#   A data frame with the same dimensions and labels as userdata. Cells for
#   items the user has already rated are NA. Each unrated cell holds the
#   mean of (rating of k - diffdata[k, item]) over the user's rated items k
#   that have a non-NA deviation, or NA when there is no such k.
slopeone_reco <- function(userdata, diffdata) {
  n_users <- ncol(userdata)
  pred <- matrix(NA_real_, nrow = nrow(userdata), ncol = n_users,
                 dimnames = list(rownames(userdata), colnames(userdata)))
  # loop over users
  for (u in seq_len(n_users)) {
    ratings <- userdata[, u]
    ind_rated <- which(!is.na(ratings))
    ind_unrated <- which(is.na(ratings))
    rated <- ratings[ind_rated]
    # loop over the user's unrated items (no iterations when there are none)
    for (item in ind_unrated) {
      estimates <- rated - diffdata[ind_rated, item]
      # Average only over terms that have a deviation. Dividing by the
      # number of rated items would pull the prediction toward 0 whenever
      # a rated item shares no co-raters with the target item.
      usable <- !is.na(estimates)
      if (any(usable)) {
        pred[item, u] <- sum(estimates[usable]) / sum(usable)
      }
    }
  }
  as.data.frame(pred)
}
# Predicted ratings for every unrated (item, user) cell of sample data 2.
my.slopeone.reco <- slopeone_reco(userdata = Userdata, diffdata = slopeonediff)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment