Skip to content

Instantly share code, notes, and snippets.

@hamnis
Last active August 29, 2015 14:15
Show Gist options
  • Save hamnis/e396854f4654bd46ebe0 to your computer and use it in GitHub Desktop.
Save hamnis/e396854f4654bd46ebe0 to your computer and use it in GitHub Desktop.
package recsys
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD
import org.jblas.DoubleMatrix
import org.slf4j.LoggerFactory
package object collaborative {

  /**
   * Extension methods on [[MatrixFactorizationModel]] for generating recommendations in bulk.
   *
   * Both bulk methods walk the ids on the driver, launch Spark work per id, and collect the
   * complete result into a driver-side `Vector` before re-distributing it with `parallelize`.
   * The full result set therefore has to fit in driver memory.
   */
  implicit class MatrixFactorizationModelOps(val m: MatrixFactorizationModel) extends AnyVal {

    // A value class cannot hold fields, so the logger is looked up on every use.
    private def logger = LoggerFactory.getLogger("recsys.MatrixFactorizationModelOps")

    /**
     * Item-to-item recommendations: for every product in the model, scores all products
     * against it (see [[recommend]]) and keeps the best `numberOfRecommendations`.
     *
     * The candidate set is the full `productFeatures` RDD, so the queried item is itself
     * among the candidates and is not filtered out of its own result.
     *
     * @param numberOfRecommendations how many scored products to keep per item
     * @return an RDD of `(itemId, Array[(recommendedItemId, score)])`
     */
    def recommendItems(numberOfRecommendations: Int): RDD[(Int, Array[(Int, Double)])] = {
      val productFeatures = m.productFeatures
      // count() is a full Spark job; only pay for it when the message will actually be emitted.
      if (logger.isInfoEnabled) {
        logger.info("Generating recommendations for %s items ".format(productFeatures.count()))
      }
      // Iterate the (id, features) pairs directly: the feature vector is already at hand, so
      // there is no need for a separate lookup job (and a partial `.head`) per item.
      val recommendations = productFeatures.toLocalIterator.map { case (item, features) =>
        item -> recommend(features, productFeatures, numberOfRecommendations)
      }
      productFeatures.context.parallelize(recommendations.toVector)
    }

    /**
     * User-to-item recommendations: for every user in the model, asks the model for its best
     * `numberOfRecommendations` products via `recommendProducts`.
     *
     * @param numberOfRecommendations how many products to keep per user
     * @return an RDD of `(userId, Array[(productId, rating)])`
     */
    def recommendItemsForUsers(numberOfRecommendations: Int): RDD[(Int, Array[(Int, Double)])] = {
      val users = m.userFeatures.map(_._1)
      // count() is a full Spark job; only pay for it when the message will actually be emitted.
      if (logger.isInfoEnabled) {
        logger.info("Generating recommendations for %s users ".format(users.count()))
      }
      val recommendations = users.toLocalIterator.map { user =>
        val products = m.recommendProducts(user, numberOfRecommendations)
        user -> products.map(rating => rating.product -> rating.rating)
      }
      users.context.parallelize(recommendations.toVector)
    }

    /**
     * Scores every entry of `recommendableFeatures` by the dot product of its feature vector
     * with `recommendToFeatures` and returns the `num` highest-scoring entries.
     *
     * @param recommendToFeatures   feature vector of the entity to recommend for
     * @param recommendableFeatures candidate `(id, featureVector)` pairs
     * @param num                   number of results to return
     * @return `(id, score)` pairs selected by `RDD.top` under an ordering on the score
     */
    def recommend(recommendToFeatures: Array[Double],
                  recommendableFeatures: RDD[(Int, Array[Double])],
                  num: Int): Array[(Int, Double)] = {
      val target = new DoubleMatrix(recommendToFeatures)
      recommendableFeatures
        .map { case (id, features) => id -> target.dot(new DoubleMatrix(features)) }
        .top(num)(Ordering.by[(Int, Double), Double](_._2))
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment