pferrel · April 5, 2017 17:44
diff --git a/Mahout Simple CCO b/Mahout Simple CCO
 /**
  * Created by rawkintrevo on 4/5/17.
  */

 // Only need these so intelliJ doesn't complain
 import org.apache.mahout.math._
 import org.apache.mahout.math.scalabindings._
 import org.apache.mahout.math.drm._
 import org.apache.mahout.math.scalabindings.RLikeOps._
 import org.apache.mahout.math.drm.RLikeDrmOps._
 import org.apache.mahout.sparkbindings._

 import org.apache.spark.SparkContext
 import org.apache.spark.SparkContext._
 import org.apache.spark.SparkConf
 // Spark configuration for this example. No master URL is set here, so it has to
 // be supplied by whatever launches the script (e.g. the Mahout spark-shell).
 val conf = new SparkConf().setAppName("Simple Application")
 // Reuse the SparkContext that is already running when there is one (the Mahout
 // spark-shell creates its own). Constructing a second context in the same JVM is
 // rejected by Spark by default; getOrCreate only builds a new one when none exists,
 // in which case `conf` is used. When a context is reused, `conf` is not applied.
 val sc = SparkContext.getOrCreate(conf)

 // Wrap the SparkContext as a Mahout distributed context. It is declared implicit so
 // that later calls needing a distributed context can pick it up from scope.
 implicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = sc2sdc(sc)

 // Everything above is already created for you by ./mahout spark-shell; it is repeated here only to keep IntelliJ happy.

 // don't forget these!
 // export SPARK_HOME=$HOME/gits/spark-1.6.2-bin-hadoop2.6
 // ../mahout/bin/mahout spark-shell



 import org.apache.mahout.math.indexeddataset.{IndexedDataset, BiDictionary}
 import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark

 // Row dictionary: every user name paired with the row index it stands for.
 val userMap = Map(
   "Andrew" -> 0, "Sebastian" -> 1, "Ted" -> 2, "Sarah" -> 3,
   "Alexy" -> 4, "Isabelle" -> 5, "Pat" -> 6)
 val rowIDs = new BiDictionary(userMap)

 // Column dictionary: every product name paired with the column index it stands for.
 val productMap = Map(
   "iPhone5" -> 0, "iPhone6" -> 1, "Galaxy" -> 2,
   "Nexus" -> 3, "iPad" -> 4, "Surface" -> 5)
 val colIDs = new BiDictionary(productMap)

 // "Buy" indicator matrix: one sparse row per user, labeled by the trailing comments.
 // NOTE(review): each tuple is presumably (column index, value), so (0, 1) would mark
 // column 0 (iPhone5 in productMap) — confirm against Mahout's `sparse` helper.
 // NOTE(review): only six rows are listed while userMap holds seven names (there is no
 // row for "Pat"), and the label "Alexey" is spelled "Alexy" in userMap — confirm.
 // NOTE(review): the highest column index used here is 4, whereas productMap has six
 // products — verify the resulting column count is what colIDs expects.
 val buyIndicatorMatrix = sparse((0, 1) :: Nil, // Andrew
                            (2, 1) :: Nil,  // Sebastian
                            (4, 1) :: Nil, // Ted
                            (0, 1) :: Nil, // Sarah
                            (2, 1) :: Nil, // Alexey
                            (2, 1) :: Nil) // Isabelle

 // Hand the in-core matrix to drmParallelize, then pair the result with the user (row)
 // and product (column) dictionaries in an IndexedDatasetSpark.
 val buyIndicatorDRM = drmParallelize(buyIndicatorMatrix)
 val buyIndicatorIDS = new IndexedDatasetSpark(buyIndicatorDRM, rowIDs, colIDs)

 // "View" indicator matrix: one sparse row per user, labeled by the trailing comments.
 // NOTE(review): each tuple is presumably (column index, value) — confirm against
 // Mahout's `sparse` helper.
 // NOTE(review): the row labels skip Alexy/Alexey, so rows 4 and 5 are labeled Isabelle
 // and Pat, while userMap assigns indices 4 and 5 to Alexy and Isabelle and the buy
 // matrix labels the same rows Alexey and Isabelle — confirm which users are intended.
 val viewIndicatorMatrix = sparse( (0, 1) :: (2, 1) :: (3, 1) :: Nil,            // Andrew
                                   (0, 1) :: (2, 1) :: (4, 1) :: (5, 1) :: Nil,  // Sebastian
                                   (1, 1) :: (4, 1) :: (5, 1) :: Nil,            // Ted
                                   (0, 1) :: (2, 1) :: (5, 1) :: Nil,            // Sarah
                                   (2, 1) :: (5, 1) :: Nil,                      // Isabelle
                                   (0, 1) :: (2, 1) :: (4, 1) :: (5, 1) :: Nil)  // Pat

 // Hand the in-core matrix to drmParallelize, then pair the result with the same user
 // (row) and product (column) dictionaries used for the buy dataset.
 val viewIndicatorDRM = drmParallelize(viewIndicatorMatrix)
 val viewIndicatorIDS = new IndexedDatasetSpark(viewIndicatorDRM, rowIDs, colIDs)

 import org.apache.mahout.math.cf.SimilarityAnalysis

 // Run the cooccurrence analysis over both indicator datasets (buy first, view second)
 // with a fixed random seed, keeping at most one interesting item per thing.
 val ccoDRMS = SimilarityAnalysis.cooccurrencesIDSs(
   Array(buyIndicatorIDS, viewIndicatorIDS),
   maxInterestingItemsPerThing = 1,
   randomSeed = 1234)

 // Collect the first two result matrices (index 0, then index 1).
 // Original author's note on the first name: THESE ARE MISNAMED LLRS.
 val (logLikelihoods, invertedScores) =
   (ccoDRMS(0).matrix.collect, ccoDRMS(1).matrix.collect)

 /**
 invertedScores: org.apache.mahout.math.Matrix =
 {
 0 =>   {3:2.6341457841558764}
 1 =>   {}
 2 =>   {2:1.5876494966267813}
 3 =>   {}
 4 =>   {1:5.406734506395658}
 }
 **/
	/**
	* Created by rawkintrevo on 4/5/17.
	*/

	// Only need these so intelliJ doesn't complain
	import org.apache.mahout.math._
	import org.apache.mahout.math.scalabindings._
	import org.apache.mahout.math.drm._
	import org.apache.mahout.math.scalabindings.RLikeOps._
	import org.apache.mahout.math.drm.RLikeDrmOps._
	import org.apache.mahout.sparkbindings._

	import org.apache.spark.SparkContext
	import org.apache.spark.SparkContext._
	import org.apache.spark.SparkConf
	// Spark configuration for this example. No master URL is set here, so it has to
	// be supplied by whatever launches the script (e.g. the Mahout spark-shell).
	val conf = new SparkConf().setAppName("Simple Application")
	// Reuse the SparkContext that is already running when there is one (the Mahout
	// spark-shell creates its own). Constructing a second context in the same JVM is
	// rejected by Spark by default; getOrCreate only builds a new one when none exists,
	// in which case `conf` is used. When a context is reused, `conf` is not applied.
	val sc = SparkContext.getOrCreate(conf)

	// Wrap the SparkContext as a Mahout distributed context. It is declared implicit so
	// that later calls needing a distributed context can pick it up from scope.
	implicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = sc2sdc(sc)

	// Everything above is already created for you by ./mahout spark-shell; it is repeated here only to keep IntelliJ happy.

	// don't forget these!
	// export SPARK_HOME=$HOME/gits/spark-1.6.2-bin-hadoop2.6
	// ../mahout/bin/mahout spark-shell



	import org.apache.mahout.math.indexeddataset.{IndexedDataset, BiDictionary}
	import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark

	// Row dictionary: every user name paired with the row index it stands for.
	val userMap = Map(
		"Andrew" -> 0, "Sebastian" -> 1, "Ted" -> 2, "Sarah" -> 3,
		"Alexy" -> 4, "Isabelle" -> 5, "Pat" -> 6)
	val rowIDs = new BiDictionary(userMap)

	// Column dictionary: every product name paired with the column index it stands for.
	val productMap = Map(
		"iPhone5" -> 0, "iPhone6" -> 1, "Galaxy" -> 2,
		"Nexus" -> 3, "iPad" -> 4, "Surface" -> 5)
	val colIDs = new BiDictionary(productMap)

	// "Buy" indicator matrix: one sparse row per user, labeled by the trailing comments.
	// NOTE(review): each tuple is presumably (column index, value), so (0, 1) would mark
	// column 0 (iPhone5 in productMap) — confirm against Mahout's `sparse` helper.
	// NOTE(review): only six rows are listed while userMap holds seven names (there is no
	// row for "Pat"), and the label "Alexey" is spelled "Alexy" in userMap — confirm.
	// NOTE(review): the highest column index used here is 4, whereas productMap has six
	// products — verify the resulting column count is what colIDs expects.
	val buyIndicatorMatrix = sparse((0, 1) :: Nil, // Andrew
	(2, 1) :: Nil, // Sebastian
	(4, 1) :: Nil, // Ted
	(0, 1) :: Nil, // Sarah
	(2, 1) :: Nil, // Alexey
	(2, 1) :: Nil) // Isabelle

	// Hand the in-core matrix to drmParallelize, then pair the result with the user (row)
	// and product (column) dictionaries in an IndexedDatasetSpark.
	val buyIndicatorDRM = drmParallelize(buyIndicatorMatrix)
	val buyIndicatorIDS = new IndexedDatasetSpark(buyIndicatorDRM, rowIDs, colIDs)

	// "View" indicator matrix: one sparse row per user, labeled by the trailing comments.
	// NOTE(review): each tuple is presumably (column index, value) — confirm against
	// Mahout's `sparse` helper.
	// NOTE(review): the row labels skip Alexy/Alexey, so rows 4 and 5 are labeled Isabelle
	// and Pat, while userMap assigns indices 4 and 5 to Alexy and Isabelle and the buy
	// matrix labels the same rows Alexey and Isabelle — confirm which users are intended.
	val viewIndicatorMatrix = sparse( (0, 1) :: (2, 1) :: (3, 1) :: Nil, // Andrew
	(0, 1) :: (2, 1) :: (4, 1) :: (5, 1) :: Nil, // Sebastian
	(1, 1) :: (4, 1) :: (5, 1) :: Nil, // Ted
	(0, 1) :: (2, 1) :: (5, 1) :: Nil, // Sarah
	(2, 1) :: (5, 1) :: Nil, // Isabelle
	(0, 1) :: (2, 1) :: (4, 1) :: (5, 1) :: Nil) // Pat

	// Hand the in-core matrix to drmParallelize, then pair the result with the same user
	// (row) and product (column) dictionaries used for the buy dataset.
	val viewIndicatorDRM = drmParallelize(viewIndicatorMatrix)
	val viewIndicatorIDS = new IndexedDatasetSpark(viewIndicatorDRM, rowIDs, colIDs)

	import org.apache.mahout.math.cf.SimilarityAnalysis

	// Run the cooccurrence analysis over both indicator datasets (buy first, view second)
	// with a fixed random seed, keeping at most one interesting item per thing.
	val ccoDRMS = SimilarityAnalysis.cooccurrencesIDSs(
		Array(buyIndicatorIDS, viewIndicatorIDS),
		maxInterestingItemsPerThing = 1,
		randomSeed = 1234)

	// Collect the first two result matrices (index 0, then index 1).
	// Original author's note on the first name: THESE ARE MISNAMED LLRS.
	val (logLikelihoods, invertedScores) =
		(ccoDRMS(0).matrix.collect, ccoDRMS(1).matrix.collect)

	/**
	invertedScores: org.apache.mahout.math.Matrix =
	{
	0 => {3:2.6341457841558764}
	1 => {}
	2 => {2:1.5876494966267813}
	3 => {}
	4 => {1:5.406734506395658}
	}
	**/