Table capturing ratings that users gave to items, with schema: user_id, item_id, rating, date
- Group the data by user_id.
- Remove groups with fewer than 10 items.
| import ... | |
| def hpo(train_data, test_data, feature_transformation): | |
| best_accuracy = 0.0 | |
| best_regularizer = None | |
| for regularizer in [None, 'l1', 'l2']: | |
| pipeline = Pipeline([ |
| extern crate timely; | |
| extern crate differential_dataflow; | |
| use serde::{Serialize, Deserialize}; | |
| use timely::dataflow::operators::probe::Handle; | |
| use differential_dataflow::input::Input; | |
| use differential_dataflow::operators::*; | |
| use differential_dataflow::difference::{Abelian, IsZero, Monoid, Semigroup}; |
| extern crate timely; | |
| extern crate differential_dataflow; | |
| use timely::dataflow::operators::probe::Handle; | |
| use differential_dataflow::input::Input; | |
| use differential_dataflow::operators::*; | |
| fn main() { |
| // 'most_recent_neighbors' is a set of ~500 similar sessions | |
| for neighbor_session in most_recent_neighbors.into_iter() { | |
| let mut similarity = 0_f64; | |
| // This returns a HashSet of the items contained in the session | |
| let other_session_items = index.items_for_session(&neighbor_session.id); | |
| // The 'evolving_session' is an array of items of length 'num_items_in_evolving_session' | |
| for (pos, item_id) in evolving_session.iter().enumerate() { | |
| if other_session_items.contains(&item_id) { | |
| let weight = (pos + 1) as f64 / num_items_in_evolving_session as f64; | |
| similarity += decay_factor; |
| import org.apache.spark.sql.{Row, SaveMode} | |
| import org.apache.spark.sql.types._ | |
| val attributesType = new MapType(StringType, StringType, valueContainsNull = false) | |
| val historyEntryType = new StructType() | |
| .add("intervalStart", LongType) | |
| .add("intervalEnd", LongType) | |
| .add("type", StringType) | |
| .add("attributes", attributesType) |
I hereby claim:
To claim this, I am signing this object:
| package org.apache.spark.examples | |
| import org.apache.spark.SparkContext | |
| import org.apache.spark.SparkContext._ | |
| import org.apache.spark.rdd.RDD | |
| import java.util.Random | |
| import scala.collection.mutable | |
| import org.apache.spark.serializer.KryoRegistrator | |
| import com.esotericsoftware.kryo.Kryo |
| package eu.stratosphere.scala.examples.wordcount | |
| import eu.stratosphere.scala.{ScalaPlan, TextFile} | |
| import eu.stratosphere.pact.common.plan.PlanAssembler | |
| import eu.stratosphere.scala._ | |
| import eu.stratosphere.scala.operators._ | |
| case class Author(id: Int, name: String) |
| Index: core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java | |
| =================================================================== | |
| --- core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java (Revision 1469532) | |
| +++ core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java (Arbeitskopie) | |
| @@ -173,6 +173,7 @@ | |
| } | |
| for (int iteration = 0; iteration < numIterations; iteration++) { | |
| + long start = System.currentTimeMillis(); | |
| log.info("iteration {}", iteration); |