Skip to content

Instantly share code, notes, and snippets.

View ldacosta's full-sized avatar

Luis Da Costa ldacosta

View GitHub Profile
/////////////////////////////////
// Generic
/**
 * Generic contract for a model that is trained on pairs and then queried.
 *
 * @tparam T1 type of the first element of each training pair; also the type accepted by `predict`
 *            (presumably the input/feature type -- confirm)
 * @tparam T2 type of the second element of each training pair; also the type that `predict`
 *            pairs with a confidence (presumably the label/output type -- confirm)
 */
trait Model[T1,T2] {
/** Trains on a list of (T1, T2) pairs. Currently returns nothing; the author left open whether it should return a Boolean. */
def train(t: List[(T1, T2)]): Unit // or Boolean?
/** Returns a set of (T2, Confidence) pairs for the given `t`. `Confidence` is declared outside this snippet. */
def predict(t: T1): Set[(T2, Confidence)]
}
/**
 * A [[Model]] backed by an underlying linear model.
 *
 * @tparam T1 type accepted by `predict` (first element of each training pair)
 * @tparam T2 type that `predict` pairs with a confidence (second element of each training pair)
 */
trait LinearModel[T1, T2] extends Model[T1, T2] {
  /**
   * The underlying linear model ("-ish": the exact type is still a sketch).
   *
   * Declared `protected` rather than `private`: an abstract member cannot be
   * `private`, because no implementing class would be able to define it.
   */
  protected val m: MLLibLinearModel // -ish
}
@ldacosta
ldacosta / GenericDataWriter.scala
Last active February 12, 2016 21:58
I want to write a data writer that is generic for the Warehouse API's data
/**
* I want to write a data writer that is generic for the Warehouse API's data.
* However, this won't compile, as d.toDF needs some implicits that can only be found if
* (1) T is a case class (that is why T <: Product) and
* (2) T is defined in the proper place (see https://issues.scala-lang.org/browse/SI-6649 or http://stackoverflow.com/questions/33704831/value-todf-is-not-a-member-of-org-apache-spark-rdd-rdd)
*
* I think step (1) is OK, but the typetags (step (2)) are not working.
*/
case class GenericDataWriter[T <: Product](name: String, sqlC: SQLContext, stage: Stage, fmt: Format) extends Serializable with Logging with DataWriter[T] {
package com.mediative.mpn.brain.datascience.platform
package nationals.recommendations.strategy
import com.cra.figaro.library.atomic.continuous.{ AtomicNormal, Normal }
import com.holdenkarau.spark.testing.{ DatasetGenerator, DatasetSuiteBase }
import com.mediative.mpn.brain.datascience.platform.nationals.generators
import org.apache.spark.sql.Dataset
import org.scalacheck.Gen
import org.scalatest.FreeSpec
import org.scalatest.prop.GeneratorDrivenPropertyChecks
def summarize2(dsSEMUnionClean: Dataset[SEMUnionClean]): Dataset[SEMUnionClean] = {
import dsSEMUnionClean.sparkSession.implicits._
import org.apache.spark.sql.expressions.scalalang.typed.{
count => typedCount,
sum => typedSum
}
val xxx =
dsSEMUnionClean.groupByKey(r => (r.account_descriptive_name, r.advertiser_id, r.advertiser_name, r.campaign_id, r.campaign_name, r.source))
.agg(
import itertools
from abc import abstractmethod
from typing import List, Set, Callable, Tuple, Dict
from enum import Enum, auto
class PlayerType(Enum):
    """Enumeration with three members: DEFENSIVE, OFFENSIVE and NEUTRAL.

    Values are generated with ``auto()``, so members are best compared by
    identity or name rather than by their underlying integers.
    """

    # NOTE(review): presumably describes a player's style of play -- confirm with callers.
    DEFENSIVE = auto()
    OFFENSIVE = auto()
    NEUTRAL = auto()
from os import path
from Utils.programming.ut_find_folders import *
from ReinforcementLearning.NHL.playbyplay.playbyplay_data import *
# Pointers to the data.
# NOTE(review): the two roots below are hard-coded absolute paths under one user's
# home directory; they will need to be changed to run on any other machine.
# Presumably the local checkout of the code repository -- confirm.
repoCode = '/Users/hectorb/Desktop/Younes/Code/NHL_stats_SL'
# Root folder under which the sub-folders below are resolved.
db_root = '/Users/hectorb/Desktop/Younes/Databases/Hockey'
# <db_root>/PlayByPlay
repoPbP = path.join(db_root, 'PlayByPlay')
# <db_root>/PlayerStats/player
repoPSt = path.join(db_root, 'PlayerStats/player')