This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
///////////////////////////////// | |
// Generic | |
/**
 * Generic model interface: trained from a list of `(T1, T2)` pairs and queried
 * with a single `T1`, yielding a set of `(T2, Confidence)` pairs.
 *
 * NOTE(review): presumably `T1` is the input/feature type and `T2` the label
 * type — confirm against implementations.
 *
 * @tparam T1 type of the value passed to `predict` and of the first element of
 *            each training pair
 * @tparam T2 type of the second element of each training pair and of the first
 *            element of each prediction
 */
trait Model[T1, T2] {

  /**
   * Trains the model on the given `(T1, T2)` pairs.
   *
   * @param t the training pairs
   */
  def train(t: List[(T1, T2)]): Unit // or Boolean?

  /**
   * Produces predictions for a single `T1` value.
   *
   * @param t the value to predict for
   * @return a set of `(T2, Confidence)` pairs
   */
  def predict(t: T1): Set[(T2, Confidence)]
}
/**
 * A `Model` that declares an underlying `MLLibLinearModel` member.
 *
 * @tparam T1 first type parameter, forwarded to `Model`
 * @tparam T2 second type parameter, forwarded to `Model`
 */
trait LinearModel[T1, T2] extends Model[T1, T2] {
  // An abstract member cannot be `private` (the compiler rejects it because no
  // subclass could ever implement it). `protected` keeps it hidden from callers
  // while letting concrete implementations supply the value.
  protected val m: MLLibLinearModel // -ish
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* I want to write a data writer that is generic for the Warehouse API's data | |
* however, this won't compile as d.toDF needs some implicits that can only be found if | |
* (1) T is a case class (that is why T <: Product) and | |
* (2) T is defined in the proper place (see https://issues.scala-lang.org/browse/SI-6649 or http://stackoverflow.com/questions/33704831/value-todf-is-not-a-member-of-org-apache-spark-rdd-rdd) | |
* | |
* I think step (1) is OK, but the typetags (step (2)) are not working. | |
*/ | |
case class GenericDataWriter[T <: Product](name: String, sqlC: SQLContext, stage: Stage, fmt: Format) extends Serializable with Logging with DataWriter[T] { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.mediative.mpn.brain.datascience.platform | |
package nationals.recommendations.strategy | |
import com.cra.figaro.library.atomic.continuous.{ AtomicNormal, Normal } | |
import com.holdenkarau.spark.testing.{ DatasetGenerator, DatasetSuiteBase } | |
import com.mediative.mpn.brain.datascience.platform.nationals.generators | |
import org.apache.spark.sql.Dataset | |
import org.scalacheck.Gen | |
import org.scalatest.FreeSpec | |
import org.scalatest.prop.GeneratorDrivenPropertyChecks |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def summarize2(dsSEMUnionClean: Dataset[SEMUnionClean]): Dataset[SEMUnionClean] = { | |
import dsSEMUnionClean.sparkSession.implicits._ | |
import org.apache.spark.sql.expressions.scalalang.typed.{ | |
count => typedCount, | |
sum => typedSum | |
} | |
val xxx = | |
dsSEMUnionClean.groupByKey(r => (r.account_descriptive_name, r.advertiser_id, r.advertiser_name, r.campaign_id, r.campaign_name, r.source)) | |
.agg( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
from abc import abstractmethod | |
from typing import List, Set, Callable, Tuple, Dict | |
from enum import Enum, auto | |
class PlayerType(Enum):
    """Enumeration of player types.

    Member values are assigned automatically by ``auto()`` in declaration order.
    """

    DEFENSIVE = auto()
    OFFENSIVE = auto()
    NEUTRAL = auto()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import path | |
from Utils.programming.ut_find_folders import * | |
from ReinforcementLearning.NHL.playbyplay.playbyplay_data import * | |
# Pointers to the data.
# NOTE(review): these are absolute paths under one user's home directory —
# confirm/adjust before running on another machine.
repoCode = '/Users/hectorb/Desktop/Younes/Code/NHL_stats_SL'
db_root = '/Users/hectorb/Desktop/Younes/Databases/Hockey'
# Sub-directories of db_root (presumably play-by-play and per-player stats — verify).
repoPbP = path.join(db_root, 'PlayByPlay')
repoPSt = path.join(db_root, 'PlayerStats/player')
OlderNewer