This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import path | |
from Utils.programming.ut_find_folders import * | |
from ReinforcementLearning.NHL.playbyplay.playbyplay_data import * | |
# Pointers to the data
# Absolute location of the code repository (machine-specific path).
repoCode = '/Users/hectorb/Desktop/Younes/Code/NHL_stats_SL'
# Absolute location of the database root (machine-specific path).
db_root = '/Users/hectorb/Desktop/Younes/Databases/Hockey'
# Both data folders live under db_root; build them in one pass.
repoPbP, repoPSt = (
    path.join(db_root, sub_dir)
    for sub_dir in ('PlayByPlay', 'PlayerStats/player')
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
from abc import abstractmethod | |
from typing import List, Set, Callable, Tuple, Dict | |
from enum import Enum, auto | |
class PlayerType(Enum):
    """Enumeration of player types: DEFENSIVE, OFFENSIVE and NEUTRAL."""

    # auto() assigns each member the next integer value, in declaration order.
    DEFENSIVE = auto()
    OFFENSIVE = auto()
    NEUTRAL = auto()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def summarize2(dsSEMUnionClean: Dataset[SEMUnionClean]): Dataset[SEMUnionClean] = { | |
import dsSEMUnionClean.sparkSession.implicits._ | |
import org.apache.spark.sql.expressions.scalalang.typed.{ | |
count => typedCount, | |
sum => typedSum | |
} | |
val xxx = | |
dsSEMUnionClean.groupByKey(r => (r.account_descriptive_name, r.advertiser_id, r.advertiser_name, r.campaign_id, r.campaign_name, r.source)) | |
.agg( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.mediative.mpn.brain.datascience.platform | |
package nationals.recommendations.strategy | |
import com.cra.figaro.library.atomic.continuous.{ AtomicNormal, Normal } | |
import com.holdenkarau.spark.testing.{ DatasetGenerator, DatasetSuiteBase } | |
import com.mediative.mpn.brain.datascience.platform.nationals.generators | |
import org.apache.spark.sql.Dataset | |
import org.scalacheck.Gen | |
import org.scalatest.FreeSpec | |
import org.scalatest.prop.GeneratorDrivenPropertyChecks |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* I want to write a data writer that is generic for the Warehouse API's data. | |
* However, this won't compile, because d.toDF needs some implicits that can only be found if | |
* (1) T is a case class (that is why T <: Product) and | |
* (2) T is defined in the proper place (see https://issues.scala-lang.org/browse/SI-6649 or http://stackoverflow.com/questions/33704831/value-todf-is-not-a-member-of-org-apache-spark-rdd-rdd) | |
* | |
* I think step (1) is OK, but the typetags (step (2)) are not working. | |
*/ | |
case class GenericDataWriter[T <: Product](name: String, sqlC: SQLContext, stage: Stage, fmt: Format) extends Serializable with Logging with DataWriter[T] { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
///////////////////////////////// | |
// Generic | |
/**
 * Generic contract for something that is trained on pairs and then queried.
 *
 * @tparam T1 type of the value passed to `predict`; first component of each training pair
 * @tparam T2 second component of each training pair; also the first component of each
 *            prediction
 */
trait Model[T1, T2] {

  /**
   * Trains on the given pairs.
   *
   * @param t list of `(T1, T2)` pairs
   */
  def train(t: List[(T1, T2)]): Unit // or Boolean?

  /**
   * Queries the model for one input.
   *
   * @param t the input value
   * @return a set of `(T2, Confidence)` pairs
   */
  def predict(t: T1): Set[(T2, Confidence)]
}
/**
 * A [[Model]] that is backed by an underlying `MLLibLinearModel`.
 *
 * @tparam T1 input type, as in [[Model]]
 * @tparam T2 output type, as in [[Model]]
 */
trait LinearModel[T1, T2] extends Model[T1, T2] {

  /**
   * The underlying linear model, to be supplied by the concrete implementation.
   *
   * Declared `protected` rather than `private`: the compiler rejects `private` on an
   * abstract member, since no implementing class could ever see it to define it.
   * `protected` still keeps it out of the public interface.
   */
  protected val m: MLLibLinearModel // -ish
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.github.nscala_time.time.Imports._ | |
import org.joda.time.Days | |
// The current time, obtained with the "America/Toronto" zone.
val nowInToronto: DateTime = DateTime.now(DateTimeZone.forID("America/Toronto"))

// A second DateTime constructed from the first one, with the zone set to UTC.
val nowInTorontoInUTC: DateTime = new org.joda.time.DateTime(nowInToronto, DateTimeZone.UTC)

// Print both values, Toronto first, so the two renderings can be compared by eye.
Seq(nowInToronto, nowInTorontoInUTC).foreach(moment => println(moment.toString))

// Check that the two values are zero whole days apart.
assert {
  val gap = Days.daysBetween(nowInToronto, nowInTorontoInUTC)
  gap.getDays == 0
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
private def templateForATest(dir: String, sql: SQLContext) = { | |
// whatever structure we want to test | |
val good = CleanData(reportDate = Timestamp.valueOf("2015-08-01 19:20:21"), // yyyy-[m]m-[d]d hh:mm:ss | |
date = Timestamp.valueOf("2015-08-01 19:20:21"), | |
impressions = 10, | |
clicks = 20, | |
totalConversions = 33, | |
impressionsConvergenceProbability = Some(0.99), | |
clicksConvergenceProbability = Some(0.99), | |
totalConversionsConvergenceProbability = Some(0.99)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Stuff below doesn't compile | |
/** Trait that pairs an abstract `Int` member with a nested generic case class. */
trait ATrait {

  /** Abstract value; every concrete extension of the trait has to define it. */
  val something: Int

  // <some stuff here>

  /**
   * Generic single-field wrapper declared inside the trait.
   *
   * @param value the wrapped value
   * @tparam T type of the wrapped value
   */
  case class AType[T](value: T)
}
case class MyClass(something: Int, f: AType => Int) extends ATrait // error: not found: type AType | |
// this version needs _something_ to be defined: | |
object Wrapper extends ATrait { // error: something undefined |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
/**
 * Base contract for a model parameterised by `T`.
 *
 * @tparam T not referenced by the members declared in this trait itself
 */
trait Model[T] {

  /** Name of the model. */
  def name: String // TODO: not sure of its utility

  /** A sequence of doubles; presumably a random draw, judging by the name (TODO confirm). */
  def randomPick: Seq[Double]
}
/**
 * A [[Model]] whose implementations must also provide an implicit `Continuous[T]`.
 *
 * @tparam T the type the `Continuous` instance is defined for
 */
trait ContinuousModel[T] extends Model[T] {

  /** Implicit `Continuous[T]` instance; abstract here, supplied by the implementer. */
  implicit val continuousOpt: Continuous[T]
}
NewerOlder