This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import shapeless._, syntax.singleton._, record._, ops.hlist._ | |
/** | |
* Given an instance A, its generic representation AR, and a function f from AR => BR, | |
* we can convert A to B if we also have the generic representation of B as BR | |
* We also handle misalignments using shapeless's align typeclass (https://stackoverflow.com/questions/29242873/shapeless-turn-a-case-class-into-another-with-fields-in-different-order) | |
*/ | |
case class Morph[A, AR](a: A)(implicit reprA: LabelledGeneric.Aux[A, AR]) { | |
// Why this DSL you say? Hack to get around scalac idiocy: https://stackoverflow.com/a/46614684/471136 | |
def to[B] = new { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.InputStream | |
import better.files._ | |
import squants.information._, InformationConversions._ | |
object GzipSplitter { | |
/** Splits the $inputStream into approximately equal chunks of $splitSize gzip files under $outputDirectory */ | |
def split( | |
inputStream : InputStream, | |
outputDirectory : File = File.newTemporaryDirectory(), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.nio.charset.{ Charset, StandardCharsets } | |
import org.apache.spark.sql._ | |
import org.apache.spark.sql.types._ | |
object SparkDataLoad { | |
def fromCsv[A : Encoder]( | |
path: Set[String], | |
encoding: Charset = StandardCharsets.UTF_8, | |
useHeader: Boolean = false, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Distance between 2 coordinates (in degrees) */ | |
def dist( | |
p1: (Double, Double), // Coordinate 1 (in degrees) | |
p2: (Double, Double), // Coordinate 2 (in degrees) | |
manhattanDist: Boolean = false, // If true, calculate Manhattan distance on the sphere :) | |
diameter: Double = 7917.5 // Diameter of Earth in miles; set this to whatever planet/units you want | |
): Double = { | |
import Math._ | |
def haversine(theta: Double) = (1 - cos(theta))/2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.concurrent.TimeUnit | |
import scala.concurrent.duration.Duration | |
import com.google.common.util.concurrent.Monitor | |
class BooleanMonitor(monitor: Monitor = new Monitor())(check: => Boolean) { | |
private val guard = new Monitor.Guard(monitor) { override def isSatisfied = check } | |
def whenSatisfied[U](timeout: Duration = Duration.Inf)(f: => U): U = { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.types._ | |
import org.apache.spark.sql._ | |
object SchemaDsl { | |
/**
 * Associates a Scala type with the Spark SQL [[DataType]] that represents it.
 *
 * @tparam ScalaType the Scala-side type this mapping is declared for; it is a phantom
 *                   parameter and is not referenced by any member of this class
 * @param sparkType  the Spark SQL data type to use for fields of this kind
 * @param isNullable whether fields built from this mapping are nullable (defaults to `false`)
 */
case class ScalaToSparkType[ScalaType](sparkType: DataType, isNullable: Boolean = false) {

  /** Builds a [[StructField]] called `name` with this mapping's data type and nullability. */
  def toField(name: String) = StructField(name, sparkType, isNullable)
}
// Implicit Scala-to-Spark type mappings; each relies on the default `isNullable = false`.
implicit val stringType: ScalaToSparkType[String] = ScalaToSparkType[String](sparkType = StringType)
implicit val intType: ScalaToSparkType[Int] = ScalaToSparkType[Int](sparkType = IntegerType)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import better.files._ | |
def moveDupes( | |
dir: File, | |
logFile: File = (File.home / "dupes.txt"), | |
dupeFolder: File = (File.home / 'dupes).createDirectory() | |
) = { | |
for { | |
log <- logFile.printWriter() | |
(hash, toKeep :: toMove) <- dir.listRecursively.toSeq.groupBy(_.md5).mapValues(_.toList) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Quick and dirty Scala app to print git commit punch-card e.g. | |
* | |
* ┃08┃09┃10┃11┃12┃13┃14┃15┃16┃17┃18┃19┃20┃21┃22┃23┃00┃01┃02┃03┃04┃05┃06┃07┃ | |
* Sun┃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ | |
* Mon┃▁▁▁▄▄▄▅▅▅▅▅▅▄▄▄▆▆▆▇▇▇▇▇▇███▆▆▆▅▅▅▄▄▄▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ | |
* Tue┃▁▁▁▃▃▃▆▆▆▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ | |
* Wed┃▁▁▁▄▄▄▅▅▅▇▇▇▅▅▅▅▅▅███▇▇▇▅▅▅▆▆▆▇▇▇▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ | |
* Thu┃▁▁▁▂▂▂▄▄▄▆▆▆▅▅▅▆▆▆▇▇▇▇▇▇▆▆▆▇▇▇▅▅▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ | |
* Fri┃▁▁▁▂▂▂▄▄▄▅▅▅▅▅▅▄▄▄▄▄▄▅▅▅▅▅▅▃▃▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Use sbt's built-in `Watched.clearWhenTriggered` as the message shown on each triggered (`~`) run;
// per the sbt API this clears the screen before re-running — NOTE(review): confirm for the sbt version in use.
triggeredMessage := Watched.clearWhenTriggered | |
// Add the Ammonite REPL in the "test" configuration only, resolving the latest release and
// cross-building against the full Scala version (CrossVersion.full) rather than the binary version.
libraryDependencies += "com.lihaoyi" % "ammonite" % "latest.release" % "test" cross CrossVersion.full | |
// When the test-scoped console starts, immediately run this snippet, which launches Ammonite.
initialCommands in (Test, console) := """ammonite.Main().run()""" | |
watchSources ++= ( | |
(baseDirectory.value * "*.sbt").get | |
++ (baseDirectory.value / "project" * "*.scala").get | |
++ (baseDirectory.value / "project" * "*.sbt").get |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.collection.generic.Growable | |
/** | |
* Boyer–Moore majority vote algorithm (https://en.wikipedia.org/wiki/Boyer–Moore_majority_vote_algorithm) | |
* A Data structure that supports O(1) tracking of the majority element in streaming data | |
* (i.e. something that occurs strictly > 50% of the time) | |
*/ | |
class MajorityElement[A] extends Growable[A] { | |
private[this] var majorityElement = Option.empty[A] | |
private[this] var count = 0 |