Skip to content

Instantly share code, notes, and snippets.

@pathikrit
pathikrit / GzipSplitter.scala
Last active December 12, 2019 21:29
Split a file into multiple GZIP files
import java.io.InputStream
import better.files._
import squants.information._, InformationConversions._
object GzipSplitter {
/** Splits the $inputStream into approximately equal chunks of $splitSize gzip files under $outputDirectory */
def split(
inputStream : InputStream,
outputDirectory : File = File.newTemporaryDirectory(),
@pathikrit
pathikrit / SparkDataLoad.scala
Last active June 1, 2020 15:03
Spark utils to ship data
import java.nio.charset.{ Charset, StandardCharsets }
import org.apache.spark.sql._
import org.apache.spark.sql.types._
object SparkDataLoad {
def fromCsv[A : Encoder](
path: Set[String],
encoding: Charset = StandardCharsets.UTF_8,
useHeader: Boolean = false,
@pathikrit
pathikrit / SphericalDistance.scala
Last active September 24, 2019 19:06
Distance calculator between 2 coordinates on a planet
/** Distance between 2 coordinates (in degrees) */
def dist(
p1: (Double, Double), // Coordinate 1 (in degrees)
p2: (Double, Double), // Coordinate 2 (in degrees)
manhattanDist: Boolean = false, // If true, calculate Manhattan distance on the sphere :)
diameter: Double = 7917.5 // Diameter of Earth in miles; set this to whatever planet/units you want
): Double = {
import Math._
// Haversine of an angle given in radians: hav(θ) = sin²(θ/2), which is algebraically equal to (1 - cos θ)/2.
// The sin² form is used because 1 - cos(θ) cancels to exactly 0 for very small θ (cos(θ) rounds to 1.0),
// which would collapse the distance between two nearby points to zero.
def haversine(theta: Double): Double = {
  val halfAngleSine = sin(theta / 2)
  halfAngleSine * halfAngleSine
}
@pathikrit
pathikrit / BooleanMonitor.scala
Last active August 16, 2019 13:05
Boolean Monitor
import java.util.concurrent.TimeUnit
import scala.concurrent.duration.Duration
import com.google.common.util.concurrent.Monitor
class BooleanMonitor(monitor: Monitor = new Monitor())(check: => Boolean) {
// Guard bound to `monitor`. `check` is a by-name parameter, so it is re-evaluated on every isSatisfied call.
private val guard = new Monitor.Guard(monitor) { override def isSatisfied = check }
def whenSatisfied[U](timeout: Duration = Duration.Inf)(f: => U): U = {
@pathikrit
pathikrit / SparkSchemaDsl.scala
Created July 12, 2019 13:01
Spark Schema DSL
import org.apache.spark.sql.types._
import org.apache.spark.sql._
object SchemaDsl {
/**
  * Associates a Scala type with the Spark SQL type used to represent it.
  *
  * @tparam ScalaType  the Scala-side type this mapping is declared for (used only at the type level)
  * @param sparkType   the Spark `DataType` to use for columns of this Scala type
  * @param isNullable  whether fields built from this mapping allow nulls (defaults to `false`)
  */
case class ScalaToSparkType[ScalaType](sparkType: DataType, isNullable: Boolean = false) {

  /** Builds a `StructField` called `name` carrying this mapping's Spark type and nullability. */
  def toField(name: String) =
    StructField(name, sparkType, isNullable)
}
// Implicit instances mapping primitive Scala types to their Spark SQL types (non-nullable by default).
implicit val stringType: ScalaToSparkType[String] =
  ScalaToSparkType[String](sparkType = StringType)
implicit val intType: ScalaToSparkType[Int] =
  ScalaToSparkType[Int](sparkType = IntegerType)
@pathikrit
pathikrit / script.scala
Last active December 12, 2018 19:50
Move duplicate files to a directory
import better.files._
def moveDupes(
dir: File,
logFile: File = (File.home / "dupes.txt"),
dupeFolder: File = (File.home / 'dupes).createDirectory()
) = {
for {
log <- logFile.printWriter()
(hash, toKeep :: toMove) <- dir.listRecursively.toSeq.groupBy(_.md5).mapValues(_.toList)
@pathikrit
pathikrit / GitPunchCard.scala
Last active April 15, 2018 11:17
Scala Script to print Git PunchCard
/**
* Quick and dirty Scala app to print git commit punch-card e.g.
*
* ┃08┃09┃10┃11┃12┃13┃14┃15┃16┃17┃18┃19┃20┃21┃22┃23┃00┃01┃02┃03┃04┃05┃06┃07┃
* Sun┃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Mon┃▁▁▁▄▄▄▅▅▅▅▅▅▄▄▄▆▆▆▇▇▇▇▇▇███▆▆▆▅▅▅▄▄▄▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Tue┃▁▁▁▃▃▃▆▆▆▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Wed┃▁▁▁▄▄▄▅▅▅▇▇▇▅▅▅▅▅▅███▇▇▇▅▅▅▆▆▆▇▇▇▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Thu┃▁▁▁▂▂▂▄▄▄▆▆▆▅▅▅▆▆▆▇▇▇▇▇▇▆▆▆▇▇▇▅▅▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
* Fri┃▁▁▁▂▂▂▄▄▄▅▅▅▅▅▅▄▄▄▄▄▄▅▅▅▅▅▅▃▃▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
@pathikrit
pathikrit / local.sbt
Created August 30, 2017 15:03
Local sbt
// Use sbt's Watched.clearWhenTriggered as the message shown on triggered (~) execution.
triggeredMessage := Watched.clearWhenTriggered
// Add Ammonite as a test-scoped dependency, cross-versioned against the full Scala version.
libraryDependencies += "com.lihaoyi" % "ammonite" % "latest.release" % "test" cross CrossVersion.full
// Commands evaluated when the Test-scoped console starts: launch Ammonite.
initialCommands in (Test, console) := """ammonite.Main().run()"""
watchSources ++= (
(baseDirectory.value * "*.sbt").get
++ (baseDirectory.value / "project" * "*.scala").get
++ (baseDirectory.value / "project" * "*.sbt").get
@pathikrit
pathikrit / MajorityElement.scala
Last active June 12, 2017 15:49
Boyer–Moore majority vote algorithm
import scala.collection.generic.Growable
/**
* Boyer–Moore majority vote algorithm (https://en.wikipedia.org/wiki/Boyer–Moore_majority_vote_algorithm)
* A Data structure that supports O(1) tracking of the majority element in streaming data
* (i.e. something that occurs strictly > 50% of the time)
*/
class MajorityElement[A] extends Growable[A] {
private[this] var majorityElement = Option.empty[A]
private[this] var count = 0
@pathikrit
pathikrit / README.md
Last active November 2, 2019 22:20
Contravariance vs. Covariance
  • Let C<A> be a generic type built from a type constructor C (a higher-kinded type), e.g. in List<Animal>, List is C and Animal is A.
  • Let S be a subtype of T e.g. in class Cat extends Animal, Cat is S and Animal is T
  • If C<S> is a subtype of C<T>, then C is covariant on T, e.g. List<Cat> is a subtype of List<Animal>
  • If C<T> is a subtype of C<S>, then C is contravariant on T e.g. Predicate<Animal> is a subtype of Predicate<Cat>
  • If neither C<T> nor C<S> is a subtype of the other, then C is invariant on T
  • If both C<T> and C<S> are subtypes of each other, then C is phantom variant on T. This is possible in languages which support phantom types like Haskell

In Scala: