Skip to content

Instantly share code, notes, and snippets.

View skp33's full-sized avatar

Kaushal Prajapati skp33

View GitHub Profile
import org.apache.spark.sql.SparkSession;
public class SparkHiveTest {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder()
.appName("Java Spark Hive Example")
.config("spark.master", "local")
implicit class DataFrameExtended(df: DataFrame) {
import df.sqlContext.implicits._
/**
 * Builds a single boolean [[Column]] that is true when at least one of `cols` is NULL.
 *
 * @param cols columns to test for NULL
 * @return the logical OR of `isNull` over every column in `cols`, or a literal `false`
 *         column when `cols` is empty (no column can be NULL)
 */
def anyNull(cols: Seq[Column]): Column =
  cols
    .map(_.isNull)
    // reduceOption rather than reduce: reduce throws on an empty Seq.
    .reduceOption(_ || _)
    .getOrElse(org.apache.spark.sql.functions.lit(false))
/**
* A LEFT JOIN never matches rows whose join key contains a NULL, yet those rows are
* usually still shuffled to a single reducer (or just a few), which skews the join.
* This can be fixed by adding an extra, temporary join condition that:
* - is a random seed when any of the keys is NULL, thus addressing the NULL skew
@skp33
skp33 / CreateTableStatement.scala
Created January 25, 2020 19:26
Create table schema from dataset schema
import java.lang.reflect.Method
import java.net.URI
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.execution.command.ShowCreateTableCommand
import org.apache.spark.sql.types.StructType
def showCreateTableCommand(
@skp33
skp33 / gist:8e2be47f9b50d704567e6e6cf6f8f7fd
Last active December 31, 2019 12:52
Spark Metrics URL
http://localhost:4040/metrics/json/
spark.conf.set("spark.sql.streaming.metricsEnabled", "true")
@skp33
skp33 / SparkUtils.scala
Created November 20, 2018 12:23 — forked from ibuenros/SparkUtils.scala
Spark productionizing utilities developed by Ooyala, shown in Spark Summit 2014
//==================================================================
// SPARK INSTRUMENTATION
//==================================================================
import com.codahale.metrics.{MetricRegistry, Meter, Gauge}
import org.apache.spark.{SparkEnv, Accumulator}
import org.apache.spark.metrics.source.Source
import org.joda.time.DateTime
import scala.collection.mutable
@skp33
skp33 / PointType.scala
Created August 30, 2018 20:24 — forked from sadikovi/PointType.scala
Spark UDT and UDAF with custom buffer type
package org.apache.spark
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.types._
@SQLUserDefinedType(udt = classOf[PointType])
case class Point(mac: String, start: Long, end: Long) {
/**
 * Hash code computed from `mac` and `start` only; `end` does not contribute.
 *
 * @return `31 * (31 * mac.hashCode) + start.hashCode`, with the usual Int wrap-around
 */
override def hashCode(): Int = {
  val macTerm = 31 * mac.hashCode
  31 * macTerm + start.hashCode
}
@skp33
skp33 / SudokuSolver.scala
Created July 27, 2018 17:19 — forked from pathikrit/SudokuSolver.scala
Sudoku Solver in Scala
// Grid dimension: cell indices are split into coordinates with `% n` and `/ n`.
val n: Int = 9
// Integer square root of `n` (3 for n = 9).
val s: Int = math.sqrt(n.toDouble).toInt
// A board is an indexed sequence of indexed sequences of Int.
type Board = IndexedSeq[IndexedSeq[Int]]
def solve(board: Board, cell: Int = 0): Option[Board] = (cell%n, cell/n) match {
case (r, `n`) => Some(board)
case (r, c) if board(r)(c) > 0 => solve(board, cell + 1)
case (r, c) =>
def cells(i: Int) = Seq(board(r)(i), board(i)(c), board(s*(r/s) + i/s)(s*(c/s) + i%s))
def guess(x: Int) = solve(board.updated(r, board(r).updated(c, x)), cell + 1)
@skp33
skp33 / Deserialization.scala
Created April 6, 2018 18:22 — forked from ramn/Deserialization.scala
Object serialization example in Scala
import java.io._
/**
 * Serializable class holding a name and an age, with a pinned serial version UID of 15.
 *
 * @param name value rendered first in `toString`
 * @param age  value rendered second in `toString`
 */
@SerialVersionUID(15L)
class Animal(name: String, age: Int) extends Serializable {

  /** Renders the instance as `Animal(<name>, <age>)`. */
  override def toString: String = {
    val rendered = s"Animal(${name}, ${age})"
    rendered
  }
}
/**
 * Case class with a single `name` field. Unlike `Animal`, it declares no explicit
 * `@SerialVersionUID`.
 *
 * @param name the person's name
 */
case class Person(name: String)
// or fork := true in sbt
@skp33
skp33 / CogroupDf.scala
Created March 29, 2018 18:57 — forked from ahoy-jon/CogroupDf.scala
DataFrame.cogroup is the new HList.flatMap (UNFORTUNATELY, THIS IS VERY SLOW)
package org.apache.spark.sql.utils
import org.apache.spark.Partitioner
import org.apache.spark.rdd.{CoGroupedRDD, RDD}
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection}
import org.apache.spark.sql.execution.LogicalRDD
import org.apache.spark.sql.types.{ArrayType, StructField, StructType}
import org.apache.spark.sql.{SQLContext, DataFrame, Row}
import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag
@skp33
skp33 / reactive_map.js
Created March 17, 2018 15:19 — forked from granturing/reactive_map.js
Sample reactive Leaflet code for Zeppelin
<!-- place this in an %angular paragraph -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/leaflet/0.7.5/leaflet.css" />
<div id="map" style="height: 800px; width: 100%"></div>
<script type="text/javascript">
function initMap() {
var map = L.map('map').setView([30.00, -30.00], 3);
L.tileLayer('http://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {