Josh Rosen (JoshRosen)
package org.apache.spark.sql
import java.io.File
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.StructType
import scala.util.Random
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.serializers.JavaSerializer
import com.esotericsoftware.kryo.io.{Input, Output}
class JavaSerializable extends Serializable

object KryoTest {
  def main(args: Array[String]): Unit = {
    val kryo = new Kryo()
    // Route this class through Java serialization instead of Kryo's default field serializer:
    kryo.register(classOf[JavaSerializable], new JavaSerializer())
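    // The gist is truncated at this point; a minimal round trip using the
    // Output/Input classes imported above might look like this (a sketch,
    // not the original code):
    val output = new Output(4096)
    kryo.writeClassAndObject(output, new JavaSerializable)
    val input = new Input(output.toBytes)
    println(kryo.readClassAndObject(input).getClass)
  }
}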
[run]
branch = true
parallel = true
data_file = ${COVERAGE_DIR}/coverage_data/coverage
[html]
ignore_errors = true
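(With parallel = true above, coverage.py writes a separate data file per process, appending a suffix to the configured data_file path; the per-process files are merged afterwards with the coverage combine command.)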
package org.apache.spark.sql.catalyst.expressions.codegen

import org.codehaus.janino.SimpleCompiler

object CodeGenBenchmark {
  def quasiquotes(): Unit = {
    import scala.reflect.runtime.{universe => ru}
    import scala.reflect.runtime.universe._
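    import scala.tools.reflect.ToolBox
    // Truncated in this capture; a runtime toolbox is the standard way to
    // compile and evaluate a quasiquote (a sketch, not the gist's benchmark body):
    val toolbox = ru.runtimeMirror(getClass.getClassLoader).mkToolBox()
    println(toolbox.eval(q"1 + 1"))
  }

  // The Janino side of the comparison, using the SimpleCompiler imported above
  // (the generated class and method names here are illustrative):
  def janino(): Unit = {
    val compiler = new SimpleCompiler()
    // Compile a trivial Java class entirely in memory:
    compiler.cook("public class Generated { public static int add(int a, int b) { return a + b; } }")
    val cls = compiler.getClassLoader.loadClass("Generated")
    val add = cls.getMethod("add", classOf[Int], classOf[Int])
    println(add.invoke(null, Int.box(1), Int.box(2)))
  }
}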
@JoshRosen
JoshRosen / gist:c70a1985a8d90627401b
Created July 30, 2015 23:32
Spark 1.4.1 Maven Dependency Tree with Scala 2.11
[INFO] --- maven-dependency-plugin:2.10:tree (default-cli) @ spark-core_2.11 ---
[INFO] org.apache.spark:spark-core_2.11:jar:1.4.1
[INFO] +- com.google.guava:guava:jar:14.0.1:provided
[INFO] +- com.twitter:chill_2.11:jar:0.5.0:compile
[INFO] | \- com.esotericsoftware.kryo:kryo:jar:2.21:compile
[INFO] | +- com.esotericsoftware.reflectasm:reflectasm:jar:shaded:1.07:compile
[INFO] | +- com.esotericsoftware.minlog:minlog:jar:1.2:compile
[INFO] | \- org.objenesis:objenesis:jar:1.2:compile
[INFO] +- com.twitter:chill-java:jar:0.5.0:compile
[INFO] +- org.apache.hadoop:hadoop-client:jar:2.2.0:compile
"""
Spaghetti code to delete comments from AmplabJenkins.
"""
import os
import sys
import requests
from link_header import parse as parse_link_header
import logging
import json
import org.apache.spark._

// Stand-in for the gist's committer, whose definition is truncated in this capture:
class MyOutputCommitter extends org.apache.hadoop.mapred.FileOutputCommitter

object Main {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().set("spark.speculation", "true")
    // local[2, 4]: two worker threads, with spark.task.maxFailures set to 4 (failed tasks are retried)
    val sc = new SparkContext("local[2, 4]", "test", conf)
    // sc.setLogLevel("DEBUG")
    sc.hadoopConfiguration.set("mapred.output.committer.class", classOf[MyOutputCommitter].getCanonicalName)
    val tempDir = java.nio.file.Files.createTempDirectory("outputcommitter-test")
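    // The rest of the gist is cut off; presumably it ran a job that writes
    // through the configured committer, roughly like this (the job and output
    // path are illustrative, not the original code):
    sc.parallelize(1 to 1000, 8)
      .saveAsTextFile(tempDir.resolve("output").toString)
    sc.stop()
  }
}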
@JoshRosen
JoshRosen / gist:3340bbd893ae48a6526b
Created October 30, 2015 20:53
Caching RDD[String] via DataFrames (more efficient if the file is highly compressible via dictionary encoding).
// Assumes a Spark 1.x SQLContext whose implicits are in scope (needed for toDF):
import sqlContext.implicits._

val fileToRead = "/path/to/my/file"
val df = sc.textFile(fileToRead).map(l => Tuple1(l)).toDF("line").cache()
val rdd: RDD[String] = df.rdd.map(_.getString(0))
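Caching through a DataFrame stores the lines in Spark SQL's in-memory columnar format, which can compress repeated values (for example via dictionary encoding) rather than keeping one Java String object per line; the last line recovers an ordinary RDD[String] view of the cached data.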
@JoshRosen
JoshRosen / Jinja module loader.md
Created November 29, 2015 22:37 — forked from voscausa/Jinja module loader.md
Jinja2 compiled templates module loader for App Engine Python 2.7.

Jinja compiled templates module loader

This code is part of a Jinja CMS for Google App Engine Python 2.7 and the NDB datastore.

A Jinja environment is created for every CMS site: site_key_id = 'example'.

The modules are created using compiler.py. The resulting code objects are stored in the datastore using the Runtimes kind and a BlobProperty.

The modules can also be saved or downloaded as .pyc files in a zip archive: -compiled-templates.zip