Skip to content

Instantly share code, notes, and snippets.

View JoshRosen's full-sized avatar

Josh Rosen JoshRosen

View GitHub Profile
@JoshRosen
JoshRosen / ObjectGraphVisualizer.scala
Created November 5, 2014 18:41
Object graph visualization for debugging serialization issues
import java.lang.reflect.{Modifier, Field}
import com.google.common.collect.Sets
import scala.collection.mutable
import scala.collection.JavaConversions._
/**
* Generates GraphViz DOT files for visualizing Java object graphs.
*/
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
@JoshRosen
JoshRosen / gist:46699e66ea5bf18c71f1
Created February 20, 2015 17:19
Reasons why spark-avro 0.1 is incompatible with Spark 1.1.0 (generated by JarHeaven)
Method<InternalName(org/apache/spark/sql/api/java/JavaSQLContext)/baseRelationToSchemaRDD: MethodDescriptor((Lorg/apache/spark/sql/sources/BaseRelation;)Lorg/apache/spark/sql/api/java/JavaSchemaRDD;)>
Method<InternalName(org/apache/spark/sql/catalyst/types/StructField$)/apply$default$4: MethodDescriptor(()Lorg/apache/spark/sql/catalyst/util/Metadata;)>
Method<InternalName(org/apache/spark/sql/SQLContext)/baseRelationToSchemaRDD: MethodDescriptor((Lorg/apache/spark/sql/sources/BaseRelation;)Lorg/apache/spark/sql/SchemaRDD;)>
Method<InternalName(org/apache/spark/sql/catalyst/types/StructField$)/apply: MethodDescriptor((Ljava/lang/String;Lorg/apache/spark/sql/catalyst/types/DataType;ZLorg/apache/spark/sql/catalyst/util/Metadata;)Lorg/apache/spark/sql/catalyst/types/StructField;)>
Method<InternalName(org/apache/spark/sql/sources/TableScan)/<init>: MethodDescriptor(()V)>
@JoshRosen
JoshRosen / SnappyBenchmark.scala
Created April 9, 2015 02:10
Exploring whether changes to snappy-java have resulted in worse compression in newer versions
import java.io._
import org.xerial.snappy.SnappyOutputStream
object Main {
def main(args: Array[String]) {
val blockSize = sys.env.getOrElse("BLOCK_SIZE", "32768").toInt
val resetFrequency = sys.env.getOrElse("RESET_FREQUENCY", "100").toInt
val byteArrayOutputstream = new ByteArrayOutputStream()
package org.apache.spark.sql
import org.apache.spark.unsafe.memory.{MemoryAllocator, TaskMemoryManager, ExecutorMemoryManager}
import scala.util.Random
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._
/**
package org.apache.spark.sql
import org.apache.spark.sql.catalyst.expressions.{UnsafeFixedWidthAggregationMap, SpecificMutableRow, MutableRow, GenericRow}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.memory.MemoryAllocator
import org.openjdk.jmh.annotations._
import scala.util.Random
import java.io.{PrintWriter, FileOutputStream, File}
import org.apache.spark._
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.rdd.{RDD, ShuffledRDD}
import org.apache.spark.scheduler.{SparkListenerStageSubmitted, SparkListenerTaskEnd, SparkListener}
/**
* Extends [[ShuffledRDD]] to skip the shuffle fetching.
*/
package org.apache.spark.sql
import org.apache.spark.unsafe.PlatformDependent
import org.apache.spark.unsafe.map.BytesToBytesMap
import org.apache.spark.unsafe.memory.{MemoryAllocator, TaskMemoryManager, ExecutorMemoryManager}
import scala.util.Random
/**
* This benchmark measures the time to iterate over a BytesToBytesMap.
package org.apache.spark.shuffle.unsafe;
import org.apache.spark.JavaAPISuite;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
@RunWith(Suite.class)
@Suite.SuiteClasses({
UnsafeShuffleWriterSuite.class,
JavaAPISuite.class