Skip to content

Instantly share code, notes, and snippets.

View JoshRosen's full-sized avatar

Josh Rosen JoshRosen

View GitHub Profile
@JoshRosen
JoshRosen / scapegoat-to-csv-spark.py
Created May 24, 2017 22:30
Scapegoat output to CSV converter for Spark
import xml.etree.ElementTree as ET
import glob
import fnmatch
import os
import csv
# 40-character hex identifiers; presumably the git commit SHAs of the Scapegoat
# and Spark revisions this script was run against -- TODO confirm.
SCAPEGOAT_VERSION = 'd9392e5072e3e408dd232e6fc799e0ac1640189b'
SPARK_VERSION = '4816c2ef5e04eb2dd70bed8b99882aa0b7fe7fd7'
# Hard-coded absolute path on the author's machine (note the trailing slash);
# presumably the root of a local Spark checkout -- adjust before running elsewhere.
SPARK_HOME = '/Users/joshrosen/Documents/spark/'
/* 001 */ public Object generate(Object[] references) {
/* 002 */ return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=1
/* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */ private Object[] references;
/* 008 */ private scala.collection.Iterator[] inputs;
/* 009 */ private boolean range_initRange_0;
/* 010 */ private long range_nextIndex_0;
diff --git a/OpenHashMap$mcD$sp.class.asm b/OpenHashMap$mcD$sp.class.asm
index 3989e91..ea49dbb 100644
--- a/OpenHashMap$mcD$sp.class.asm
+++ b/OpenHashMap$mcD$sp.class.asm
@@ -29,6 +29,54 @@ public class org/apache/spark/util/collection/OpenHashMap$mcD$sp extends org/apa
// access flags 0x1
public D nullValue$mcD$sp
+ // access flags 0x1019
+ public final static synthetic $anonfun$changeValue$3(Lorg/apache/spark/util/collection/OpenHashMap$mcD$sp;I)V
@JoshRosen
JoshRosen / bench.scala
Created October 21, 2021 21:34
toy benchmark of OpenHashMap
def timeAndRecordAllocations(
numWarmups: Int,
numTrials: Int
)(functionToBenchmark: => Unit): Unit = {
import java.lang.management.ManagementFactory
import com.sun.management.ThreadMXBean
val threadMxBean = ManagementFactory.getThreadMXBean.asInstanceOf[ThreadMXBean]
val threadId = Thread.currentThread.getId
@JoshRosen
JoshRosen / out.diff
Created October 21, 2021 22:05
Bytecode diff related to https://github.com/apache/spark/pull/34351 where I see what happens if I just change protected to private
diff --git a/OpenHashMap.class.asm b/OpenHashMap.class.asm
index aa713d0..b684cf3 100644
--- a/OpenHashMap.class.asm
+++ b/OpenHashMap.class.asm
@@ -1375,10 +1375,10 @@
MAXSTACK = 1
MAXLOCALS = 1
- // access flags 0x1
+ // access flags 0x2
@JoshRosen
JoshRosen / strip_info.py
Last active January 20, 2025 02:39
ChatGPT o1 generated code for dropping Info frames from MP3 files; largely untested and over-simplified, not production grade, YMMV, etc. See https://github.com/remsky/Kokoro-FastAPI/issues/57#issuecomment-2601202635 for motivating context
import struct
def is_mpeg2_l3_sync(header_int: int) -> bool:
"""
Returns True if the top 11 bits = 0x7FF (frame sync),
version_id == 2 (MPEG-2), and layer_index == 1 (Layer III).
"""
# Frame sync check
if ((header_int >> 21) & 0x7FF) != 0x7FF:
return False