Skip to content

Instantly share code, notes, and snippets.

@emlyn
Created July 30, 2015 08:20
Show Gist options
  • Save emlyn/82d044dfe67d76db1191 to your computer and use it in GitHub Desktop.
Save emlyn/82d044dfe67d76db1191 to your computer and use it in GitHub Desktop.
import os
# Reuse a SparkContext already bound to the name ``sc`` (presumably by an
# interactive PySpark shell -- confirm for your environment); only build a
# new one when that name is undefined.
try:
    sc
except NameError:
    from pyspark import SparkContext, SparkConf
    sc = SparkContext(conf=SparkConf().setAppName("lol"))

# Directories that look() scans and hack() patches.  Each entry must end with
# '/' because file names are appended to it by plain string concatenation.
dirs = ['/home/hadoop/.versions/spark-1.4.0.b/classpath/emrfs/',
        '/home/hadoop/.versions/2.4.0-amzn-5/share/hadoop/common/lib/']

# Raw bytes of the replacement jar, read once from the current working
# directory.  A ``with`` block closes the handle deterministically instead of
# leaking it, and ``open`` replaces the Python-2-only ``file`` builtin.
with open('avro-1.7.6.jar', 'rb') as _jar_file:
    jar = _jar_file.read()
def look(_):
    """Yield a single list of the Avro-related files found under ``dirs``.

    The argument (the partition iterator supplied by ``mapPartitions``) is
    ignored.  Every entry of every directory in the module-level ``dirs``
    list whose name contains ``'avro'`` is reported as directory + name.
    Errors from ``os.listdir`` propagate to the caller.
    """
    found = []
    for base in dirs:
        for entry in os.listdir(base):
            if 'avro' in entry:
                found.append(base + entry)
    yield found
def hack(_):
    """Swap avro-1.7.4.jar for avro-1.7.6.jar in every directory of ``dirs``.

    The argument (the partition iterator supplied by ``mapPartitions``) is
    ignored.  For each directory the old jar is renamed to ``*.bak``; if it
    is still present afterwards it is removed; then the bytes held in the
    module-level ``jar`` are written out as ``avro-1.7.6.jar``.

    Yields:
        The marker string ``'h4xx0r'`` once, after all directories have been
        processed.

    Raises:
        IOError/OSError: if the new jar cannot be written.  Failures while
        moving or deleting the old jar are deliberately ignored.
    """
    for d in dirs:
        old_jar = d + 'avro-1.7.4.jar'
        # Best effort: keep a backup of the old jar.  Only filesystem errors
        # (missing file, permissions, ...) are ignored; anything else is a
        # programming error and should surface.
        try:
            os.rename(old_jar, old_jar + '.bak')
        except OSError:
            pass
        # If the rename did not move the old jar out of the way, delete it.
        # After a successful rename this fails with "no such file", which is
        # expected and ignored.
        try:
            os.unlink(old_jar)
        except OSError:
            pass
        with open(d + 'avro-1.7.6.jar', 'wb') as f:
            f.write(jar)
    yield 'h4xx0r'
# Spread 1000 dummy elements over the cluster so the functions below are run
# once per partition; the element values themselves are never used.
df = sc.parallelize(range(1000))

# Report the Avro jars before patching, apply the patch, then report again.
# A single parenthesised argument prints the same text as a print statement.
for _stage in (look, hack, look):
    print(df.mapPartitions(_stage).collect())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment