Skip to content

Instantly share code, notes, and snippets.

@robenalt
Created November 9, 2016 18:37
Show Gist options
  • Save robenalt/5b06415f52009c5035910d91f5b919ad to your computer and use it in GitHub Desktop.
Save robenalt/5b06415f52009c5035910d91f5b919ad to your computer and use it in GitHub Desktop.
Sample PySpark context setup with SparkConf configuration parameters
# Build the Spark configuration for a YARN-client deployment.
# Assumes `args` (parsed CLI options with C/E/L attributes) and
# `executorInstances` are defined earlier in the script — TODO confirm.
conf = SparkConf().setMaster("yarn-client").setAppName("sparK-mer")
#conf = SparkConf().setMaster("local[16]").setAppName("sparK-mer")
# NOTE(review): "yarn.nodemanager.resource.cpu_vcores" is a YARN NodeManager
# property (set in yarn-site.xml), not a spark.* property — setting it on
# SparkConf has no effect on the cluster. Kept for parity; remove or move to
# YARN config.
conf.set("yarn.nodemanager.resource.cpu_vcores", args.C)
# Saturate the cluster with executors
conf.set("spark.executor.instances", executorInstances)
conf.set("spark.executor.heartbeatInterval", "5s")
# Cores per executor
conf.set("spark.executor.cores", args.E)
# Cores available to the driver
conf.set("spark.driver.cores", 12)
# NOTE(review): all spark.akka.* settings were removed when Spark dropped
# Akka (Spark 1.6/2.x) — they are no-ops on modern Spark versions.
# Number of Akka threads
conf.set("spark.akka.threads", 256)
# Memory per Python worker used during aggregation, before spilling to disk
conf.set("spark.python.worker.memory", "5g")
# Maximum Akka message size in MB
conf.set("spark.akka.frameSize", "128")
conf.set("spark.akka.timeout", "200s")
conf.set("spark.akka.heartbeat.interval", "10s")
#conf.set("spark.broadcast.blockSize","128m")
# Cap on total serialized results returned to the driver (e.g. collect())
conf.set("spark.driver.maxResultSize", "20g")
# Max map output fetched simultaneously by each reduce task
conf.set("spark.reducer.maxSizeInFlight", "5g")
conf.set("spark.executor.memory", "7g")
#conf.set("spark.shuffle.memoryFraction",0.4)
#conf.set("spark.storage.memoryFraction",0.3)
#conf.set("spark.storage.unrollFraction",0.3)
#conf.set("spark.storage.memoryMapThreshold","256m")
#conf.set("spark.kryoserializer.buffer.max","1g")
#conf.set("spark.kryoserializer.buffer","128m")
#conf.set("spark.core.connection.ack.wait.timeout","600")
#conf.set("spark.shuffle.consolidateFiles","true")
#conf.set("spark.shuffle.file.buffer","32m")
conf.set("spark.shuffle.manager", "sort")
conf.set("spark.shuffle.spill", "true")
# Create the SparkContext from the configuration above.
# Fix: the original called SparkContext("", "", conf=conf), passing empty
# strings as master/appName and relying on falsy-string handling not to
# clobber the values already set on `conf`. The documented way to construct
# a context entirely from a SparkConf is to pass conf alone.
sc = SparkContext(conf=conf)
#sc.setLogLevel(args.L)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment