Spark Env Shell for YARN - Vagrant Hadoop 2.3.0 cluster, pseudo-distributed mode.
#!/usr/bin/env bash
# This file contains environment variables required to run Spark. Copy it as
# spark-env.sh and edit it to configure Spark for your site.
#
# The following variables can be set in this file:
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
# - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that
#   we recommend setting app-wide options in the application's driver program.
#   Examples of node-specific options: -Dspark.local.dir, GC options
#   Examples of app-wide options: -Dspark.serializer
#
# If using the standalone deploy mode, you can also set variables for it here:
# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_PUBLIC_DNS, to set the public dns name of the master
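#
# A typical way to put this file in place (a sketch; paths assume a Spark
# source checkout, and conf/spark-env.sh is sourced automatically by the
# launch scripts):
#   cp conf/spark-env.sh.template conf/spark-env.sh
#   $EDITOR conf/spark-env.sh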
export JAVA_OPTS_ERROR_HANDLING="-XX:ErrorFile=/tmp/spark-shell-hs_err_pid.log \
 -XX:HeapDumpPath=/tmp/spark-shell-java_pid.hprof \
 -XX:-HeapDumpOnOutOfMemoryError"
export JAVA_OPTS_GC="-XX:-PrintGC -XX:-PrintGCDetails \
 -XX:-PrintGCTimeStamps \
 -XX:-PrintTenuringDistribution \
 -XX:-PrintAdaptiveSizePolicy \
 -XX:GCLogFileSize=1024K \
 -XX:-UseGCLogFileRotation \
 -Xloggc:/tmp/spark-shell-gc.log \
 -XX:+UseConcMarkSweepGC"
export JAVA_OPTS="$JAVA_OPTS_ERROR_HANDLING $JAVA_OPTS_GC"
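# A quick sanity check that a HotSpot JVM accepts all of the flags above
# (a sketch; any JDK 7 java on the PATH should do):
#   java $JAVA_OPTS -version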
# We need to bind to a specific interface so that YARN can contact the client (driver).
export SPARK_JAVA_OPTS="$JAVA_OPTS -Dspark.cleaner.ttl=10000 -Dspark.driver.host=33.33.33.1"
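# 33.33.33.1 is the VirtualBox host-only adapter on the host machine (an
# assumption based on common Vagrant defaults); it can be confirmed with:
#   ifconfig vboxnet0 | grep 'inet '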
#export SPARK_REPL_OPTS="$JAVA_OPTS $SPARK_REPL_OPTS"
#export HADOOP_HOME="/usr/lib/hadoop"
export HADOOP_HOME="/usr/local/Cellar/hadoop/2.2.0/libexec"
export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
export HDFS_URL="hdfs://spark-plug-bigtop-08.localdomain:8020"
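# A quick check that the namenode in the guest is reachable from the host
# (a sketch; assumes the guest hostname resolves, e.g. via /etc/hosts):
#   $HADOOP_HOME/bin/hdfs dfs -ls "$HDFS_URL/"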
#export SPARK_HOME="/vagrant/spark-dist/1.0.0-SNAPSHOT-hadoop_2.3.0-yarn-updates"
#export SPARK_EXECUTOR_URI="$HDFS_URL/user/vagrant/frameworks/spark-dist/1.0.0-SNAPSHOT-ada310a9/spark-assembly-1.0.0-SNAPSHOT-hadoop2.3.0.jar"
# YARN env configuration.
export SPARK_YARN_USER_ENV="JAVA_HOME=/usr/java/jdk1.7.0_51"
# MASTER needs to be yarn-client.
export MASTER="yarn-client"
export SPARK_JAR="dist/jars/spark-assembly-1.0.0-SNAPSHOT-hadoop2.3.0-yarn.jar"
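# The assembly jar above comes from building Spark against Hadoop 2.3 with
# YARN support (a sketch; this was the sbt invocation for Spark 1.0-era
# source trees):
#   SPARK_HADOOP_VERSION=2.3.0 SPARK_YARN=true sbt/sbt assembly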
# Running the SparkPi example:
# HADOOP_USER_NAME=vagrant \
# SPARK_HADOOP_VERSION=2.3.0 \
# SPARK_YARN=true \
# with-jdk7 ./bin/spark-submit \
#   --jars examples/target/scala-2.10/spark-examples_2.10-assembly-1.0.0-SNAPSHOT.jar \
#   --name pi_example \
#   --class org.apache.spark.examples.SparkPi \
#   --deploy-mode client \
#   --master yarn \
#   --executor-cores 1 \
#   --files log4j.properties \
#   --arg yarn-client \
#   --arg 2 \
#   --verbose \
#   examples/target/scala-2.10/spark-examples_2.10-assembly-1.0.0-SNAPSHOT.jar
#
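# Once this file is in place, the interactive shell can be started the same
# way (a sketch; MASTER is already exported above):
#   HADOOP_USER_NAME=vagrant ./bin/spark-shell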
#export SPARK_YARN_APP_JAR="examples/target/scala-2.10/spark-examples_2.10-assembly-1.0.0-SNAPSHOT.jar"
#export YARN_APPLICATION_CLASSPATH="$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*"
export YARN_APPLICATION_CLASSPATH="log4j.properties"
export SPARK_MASTER_MEMORY="400M"
export SPARK_DRIVER_MEMORY="400M"
export SPARK_WORKER_INSTANCES="1"
export SPARK_EXECUTOR_INSTANCES="1"
export SPARK_WORKER_MEMORY="400M"
export SPARK_EXECUTOR_MEMORY="400M"
export SPARK_WORKER_CORES="2"
export SPARK_EXECUTOR_CORES="1"
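# The 400M requests above must fit within YARN's container limits; the
# cluster's ceiling can be checked with (a sketch, assuming the property is
# set explicitly in yarn-site.xml):
#   grep -A1 'yarn.scheduler.maximum-allocation-mb' "$HADOOP_CONF_DIR/yarn-site.xml"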
#export SPARK_YARN_QUEUE
#export SPARK_YARN_APP_NAME="Spark Shell"
#export SPARK_YARN_DIST_FILES
#export SPARK_YARN_DIST_ARCHIVES
hi,
can I ask you please to advise about my install?
I have a cluster with the following features:
a driver with 10 GB of RAM
10 nodes, each with 10 GB of RAM
In the present setup I have Spark 1.3 already installed, and I want to use the 1.5.2 prebuilt version without installing it.
How can I do that, please?
thanks in advance
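The prebuilt tarball runs in place, so one approach is simply to unpack it next to the existing install and launch from its own directory (a sketch, assuming the Hadoop 2.6 prebuilt package and that your cluster config lives in /etc/hadoop/conf):

wget https://archive.apache.org/dist/spark/spark-1.5.2/spark-1.5.2-bin-hadoop2.6.tgz
tar -xzf spark-1.5.2-bin-hadoop2.6.tgz
cd spark-1.5.2-bin-hadoop2.6
HADOOP_CONF_DIR=/etc/hadoop/conf ./bin/spark-shell --master yarn-client

The two versions can coexist as long as each is launched from its own directory.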