Skip to content

Instantly share code, notes, and snippets.

@cwensel
Last active August 29, 2015 14:11
Show Gist options
  • Select an option

  • Save cwensel/e5336ca8cd97b3b4ad5b to your computer and use it in GitHub Desktop.

Select an option

Save cwensel/e5336ca8cd97b3b4ad5b to your computer and use it in GitHub Desktop.
# Option string that runs the EMR configure-hadoop bootstrap action with YARN
# log aggregation switched on and the aggregated logs written to S3.
# NOTE(review): presumably appended to the cluster-creation command - confirm.
#
# Change the bucket name in the last setting (remote-app-log-dir) to your own.
OPTIONS="--bootstrap-action s3://elasticmapreduce/bootstrap-actions/configure-hadoop"
OPTIONS="${OPTIONS} --args -y,yarn.log-aggregation-enable=true"
OPTIONS="${OPTIONS},-y,yarn.log-aggregation.retain-seconds=-1"
OPTIONS="${OPTIONS},-y,yarn.log-aggregation.retain-check-interval-seconds=3000"
OPTIONS="${OPTIONS},-y,yarn.nodemanager.remote-app-log-dir=s3://your-bucket/emr/yarn-logs"
#!/bin/bash
#
# Runs the "load" benchmark twice with the same workload arguments: first on
# the hadoop2-tez platform, then on the hadoop2-mr1 platform. Each run's
# stdout and stderr are captured in logs/out-tez.txt and logs/out-mr1.txt.
#
# Required environment:
#   HADOOP_CONF_DIR  passed to yarn-daemon.sh via --config.
#
# NOTE(review): the relative paths used here (./.versions, load-*.jar, logs/)
# suggest the script is meant to be started from the hadoop user's home
# directory - confirm.
#
# Exit status: 0 if both loads succeed; 1 if setup fails or either load fails.

# Workload sizing, forwarded to the load tool through the LOAD arguments.
readonly GATHER=64
readonly FILE_SIZE=256
readonly NUM_FILES=8

# Tez distribution: where it is fetched from and where the copies live.
readonly TEZ_VERSION='tez-0.5.1'
readonly TEZ_SOURCE="s3n://files.cascading.org/third-party/yarn/apps/${TEZ_VERSION}/"
readonly TEZ_LOCAL_DIR="${HOME}/${TEZ_VERSION}"
readonly TEZ_HDFS_DIR="/apps/${TEZ_VERSION}"

# Arguments shared by both runs. Kept as an array so every argument reaches
# the tool as exactly one word without relying on unquoted word splitting.
readonly -a LOAD=(
  -I input -O output -W working
  -S s3n://dev.concurrentinc.com/load-benchmark/stats/
  -SLS
  -NR "$GATHER"
  -ALL
  --generate-num-files "$NUM_FILES"
  --generate-file-size "$FILE_SIZE"
  --generate-words-mean 0
  -MXCF 1
)

# Hadoop properties (-DH) passed identically to both platforms.
readonly -a COMMON_PROPERTIES=(
  -DH 'io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec'
  -DH 'mapred.output.committer.class=org.apache.hadoop.mapred.FileOutputCommitter'
)

# Prints an error message to stderr and aborts the script with status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Prints a warning to stderr; execution continues.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Creates /user/ and /apps/ in HDFS and opens their permissions (777,
# recursively). A failed mkdir aborts; a failed chmod is only reported.
prepare_hdfs() {
  local dir
  for dir in /user/ /apps/; do
    hdfs dfs -mkdir -p "$dir" || die "cannot create HDFS directory ${dir}"
    hdfs dfs -chmod -R 777 "$dir" || warn "cannot chmod HDFS directory ${dir}"
  done
}

# Fetches the Tez distribution to the local disk (for the client classpath)
# and into HDFS (referenced by tez.lib.uris). A target that already exists is
# left alone so the script can be re-run; delete it to force a fresh copy.
install_tez() {
  if [[ ! -d "$TEZ_LOCAL_DIR" ]]; then
    hdfs dfs -copyToLocal "$TEZ_SOURCE" "$TEZ_LOCAL_DIR" \
      || die "cannot copy ${TEZ_SOURCE} to ${TEZ_LOCAL_DIR}"
  fi
  if ! hdfs dfs -test -d "$TEZ_HDFS_DIR"; then
    hdfs dfs -cp "$TEZ_SOURCE" "$TEZ_HDFS_DIR" \
      || die "cannot copy ${TEZ_SOURCE} to HDFS ${TEZ_HDFS_DIR}"
  fi
}

# Runs the setup steps and then both loads. Returns 0 only if both loads
# succeeded; a failed Tez load does not prevent the MR load from running.
main() {
  # Fail early: with an empty value, --config would consume "start" instead.
  : "${HADOOP_CONF_DIR:?HADOOP_CONF_DIR must be set}"

  local original_classpath="${HADOOP_CLASSPATH:-}"
  local status=0
  # Kept literal on purpose: Hadoop resolves ${fs.default.name}, not the shell.
  # shellcheck disable=SC2016
  local fs_root='${fs.default.name}'

  mkdir -p logs || die "cannot create the logs directory"
  prepare_hdfs
  install_tez

  # Put the local Tez jars in front of any classpath that was already set.
  # The ":+" form avoids a trailing ':' (an empty classpath entry) when there
  # was none. The "*" wildcards stay literal; the JVM expands them.
  export HADOOP_CLASSPATH="${TEZ_LOCAL_DIR}/*:${TEZ_LOCAL_DIR}/lib/*${original_classpath:+:${original_classpath}}"

  # A non-zero status here (for example, the daemon is already running on a
  # re-run) is reported but does not stop the benchmark.
  ./.versions/2.4.0/sbin/yarn-daemon.sh --config "$HADOOP_CONF_DIR" start historyserver \
    || warn "yarn-daemon.sh start historyserver returned a non-zero status"

  echo "Starting Tez load"
  # tez.shuffle-vertex-manager.enable.auto-parallel=true
  # tez.am.mode.session=true
  if ! hadoop jar load-hadoop2-tez-*.jar --platform hadoop2-tez "${LOAD[@]}" \
    -DH "tez.lib.uris=${fs_root}${TEZ_HDFS_DIR},${fs_root}${TEZ_HDFS_DIR}/lib/" \
    -DH "yarn.timeline-service.hostname=${HOSTNAME}" \
    "${COMMON_PROPERTIES[@]}" &> logs/out-tez.txt; then
    warn "Tez load failed, see logs/out-tez.txt"
    status=1
  fi

  # Drop the Tez jars again, but keep whatever classpath the caller had set.
  export HADOOP_CLASSPATH="$original_classpath"

  echo "Starting MR load"
  if ! hadoop jar load-hadoop2-mr1-*.jar --platform hadoop2-mr1 "${LOAD[@]}" \
    "${COMMON_PROPERTIES[@]}" &> logs/out-mr1.txt; then
    warn "MR load failed, see logs/out-mr1.txt"
    status=1
  fi

  return "$status"
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment