xgb-workflow.sh
#!/bin/bash
set -e
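# Resolve the directory containing this script and cd into it so the relative paths below work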
SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
echo "SCRIPT_DIR=${SCRIPT_DIR}"
cd "${SCRIPT_DIR}"
S3_INPUT_URL=${1}
S3_OUTPUT_URL=${2}
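# Credentials: prefer the environment variables when already set, otherwise fall back to positional args 3 and 4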
AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-$3}
AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-$4}
echo "S3_INPUT_URL=${S3_INPUT_URL}"
echo "S3_OUTPUT_URL=${S3_OUTPUT_URL}"
# Exit with a usage message if a required parameter is empty
function check_param() {
  if [ -z "$1" ]; then
    echo "Usage: bash xgb-workflow.sh <S3_INPUT_URL> <S3_OUTPUT_URL> [AWS_ACCESS_KEY_ID] [AWS_SECRET_ACCESS_KEY]"
    echo "Note that AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are read from the environment when set there"
    exit 1
  fi
}
check_param "${S3_INPUT_URL}"
check_param "${S3_OUTPUT_URL}"
check_param "${AWS_ACCESS_KEY_ID}"
check_param "${AWS_SECRET_ACCESS_KEY}"
(
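# Everything below runs in a subshell so that stdout and stderr can be teed into xgb-workflow.log at the end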
echo "#### "
echo "#### Enter ETL stage..."
# ETL: submit the PySpark job (etl_transaction.py) against the S3 input
/opt/spark-1.6.2-bin-hadoop2.6/bin/spark-submit --master yarn --deploy-mode client ./etl_transaction.py \
  "${S3_INPUT_URL}" \
  "${AWS_ACCESS_KEY_ID}" \
  "${AWS_SECRET_ACCESS_KEY}"
echo "#### "
echo "#### Enter Forecast stage..."
Rscript ./forecast.R
echo "#### "
echo "#### Uploading outputs to S3 location: ${S3_OUTPUT_URL}/${latest_output_dir}/ ..."
if ! location="$(type -p aws)" || [ -z "$location" ]; then
pip install awscli
fi
export AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
export AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-"us-east-1"}
latest_output_dir=$(ls -rt -1d output-* | head)
aws s3 cp --recursive ${latest_output_dir} ${S3_OUTPUT_URL}/${latest_output_dir}/
echo "#### "
echo "#### Workflow completed!"
) 2>&1 | tee ./xgb-workflow.log;