Skip to content

Instantly share code, notes, and snippets.

@epishkin
Last active December 19, 2015 12:08
Show Gist options
  • Save epishkin/5952522 to your computer and use it in GitHub Desktop.
Save epishkin/5952522 to your computer and use it in GitHub Desktop.
Script to upload an Oozie workflow / coordinator to HDFS.

project structure:

.
├── oozie
│   ├── upload.sh
│   ├── combined_queries
│   │   └── ...
│   └── simple_reports
│       ├── lib
│       │   ├── avro-1.7.4.jar
│       │   └── piggybank.jar
│       ├── scripts
│       │   ├── script-1.pig
│       │   └── script-2.pig
│       ├── coordinator.xml
│       ├── coordinator_script-1.properties
│       ├── coordinator_script-2.properties
│       ├── workflow.xml
│       ├── workflow_script-1.properties
│       └── workflow_script-2.properties
└── target
#!/bin/sh
# Print the usage text and abort when the mandatory source folder argument
# is missing or empty.
if [ -z "${1}" ]; then
  # Quoted delimiter: the text is emitted literally, so the #{...} placeholders
  # and the single quotes need no escaping.
  cat <<'EOF'
Usage: upload.sh source_folder [namenode] [properties file for the preprocessor]
 *namenode* is optional. Default value is 'sandbox'. Use 'namenode01' to upload to production.
 if 3rd parameter is not empty, 'appName' and 'frequency' properties are injected to #{appName} and #{frequency} placeholders

Examples:
 ./upload.sh simple_reports
 ./upload.sh simple_reports namenode01
 ./upload.sh simple_reports namenode01 simple_reports/coordinator_script-1.properties
EOF
  # A negative exit status is not valid in POSIX sh (dash aborts with
  # "Illegal number: -1"), so use a plain non-zero status instead.
  exit 1
fi
# Deployment settings derived from the command-line arguments.
#   $1 - project (source) folder
#   $2 - target cluster, optional
#   $3 - properties file for the XML preprocessor, optional
PROJECT=$1

# An empty or "sandbox" second argument selects the sandbox cluster; any other
# value selects the namenode01 configuration.
case "${2}" in
  "" | sandbox)
    HADOOP_CONF=$HDP_HOME/conf
    NAME_NODE=sandbox:8020
    OOZIE_CLI="oozie"
    ;;
  *)
    HADOOP_CONF=$HDP_HOME/conf-node01
    NAME_NODE=namenode01:8020
    OOZIE_CLI="oozie-node01"
    ;;
esac

# Stays empty when no third argument is given, which disables preprocessing.
PROPERTIES_FILE=${3}

# Remote deployment root, local staging directory and remote target directory.
BASE_DIR=hdfs://${NAME_NODE}/oozie/deployments
DIST_DIR=../target/oozie/${PROJECT}
COPY_TO=${BASE_DIR}/${PROJECT}
# Runs the hadoop CLI with the configuration directory of the selected cluster.
# Globals:   HADOOP_CONF (read)
# Arguments: hadoop sub-command and its arguments, passed through unchanged
# Returns:   exit status of hadoop
hdfs() {
  # Both expansions are quoted so that a config path or an argument containing
  # whitespace or glob characters reaches hadoop as a single, unmodified word.
  hadoop --config "${HADOOP_CONF}" "$@"
}
# Injects the 'appName' and 'frequency' properties into the staged XML files
# when a properties file was supplied; does nothing otherwise.
# Globals:   PROPERTIES_FILE (read)
# Arguments: none
preprocessXmlFiles() {
  # Quoted -n test: an unquoted expansion makes `[` fail with "too many
  # arguments" for a path containing whitespace, silently skipping the step.
  if [ -n "${PROPERTIES_FILE}" ]; then
    replaceProperty "appName"
    replaceProperty "frequency"
  fi
}
# Replaces every "#{<property>}" placeholder in the staged XML files with the
# value of <property> read from the properties file.
# Globals:   PROPERTIES_FILE (read), DIST_DIR (read);
#            propertyName, value, TOKEN (written, as before)
# Arguments: $1 - property name, e.g. "appName"
# Outputs:   a "Replacing ..." progress line on stdout when a value is found
# Returns:   0 on success or when the property is absent/empty,
#            1 if a file could not be rewritten
replaceProperty() {
  propertyName=$1
  # Only lines that start with "<name>=" (optionally indented) match, so
  # comment lines and other keys merely ending in the name are ignored.
  # Everything after the FIRST '=' is the value, so values may contain '='.
  # The last matching line wins.
  value=$(sed -n "s/^[[:space:]]*${propertyName}=//p" "${PROPERTIES_FILE}" | tail -n 1)
  if [ -n "${value}" ]; then
    TOKEN="#{${propertyName}}"
    printf '%s\n' "Replacing ${TOKEN} to '${value}' in ${DIST_DIR}/*.xml"
    # Escape the characters that are special in a sed replacement (backslash,
    # '&') and the '|' delimiter used below, so the value is inserted verbatim.
    _rp_escaped=$(printf '%s\n' "${value}" | sed 's/[\\&|]/\\&/g')
    for _rp_xml in "${DIST_DIR}"/*.xml; do
      # An unmatched glob stays literal; skip it.
      [ -f "${_rp_xml}" ] || continue
      # `sed -i` takes incompatible arguments on BSD and GNU sed, so edit via a
      # temporary file instead.
      _rp_tmp=$(mktemp) || return 1
      if sed "s|${TOKEN}|${_rp_escaped}|g" "${_rp_xml}" > "${_rp_tmp}"; then
        # Write back through the original file to keep its permissions.
        cat "${_rp_tmp}" > "${_rp_xml}"
      else
        rm -f "${_rp_tmp}"
        return 1
      fi
      rm -f "${_rp_tmp}"
    done
  fi
}
# --- Stage the project locally ----------------------------------------------
# Start from a clean staging directory. All path expansions are quoted so that
# project names containing whitespace or glob characters are handled intact.
rm -rf -- "${DIST_DIR}"
if ! mkdir -p -- "${DIST_DIR}"; then
  echo "Cannot create staging directory ${DIST_DIR}" >&2
  exit 1
fi
# These copies stay best-effort: cp reports a failure itself and the script
# carries on, exactly as before.
cp -R "${PROJECT}/lib" "${DIST_DIR}"
cp -R "${PROJECT}/scripts" "${DIST_DIR}"
cp "${PROJECT}"/*.xml "${DIST_DIR}"
preprocessXmlFiles

# --- Upload to HDFS -----------------------------------------------------------
echo "Uploading to ${COPY_TO}"
# The status of the removal is deliberately not checked (presumably the target
# may not exist yet on a first deployment).
hdfs fs -rmr "${COPY_TO}"
# Do not claim success below when the upload itself failed.
if ! hdfs fs -copyFromLocal "${DIST_DIR}/" "${BASE_DIR}/"; then
  echo "Upload to ${COPY_TO} failed" >&2
  exit 1
fi
echo ""
echo "Files copied to ${COPY_TO}"
hdfs fs -lsr "${COPY_TO}"

# --- Follow-up hints ------------------------------------------------------------
# Unquoted delimiter: ${OOZIE_CLI} and ${PROJECT} are expanded in the text.
cat <<EOF

To run/schedule this job execute:
${OOZIE_CLI} job -config ${PROJECT}/<CONFIG FILE> -run

To see last 5 jobs:
${OOZIE_CLI} jobs -localtime -len 5

To see last 5 scheduled jobs:
${OOZIE_CLI} jobs -localtime -len 5 -jobtype coord

EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment