Last active
September 10, 2018 16:51
-
-
Save sblack4/b8d904acb0a3d7a256a7ff2d53e58cb0 to your computer and use it in GitHub Desktop.
Bootstrap files for the Realtime Analytics workshop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# by http://sblack4.github.io - [email protected]
# updated 2018-07-30
# credit goes to David Bayard for providing the majority of this script
# see his journey 2 (where I got a lot of this) below, I highly recommend it!
# https://github.com/oracle/learning-library/tree/master/workshops/journey2-new-data-lake

# Announce exactly which script is executing (handy in bootstrap logs).
echo "Command being run: $0"

# global variables
DIRNAME=$(dirname "$0")          # directory portion of the invocation path
BASENAME=$(basename "$0")        # bare script file name
BASEDIR=$(cd "$DIRNAME" && pwd)  # absolute path of the script's directory ('' if cd fails)
_HOSTNAME=$(hostname -f)         # fully-qualified hostname of this node
# Abort unless the script is running with root privileges (euid 0).
# The bootstrap edits /etc/sudoers and installs packages, so root is required.
function check_root() {
  local euid
  euid=$(id -u)
  if [ "$euid" -ne 0 ]; then
    echo "This script must be run as root."
    echo "please try 'sudo ./bootstrap.sh' "
    exit 1
  fi
}
# Refresh yum metadata and install the helper utilities used by the
# rest of the bootstrap (locate database is primed at the end).
function update_yum() {
  # Clear any stale repository metadata before installing.
  echo "cleaning up yum metadata just in case"
  yum clean metadata

  # Helper tools; mlocate supplies updatedb/locate.
  local tools="mlocate git vim wget"
  echo "installing my favorite tools"
  yum install -y $tools
  updatedb  # build the locate(1) database now that mlocate exists
}
# Resolve the base object-store URL for this BDCSCE instance into the
# global $objectStoreURL.
# When run as part of instance creation the platform has already sourced
# the helper scripts; when run by hand we must source them ourselves.
# Globals: BASEDIR (read, may be rewritten), objectStoreURL (written).
function get_obj_store_url() {
  # During automated instance creation this script lives in vm-scripts.
  local vm_scripts_dir="/u01/app/oracle/tools/bdce/bdcsce/impl-20/vm-scripts"
  if [ "${BASEDIR}" = "${vm_scripts_dir}" ]; then
    echo "This is being run automatically."
    objectStoreURL=$(getBaseObjectStoreUrl)
  else
    echo "This is being run manually."
    BASEDIR="${vm_scripts_dir}"
    # Pull in platform constants and helpers (defines getBaseObjectStoreUrl).
    source "${BASEDIR}/constants.sh"
    source "${BASEDIR}/bdcsce_bootstrap_helper.sh" --source_only
    objectStoreURL=$(getBaseObjectStoreUrl)
  fi
}
# Grant the zeppelin user passwordless sudo (needed by the lab notebooks).
function sudo_zeppelin() {
  echo "setting up sudoers for zeppelin"
  # Append only if the exact rule is not already present, so re-running
  # the bootstrap does not pile up duplicate sudoers entries.
  if ! grep -qxF 'zeppelin ALL=(ALL) NOPASSWD: ALL' /etc/sudoers; then
    echo 'zeppelin ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers
  fi
  echo "last 10 lines of /etc/sudoers"
  tail -10 /etc/sudoers
}
# On the (single) Ambari server node that matches this host, download the
# lab notebooks, rewrite their object-store URLs, import them into Zeppelin
# via its REST API, and tune the sh/spark interpreter settings.
# Globals read: _HOSTNAME, objectStoreURL (set by get_obj_store_url).
# NOTE(review): the 'break' below runs after the first loop iteration
# regardless of a match, so only the FIRST Ambari server node is ever
# compared against this host — preserved as-is (matches the original
# "singleton" intent), but confirm that is what you want.
function import_notebooks() {
  for i in $(getAmbariServerNodes); do
    if [ "${_HOSTNAME}" = "$i" ]; then
      echo "running singleton Zeppelin section"
      echo "downloading lab notebooks"
      cd /tmp
      git clone https://gist.github.com/7206c7bb83f505a3450844310d4e3f4d.git notebooks
      # make sure not to use proxy server for this stuff
      export no_proxy='127.0.0.1'
      export NO_PROXY='127.0.0.1'
      # import notebooks
      # https://zeppelin.apache.org/docs/0.7.0/rest-api/rest-notebook.html#import-a-note
      echo "importing lab notebooks"
      # Point the notebooks at this instance's object store.
      # (Fix: the originals ran sed on *.json in /tmp, but the clone above
      # put the notebooks in /tmp/notebooks — cd there first.)
      cd /tmp/notebooks
      sed -i -- "s~swift://\$CONTAINER.default~$objectStoreURL~g" *.json
      sed -i -- "s~swift://journeyC.default~$objectStoreURL~g" *.json
      for note in /tmp/notebooks/*.json; do
        echo "$note"
        curl -X POST -d @"$note" -H "Content-Type: application/json" http://127.0.0.1:9995/api/notebook/import
      done
      # fix sh interpreter timeout and spark kafka dependency
      # https://zeppelin.apache.org/docs/0.7.0/rest-api/rest-interpreter.html
      # The heredoc below emits a Python 2 helper (urllib2 / print statement);
      # its indentation is restored here — the scraped original had lost it.
      echo "fixing sh interpreter timeout"
      cat <<EOF > /tmp/sh_settings.py
#!/usr/local/bin/python
#based on https://community.hortonworks.com/articles/36031/sample-code-to-automate-interacting-with-zeppelin.html by Ali Bajwa
import time
def post_request(url, body):
    import json, urllib2
    encoded_body = json.dumps(body)
    req = urllib2.Request(str(url), encoded_body)
    req.get_method = lambda: 'PUT'
    try:
        response = urllib2.urlopen(req, encoded_body).read()
    except urllib2.HTTPError, error:
        print 'Exception: ' + error.read()
    jsonresp = json.loads(response.decode('utf-8'))
    print jsonresp['status']

import json, urllib2
zeppelin_int_url = 'http://127.0.0.1:9995/api/interpreter/setting/'
data = json.load(urllib2.urlopen(zeppelin_int_url))
for body in data['body']:
    if body['group'] == 'sh':
        shbody = body
    elif body['group'] == 'spark':
        sparkbody = body
shbody['properties']['shell.command.timeout.millisecs'] = '3000000'
post_request(zeppelin_int_url + shbody['id'], shbody)
#time.sleep(120)
my_dict = {'groupArtifactVersion': 'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0', 'local': False}
sparkbody['dependencies'].append(my_dict)
#post_request(zeppelin_int_url + sparkbody['id'], sparkbody)
EOF
      # cat /tmp/sh_settings.py
      python /tmp/sh_settings.py
    fi
    break
  done
  # end of the do this section only on 1 zeppelin server
}
# Install Anaconda 3 under $HOME/anaconda and fetch the kafka producer
# repo used by the workshop labs into /opt/kafka_producer.
function install_kafka_producer() {
  echo "starting bootstrap script to create a kafka producer on ubuntu"
  cd /opt || return 1
  # download and run the anaconda installer in batch (silent) mode;
  # -f makes curl fail on HTTP errors instead of saving an error page.
  local installer="Anaconda3-5.0.1-Linux-x86_64.sh"
  curl -f -O "https://repo.continuum.io/archive/${installer}" || return 1
  chmod +x "${installer}"   # script already runs as root; sudo was redundant
  # -b: batch mode (accept license, no prompts), -p: install prefix
  # NOTE(review): under 'sudo ./bootstrap.sh', $HOME is root's home —
  # confirm that is the intended install location.
  ./"${installer}" -b -p "$HOME/anaconda"
  export PATH="$HOME/anaconda/bin:$PATH"
  # clone git repo with kafka producer
  git clone https://gist.github.com/7ceb0b255baa1030f9df789f14702489.git kafka_producer
  chmod -R 777 /opt/kafka_producer
  # cd kafka_producer
  # run it! (only after you've updated the config file - you can do this in the notebooks)
  # ./run.sh &
}
# Download the spark/kafka consumer jars and upload them into HDFS at /spark.
# Fixes vs. original: a bare 'su hdfs' blocked on an interactive shell (the
# hadoop commands then ran as root), the bare 'exit' terminated the entire
# bootstrap script, and 'cd last_dir' was missing the '$'. Only the hadoop
# uploads need to run as the hdfs user, so use 'su hdfs -c' for just those.
function get_streaming_files() {
  echo "downloading streaming files"
  local last_dir
  last_dir=$(pwd)
  cd /tmp || return 1
  wget -O lib.zip "https://github.com/sblack4/spark-scala-kafka-consumer/blob/master/lib.zip?raw=true"
  unzip -o lib.zip   # -o: overwrite quietly so re-runs don't prompt
  # run the HDFS uploads as the hdfs superuser
  su hdfs -c "hadoop fs -put lib/spark-kafka-consumer_2.11-1.0.jar /spark"
  su hdfs -c "hadoop fs -put lib/spark-streaming-kafka-0-8-assembly_2.11-2.1.1.jar /spark"
  cd "$last_dir" || return 1
}
# Orchestrate the full bootstrap sequence for the workshop node.
# (get_streaming_files is defined above but intentionally not invoked here,
# matching the original flow.)
function main() {
  echo "Running bootstrap for Realtime Analytics with OAC Datalake"

  check_root              # must be root before touching yum / sudoers
  update_yum              # refresh metadata, install helper tools
  get_obj_store_url       # sets $objectStoreURL for the notebook rewrites
  sudo_zeppelin           # passwordless sudo for the zeppelin user
  import_notebooks        # load lab notebooks into Zeppelin (ambari node only)
  install_kafka_producer  # anaconda + kafka producer repo

  echo "done with bootstrap for realtime analytics with OAC datalake"
  echo "Log file when this runs will be copied up to your default container."
  echo "Can also be viewed on the bdcsce server via: cat /u01/bdcsce/data/var/log/bootstrap.*"
}

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment