Hadoop on Raspberry Pi
#!/bin/bash
#######################################################
# Install Java and other packages
#######################################################
sudo apt-get update
sudo apt-get --assume-yes install oracle-java8-jdk \
emacs-nox autoconf automake libtool cmake pkg-config \
software-properties-common build-essential \
zlib1g-dev libssl-dev libsasl2-dev \
snappy libsnappy-dev bzip2 libbz2-dev \
libjansson-dev fuse libfuse-dev zstd
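# Optional sanity check (not in the original recipe): confirm the JDK is
# installed and sits where hadoop.bashrc later expects JAVA_HOME to point.
java -version
[ -d /usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/jre ] && echo "Oracle JDK found"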
# -- HADOOP ENVIRONMENT VARIABLES START -- #
export HADOOP_HOME=/opt/hadoop
export HIVE_HOME=/opt/hive
export SPARK_HOME=/opt/spark
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin
export HADOOP_CONF_DIR=/etc/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
# Remove messages
# WARN util.NativeCodeLoader: Unable to load native-hadoop
# library for your platform
export HADOOP_HOME_WARN_SUPPRESS=1
export HADOOP_ROOT_LOGGER="WARN,DRFA"
# export HDFS_NAMENODE_USER=root
# export HDFS_DATANODE_USER=root
# export HDFS_SECONDARYNAMENODE_USER=root
export YARN_HOME=$HADOOP_HOME
export SPARK_CONF_DIR=/etc/spark
export SPARK_MASTER_HOST=localhost
# -- HADOOP ENVIRONMENT VARIABLES END -- #
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/jre
#!/bin/bash
# Protocol Buffers is an open source project supporting Google's
# platform-neutral and language-neutral interprocess-communication (IPC)
# and serialization framework. Its Interface Definition Language (IDL)
# describes the wire and file formats and is pre-compiled into source code
# for the target languages (Python, Java and C++ included), which is then
# used in the applications. Hadoop's native build requires protoc 2.5.0.
cd /tmp
wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.bz2
tar xf protobuf-2.5.0.tar.bz2
cd protobuf-2.5.0/
sh autogen.sh
./configure
make
sudo make install
sudo ldconfig
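# Optional check (assumes the build above succeeded): Hadoop's native build
# expects exactly this protoc version. Note that the gtest download URL in
# autogen.sh no longer exists and may need adjusting before the build.
protoc --version   # should print: libprotoc 2.5.0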
#!/bin/bash
#
# Source:
# https://developer.ibm.com/recipes/tutorials/building-a-hadoop-cluster-with-raspberry-pi/
# OS installation and filesystem expansion must be done manually on each node.
# To enable SSH on first boot, create an empty file named "ssh" in the /boot
# partition of each node.
# 1. Add environment variables to .bashrc
# 2. Create the installation and HDFS directories
# On the master, configure manually: /etc/hosts, hdfs-site.xml,
# yarn-site.xml and core-site.xml
EMAIL="[email protected]"
get_latest_release_number() {
curl --silent "https://github.com/$1/releases/latest" | sed 's#.*tag/\(.*\)\".*#\1#';
}
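# Example usage of the helper above (not called anywhere below; shown for
# reference only):
#   get_latest_release_number "protocolbuffers/protobuf"   # prints a tag such as v3.7.0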
#######################################################
# 1. SW version and user
#######################################################
# Name of the latest Hive 3.x release directory, e.g. "hive-3.1.1"
HIVE=$(curl --silent https://www-eu.apache.org/dist/hive/ | perl -ne 'm/(?:hive-)(3[\.\d]*)/ && print $&')
declare -A VERSION
VERSION=(["HADOOP"]="3.2.0" \
["SPARK"]="2.4.0" \
["HIVE"]=${HIVE##*-})
HADOOP_VERSION=${VERSION["HADOOP"]}
HIVE_VERSION=${VERSION["HIVE"]}
SPARK_VERSION=${VERSION["SPARK"]}
HADOOP_USER=$USER
#######################################################
# 2. Add hadoop specific environment variables to .bashrc
#######################################################
sudo cp hadoop.bashrc /etc/profile.d/hadoop.sh
sudo chmod 644 /etc/profile.d/hadoop.sh
. /etc/profile.d/hadoop.sh
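# Quick check that the profile script took effect in this shell
echo "HADOOP_HOME=${HADOOP_HOME}  HADOOP_CONF_DIR=${HADOOP_CONF_DIR}"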
#######################################################
# 3. Create the installation and HDFS directories
#######################################################
sudo mkdir -p /opt/{hadoop,hdfs/{datanode,namenode},hive,presto/{etc/catalog,data},spark}
# Create config directory
sudo mkdir -p /etc/{hadoop,hive,impala,presto,spark}
sudo chown -R ${HADOOP_USER}:${HADOOP_USER} /opt/{hadoop,hdfs,hive,presto,spark} /etc/{hadoop,hive,impala,presto,spark}
#######################################################
# 3. Setup connectivity.
#######################################################
# Bash 4 support associative arrays
# for host in "${!IPs[@]}";
# do echo "$host - ${IPs[$host]}";
# done
declare -A IPs
MASTER="hadoopmaster"
IPs=(["hadoopmaster"]="192.168.178.51" \
["hadoopworker01"]="192.168.178.52")
CURRENT_IP=$(ip route get 1 | awk '{print $NF;exit}')
MASTER_IP=$(host ${MASTER} | awk '/has address/ { print $4 }')
# Set IS_MASTER if currently on master hadoop node
[[ $CURRENT_IP = $MASTER_IP ]] && IS_MASTER=true || IS_MASTER=false
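# Optional: report which role this node detected for itself
echo "This node (${CURRENT_IP}) is master: ${IS_MASTER} (master ${MASTER} = ${MASTER_IP})"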
#######################################################
# Generate and replicate SSH keys.
#######################################################
# Generate a key only if one does not exist yet
[ -f $HOME/.ssh/id_rsa ] || ssh-keygen -f $HOME/.ssh/id_rsa -N '' -t rsa -b 4096 -C ${EMAIL}
for host in "${!IPs[@]}";
do echo $host;
# Make the hostname resolvable before copying the key to it
sudo bash -c "echo -e \"${IPs[$host]}\t${host}\" >> /etc/hosts"
ssh-copy-id -i $HOME/.ssh/id_rsa ${HADOOP_USER}@${host};
done
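# Optional check (sketch): every node should now accept key-based logins.
# BatchMode makes ssh fail instead of prompting if the key was not copied.
for host in "${!IPs[@]}";
do ssh -o BatchMode=yes ${HADOOP_USER}@${host} hostname;
done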
#######################################################
# Install Hadoop in the namenode
#######################################################
if ${IS_MASTER};
then
DIST=https://www-eu.apache.org/dist
curl -o /tmp/hadoop.tar.gz $DIST/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
curl -o /tmp/hive.tar.gz https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz
curl -o /tmp/spark.tgz $DIST/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz
sudo tar xvf /tmp/hadoop.tar.gz \
--directory=${HADOOP_HOME} \
--exclude=hadoop-${HADOOP_VERSION}/share/doc \
--strip 1
sudo tar xvf /tmp/hive.tar.gz \
--directory=${HIVE_HOME} \
--exclude=apache-hive-${HIVE_VERSION}-bin/ql/src/test \
--strip 1
sudo tar xzvf /tmp/spark.tgz \
--directory=${SPARK_HOME} \
--strip 1
fi
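# Optional check on the master (assumes the PATH from hadoop.bashrc is
# active in this shell): the unpacked distributions should report their
# versions.
if ${IS_MASTER};
then
hadoop version | head -n 1
${SPARK_HOME}/bin/spark-submit --version 2>&1 | head -n 5
fi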
# Copy the configuration files to the worker nodes (only from the master;
# ${HADOOP_CONF_DIR} is owned by ${HADOOP_USER}, so sudo is not needed).
# Alternative: scp -r $HADOOP_CONF_DIR ${HADOOP_USER}@${host}:${HADOOP_CONF_DIR}
if ${IS_MASTER};
then
for host in "${!IPs[@]}";
do [ $host != $MASTER ] && \
scp $HADOOP_CONF_DIR/{core-site.xml,hadoop-env.sh,hdfs-site.xml,mapred-site.xml,yarn-site.xml,workers} \
${HADOOP_USER}@${host}:${HADOOP_CONF_DIR}
done
fi
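# Note (assumption): the Hadoop tarball ships its default configuration
# under ${HADOOP_HOME}/etc/hadoop, while this setup keeps it in
# ${HADOOP_CONF_DIR}. A minimal sketch to relocate it before the cleanup
# below, if not already done:
# mv ${HADOOP_HOME}/etc/hadoop/* ${HADOOP_CONF_DIR}/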
rm ${HADOOP_HOME}/{sbin,bin}/*.cmd
rm ${HADOOP_CONF_DIR}/*.cmd
rmdir ${HADOOP_HOME}/etc
#######################################################
# Add the master and workers files
#######################################################
# Only in the Master node.
if ${IS_MASTER};
then
sudo bash -c "echo \"${MASTER}\" >> ${HADOOP_CONF_DIR}/master"
for host in "${!IPs[@]}";
do [ $host != $MASTER ] && \
sudo bash -c "echo \"${host}\" >> ${HADOOP_CONF_DIR}/workers"
done
fi
if ${IS_MASTER};
then sudo mkdir -p /opt/hdfs/namenode
else sudo mkdir -p /opt/hdfs/datanode
fi
#######################################################
# Copy the basic configuration to the slave nodes
#######################################################
if ${IS_MASTER};
then
for host in "${!IPs[@]}";
do [ $host != $MASTER ] && \
rsync -avxP ${HADOOP_HOME}/ ${HADOOP_USER}@${host}:${HADOOP_HOME}/
done
fi
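#######################################################
# Typical next steps (sketch, not part of this script):
# once core-site.xml, hdfs-site.xml and yarn-site.xml
# are in place on all nodes
#######################################################
# hdfs namenode -format          # run once, on the master only
# start-dfs.sh && start-yarn.sh  # scripts live in ${HADOOP_HOME}/sbin (on PATH)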