Hadoop on Raspberry Pi
#!/bin/bash
#######################################################
# Install Java and other packages
#######################################################
sudo apt-get update
sudo apt-get --assume-yes install oracle-java8-jdk \
    emacs-nox autoconf automake libtool cmake pkg-config \
    software-properties-common build-essential \
    zlib1g-dev libsasl2-dev libssl-dev \
    snappy libsnappy-dev bzip2 libbz2-dev \
    libjansson-dev fuse libfuse-dev zstd
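A quick check after the packages install, assuming the Oracle JDK package installed cleanly; the path below must match the JAVA_HOME exported in hadoop.bashrc further down:
java -version                                        # should report an Oracle 1.8.0_xx runtime
ls -d /usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/jre   # the directory used as JAVA_HOME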
hadoop.bashrc
# -- HADOOP ENVIRONMENT VARIABLES START -- #
export HADOOP_HOME=/opt/hadoop
export HIVE_HOME=/opt/hive
export SPARK_HOME=/opt/spark
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin
export HADOOP_CONF_DIR=/etc/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
# Suppress the message
# WARN util.NativeCodeLoader: Unable to load native-hadoop
# library for your platform
export HADOOP_HOME_WARN_SUPPRESS=1
export HADOOP_ROOT_LOGGER="WARN,DRFA"
# export HDFS_NAMENODE_USER=root
# export HDFS_DATANODE_USER=root
# export HDFS_SECONDARYNAMENODE_USER=root
export YARN_HOME=$HADOOP_HOME
export SPARK_CONF_DIR=/etc/spark
export SPARK_MASTER_HOST=localhost
# -- HADOOP ENVIRONMENT VARIABLES END -- #
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/jre
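A quick sanity check, assuming the file above has been installed as /etc/profile.d/hadoop.sh (the main install script below does this):
. /etc/profile.d/hadoop.sh
echo "HADOOP_HOME=${HADOOP_HOME}  HADOOP_CONF_DIR=${HADOOP_CONF_DIR}  JAVA_HOME=${JAVA_HOME}"
# Once the Hadoop tarball has been unpacked into /opt/hadoop (see the main script),
# this should print the installed release.
command -v hadoop && hadoop version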
#!/bin/bash
# Protocol Buffers is Google's platform- and language-neutral serialization
# and interprocess-communication (IPC) framework. It provides an Interface
# Definition Language (IDL) for describing wire and file formats; the IDL is
# pre-compiled into source code for the target languages (Python, Java and
# C++ among them), which applications then use. A minimal usage example
# follows this script.
cd /tmp
wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.bz2
tar xf protobuf-2.5.0.tar.bz2
cd protobuf-2.5.0/
sh autogen.sh
./configure
make
sudo make install
sudo ldconfig
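A minimal sketch of the IDL workflow described above, assuming the freshly built protoc 2.5.0 is on the PATH; the file and message names are purely illustrative:
# Describe a message in the IDL (proto2 is the default syntax for protoc 2.5.0).
cat > /tmp/telemetry.proto <<'EOF'
message Reading {
  required string sensor = 1;
  optional double value  = 2;
}
EOF
# Pre-compile the IDL into Java source; --python_out or --cpp_out work the same way.
mkdir -p /tmp/generated
protoc --proto_path=/tmp --java_out=/tmp/generated /tmp/telemetry.proto
ls /tmp/generated    # Telemetry.java, containing the generated Reading class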
#!/bin/bash
#
# Source:
# https://developer.ibm.com/recipes/tutorials/building-a-hadoop-cluster-with-raspberry-pi/
# OS installation and filesystem expansion must be done manually on each node.
# To enable SSH, an empty file named "ssh" must be created in the /boot directory.
# 1. Add environment variables to .bashrc
# 2. Create the installation and HDFS directories
# On the master : /etc/hosts
#                 hdfs-site.xml
#                 yarn-site.xml
#                 core-site.xml
EMAIL="[email protected]"
get_latest_release_number() {
  curl --silent "https://github.com/$1/releases/latest" | sed 's#.*tag/\(.*\)\".*#\1#';
}
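# Example (illustrative, not used elsewhere in this script): print the latest
# release tag of a GitHub repository, parsed from the redirect page GitHub
# returns for /releases/latest, e.g.
#   get_latest_release_number "protocolbuffers/protobuf"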
#######################################################
# 1. SW version and user
#######################################################
HIVE=$(curl --silent https://www-eu.apache.org/dist/hive/ | perl -ne "m/(?:hive-)(3[\.\d]*)/ && print $&")
declare -A VERSION
VERSION=(["HADOOP"]="3.2.0" \
         ["SPARK"]="2.4.0" \
         ["HIVE"]=${HIVE##*-})
HADOOP_VERSION="3.2.0"
HIVE_VERSION=${HIVE##*-}
SPARK_VERSION="2.4.0"
HADOOP_USER=$USER
#######################################################
# 2. Add Hadoop-specific environment variables to /etc/profile.d
#######################################################
sudo cp hadoop.bashrc /etc/profile.d/hadoop.sh
sudo chmod 644 /etc/profile.d/hadoop.sh
. /etc/profile.d/hadoop.sh
#######################################################
# 3. Create the installation and HDFS directories
#######################################################
sudo mkdir -p /opt/{hadoop,hdfs/{datanode,namenode},hive,presto/{etc/catalog,data},spark}
# Create config directory
sudo mkdir -p /etc/{hadoop,hive,impala,presto,spark}
sudo chown -R ${HADOOP_USER}:${HADOOP_USER} /opt/{hadoop,hdfs,hive,presto,spark} /etc/{hadoop,hive,impala,presto,spark}
#######################################################
# 4. Set up connectivity
#######################################################
# Bash 4 supports associative arrays
# for host in "${!IPs[@]}";
#   do echo "$host - ${IPs[$host]}";
# done
declare -A IPs
MASTER="hadoopmaster"
IPs=(["hadoopmaster"]="192.168.178.51" \
     ["hadoopworker01"]="192.168.178.52")
# Current address of this node; read the "src" field rather than the last
# field, which is a uid on recent iproute2 releases.
CURRENT_IP=$(ip route get 1 | awk '{for(i=1;i<=NF;i++) if($i=="src") print $(i+1); exit}')
# Take the master address from the table above (the hostname may not resolve yet).
MASTER_IP=${IPs[$MASTER]}
# Set IS_MASTER if currently on the master hadoop node
[[ $CURRENT_IP = $MASTER_IP ]] && IS_MASTER=true || IS_MASTER=false
#######################################################
# Generate and replicate SSH keys
#######################################################
ssh-keygen -f $HOME/.ssh/id_rsa -N '' -t rsa -b 4096 -C ${EMAIL}
for host in "${!IPs[@]}";
  do echo $host;
  # Add the host to /etc/hosts first so that ssh-copy-id can resolve it
  sudo bash -c "echo -e \"${IPs[$host]}\t${host}\" >> /etc/hosts"
  ssh-copy-id -i $HOME/.ssh/id_rsa ${HADOOP_USER}@${host};
done
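# Optional check (illustrative, not part of the original recipe): confirm name
# resolution and passwordless SSH for every node before continuing, e.g.
#   for host in "${!IPs[@]}"; do ssh ${HADOOP_USER}@${host} hostname; done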
#######################################################
# Install Hadoop in the namenode
#######################################################
if [ "$IS_MASTER" = true ];
then
  DIST=https://www-eu.apache.org/dist
  curl -o /tmp/hadoop.tar.gz $DIST/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
  curl -o /tmp/hive.tar.gz https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz
  curl -o /tmp/spark.tgz $DIST/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz
  sudo tar xvf /tmp/hadoop.tar.gz \
    --directory=${HADOOP_HOME} \
    --exclude=hadoop-${HADOOP_VERSION}/share/doc \
    --strip 1
  sudo tar xvf /tmp/hive.tar.gz \
    --directory=${HIVE_HOME} \
    --exclude=apache-hive-${HIVE_VERSION}-bin/ql/src/test \
    --strip 1
  sudo tar xzvf /tmp/spark.tgz \
    --directory=${SPARK_HOME} \
    --strip 1
fi
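# Optional check (illustrative): the unpacked trees should now report their
# versions, e.g.
#   ${HADOOP_HOME}/bin/hadoop version     # "Hadoop 3.2.0"
#   ls ${HIVE_HOME}/bin ${SPARK_HOME}/bin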
# Move the shipped configuration into ${HADOOP_CONF_DIR} and drop the Windows scripts
sudo mv ${HADOOP_HOME}/etc/hadoop/* ${HADOOP_CONF_DIR}/
sudo rmdir ${HADOOP_HOME}/etc/hadoop ${HADOOP_HOME}/etc
sudo rm ${HADOOP_HOME}/{sbin,bin}/*.cmd
sudo rm ${HADOOP_CONF_DIR}/*.cmd
# Copy the configuration to the worker nodes
# (alternatively: sudo scp -r $HADOOP_CONF_DIR pi@${host}:${HADOOP_CONF_DIR})
for host in "${!IPs[@]}";
  do [ $host != $MASTER ] && \
    sudo scp $HADOOP_CONF_DIR/{core-site.xml,hadoop-env.sh,hdfs-site.xml,mapred-site.xml,yarn-site.xml,workers} \
      pi@${host}:${HADOOP_CONF_DIR}
done
#######################################################
# Add the master and workers files
#######################################################
# Only on the master node. Hadoop 3 reads the worker list from
# ${HADOOP_CONF_DIR}/workers (the file was called "slaves" in Hadoop 2).
if [ "$IS_MASTER" = true ];
then
  sudo bash -c "echo \"${MASTER}\" >> ${HADOOP_CONF_DIR}/master"
  for host in "${!IPs[@]}";
    do [ $host != $MASTER ] && \
      sudo bash -c "echo \"${host}\" >> ${HADOOP_CONF_DIR}/workers"
  done
fi
if [ $CURRENT_IP = $MASTER_IP ];
then sudo mkdir -p /opt/hdfs/namenode
else sudo mkdir -p /opt/hdfs/datanode
fi
#######################################################
# Copy the basic installation to the worker nodes
#######################################################
# Trailing slash on the source: copy the contents of /opt/hadoop, not the
# directory itself, so the tree lands directly in /opt/hadoop on each worker.
if [ "$IS_MASTER" = true ];
then
  for host in "${!IPs[@]}";
    do [ $host != $MASTER ] && \
      rsync -avxP ${HADOOP_HOME}/ ${HADOOP_USER}@${host}:${HADOOP_HOME}/
  done
fi
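Typical next steps once core-site.xml, hdfs-site.xml and yarn-site.xml have been filled in on the master (standard Hadoop 3 administration, not part of this gist):
hdfs namenode -format      # run once, on the master only
start-dfs.sh               # starts the NameNode plus the DataNodes listed in workers
start-yarn.sh              # starts the ResourceManager and the NodeManagers
jps                        # NameNode/ResourceManager on the master, DataNode/NodeManager on workers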