# Build a 4-node virtual Hadoop (CDH5) cluster with LXC and Vagrant.
# Tested on Ubuntu 16.04.3 LTS.
sudo apt-get update
sudo apt-get install -y lxc bridge-utils vim
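# fetch and install the latest Vagrant release straight from releases.hashicorp.com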
url="https://releases.hashicorp.com"
latest=$(wget -qO- ${url}/vagrant/ | grep -o -P '/vagrant/[^"]*' | head -n 1)
deb64package=$(wget -qO- ${url}${latest} | grep x86_64.deb | grep -o -P '/vagrant/[^"]*')
wget -O vagrant_latest.deb ${url}${deb64package}
sudo dpkg -i vagrant_latest.deb
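# sanity check: the install should report a version
vagrant --version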
ssh-keygen -t rsa -f ~/.ssh/id_rsa -N ""
vagrant plugin install vagrant-lxc vagrant-hostmanager
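# both plugins should now show up here
vagrant plugin list
# raise the open-file limit: several containers plus Hadoop daemons need it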
ulimit -n 10000
mkdir virtual-hadoop-cluster-lxc
cd virtual-hadoop-cluster-lxc
## if anything goes wrong, destroy the containers and restart from here:
## sudo su -c 'for i in $(lxc-ls) ; do lxc-stop --name $i ; lxc-destroy --name $i ; done' ; rm Vagrantfile
wget -O Vagrantfile j1.re/Vgrfile1
export mypubkey=$(cat ~/.ssh/id_rsa.pub | sed -e 's#\/#\\\/#g' -e 's# #\\ #g' )
sed -i -s "s/^ssh-rsa.*$/${mypubkey}/" Vagrantfile
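# verify the key was injected (should print your public key)
grep '^ssh-rsa' Vagrantfile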
vagrant up --provider=lxc
vagrant reload
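# all four containers should report "running"
vagrant status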
for i in $(seq 1 4) ; do ping -c 1 -w 0 node${i}; done
sudo su -c 'for i in $(lxc-ls) ; do lxc-info --name $i ; done'
for i in $(seq 1 4) ; do ssh -nT vagrant@node${i} uptime ; done
##################################
#
# the first SSH/SCP connection to each node asks you to
# confirm the host key: type "yes" when prompted (once per node)
#
##################################
wget http://archive.cloudera.com/cdh5/one-click-install/trusty/amd64/cdh5-repository_1.0_all.deb
for i in $(seq 1 4) ; do scp cdh5-repository*.deb vagrant@vm-cluster-node${i}:. ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo apt-get update ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo apt-get install -y apt-transport-https ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo dpkg -i cdh5-repository*.deb ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo apt-key update ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo apt-get update ; done
# Java: CDH5 needs a JDK on every node; install Oracle JDK 7
wget data2b.org/java/jdk-7u55-linux-x64.tar.gz
for i in $(seq 1 4) ; do scp jdk-7u55-linux-x64.tar.gz vagrant@vm-cluster-node${i}:. ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo mkdir /usr/java/ ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo tar -xzf jdk-7u55-linux-x64.tar.gz -C /usr/java/ ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo ln -s /usr/java/jdk1.7.0_55 /usr/java/default ; done
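# quick check: Java should now run on every node (CDH's bigtop scripts look
# for /usr/java/default, among other locations)
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} /usr/java/default/bin/java -version ; done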
# Cloudera Hadoop Distribution (CDH)
ssh -nT vagrant@vm-cluster-node1 sudo apt-get -y install hadoop-yarn-resourcemanager hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-mapreduce-historyserver hadoop-yarn-proxyserver hive hive-server2
for i in 2 3 4 ; do ssh -nT vagrant@vm-cluster-node$i sudo apt-get install -y hadoop-yarn-nodemanager hadoop-hdfs-datanode hadoop-mapreduce hadoop-client ; done
######################################### copy/paste from here #################
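# the quoted 'SCRIPT' delimiter prevents the host shell from expanding the variables below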
cat > configure-hadoop.sh <<'SCRIPT'
#!/bin/sh
sudo cp -r /etc/hadoop/conf.empty /etc/hadoop/conf.my_cluster
sudo update-alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.my_cluster 50
sudo update-alternatives --set hadoop-conf /etc/hadoop/conf.my_cluster
sudo mkdir -p /data/1/dfs/dn
sudo mkdir -p /data/1/dfs/nn
sudo chown -R hdfs:hdfs /data/1/dfs/*
sudo chmod 700 /data/1/dfs/*
# fs.defaultFS plus the proxyuser settings for the mapred user (these belong in core-site.xml)
sudo su -c 'cat > /etc/hadoop/conf.my_cluster/core-site.xml << EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://vm-cluster-node1:8020</value>
</property>
<property>
<name>hadoop.proxyuser.mapred.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapred.hosts</name>
<value>*</value>
</property>
</configuration>
EOF'
sudo su -c 'cat > /etc/hadoop/conf.my_cluster/hdfs-site.xml << EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/1/dfs/nn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///data/1/dfs/dn</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>vm-cluster-node1:50070</value>
</property>
</configuration>
EOF'
# YARN setup: local directories and configuration
sudo mkdir -p /data/1/yarn/local
sudo mkdir -p /data/1/yarn/logs
sudo chown -R yarn:yarn /data/1/yarn/*
sudo chmod 755 /data/1/yarn/*
# MapReduce settings belong in mapred-site.xml...
sudo su -c 'cat > /etc/hadoop/conf.my_cluster/mapred-site.xml << EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>vm-cluster-node1:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>vm-cluster-node1:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>
</configuration>
EOF'
# ...while the yarn.* settings belong in yarn-site.xml, where the YARN daemons
# actually read them; the quoted "EOF" keeps the $HADOOP_* entries literal
sudo su -c 'cat > /etc/hadoop/conf.my_cluster/yarn-site.xml << "EOF"
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>vm-cluster-node1</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>file:///data/1/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>file:///data/1/yarn/logs</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>hdfs://vm-cluster-node1:8020/var/log/hadoop-yarn/apps</value>
</property>
</configuration>
EOF'
SCRIPT
chmod +x configure-hadoop.sh
######################################### copy/paste to here #################
for i in $(seq 1 4) ; do scp configure-hadoop.sh vagrant@vm-cluster-node${i}:. ; done
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo ./configure-hadoop.sh ; done
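# sanity check: hadoop-conf should now point at conf.my_cluster on every node
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} sudo update-alternatives --display hadoop-conf ; done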
ssh -nT vagrant@vm-cluster-node1 sudo -u hdfs hdfs namenode -format
for i in 2 3 4 ; do ssh -nT vagrant@vm-cluster-node$i sudo service hadoop-yarn-nodemanager restart ; done
ssh -nT vagrant@vm-cluster-node1 sudo service hadoop-yarn-resourcemanager restart
ssh -nT vagrant@vm-cluster-node1 sudo service hadoop-mapreduce-historyserver restart
# start every HDFS service on each node
for i in $(seq 1 4) ; do ssh -nT vagrant@vm-cluster-node${i} 'for x in $(cd /etc/init.d ; ls hadoop-hdfs-*) ; do sudo service $x start ; done' ; done
######################
#
# HDFS is now installed
#
######################
# you can now log in to the master node
ssh vagrant@vm-cluster-node1
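# create the HDFS directories the MapReduce JobHistory server expects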
sudo -u hdfs hadoop fs -mkdir -p /user/history
sudo -u hdfs hadoop fs -chmod -R 1777 /user/history
sudo -u hdfs hadoop fs -chown mapred:hadoop /user/history
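# HDFS location for aggregated YARN application logs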
sudo -u hdfs hadoop fs -mkdir -p /var/log/hadoop-yarn
sudo -u hdfs hadoop fs -chown yarn:mapred /var/log/hadoop-yarn
sudo -u hdfs hadoop fs -ls -R /
exit
# back on the host: restart the YARN daemons now that their HDFS directories exist
for i in 2 3 4 ; do ssh -nT vagrant@vm-cluster-node${i} sudo service hadoop-yarn-nodemanager restart ; done
ssh -nT vagrant@vm-cluster-node1 sudo service hadoop-mapreduce-historyserver restart
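# optional smoke test, assuming the examples jar sits at its usual CDH5 path:
# run the bundled pi estimator to exercise HDFS and YARN end to end
ssh -nT vagrant@vm-cluster-node1 sudo -u hdfs hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 2 100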