Skip to content

Instantly share code, notes, and snippets.

@Eduardoveras
Created June 5, 2017 03:14
Show Gist options
  • Save Eduardoveras/b17ed38e2c02cdedc4772c2eec2df78c to your computer and use it in GitHub Desktop.
Save Eduardoveras/b17ed38e2c02cdedc4772c2eec2df78c to your computer and use it in GitHub Desktop.
Hadoop install script (YARN, HDFS, etc.). The Hadoop tarball is downloaded automatically, but if you already have it, comment out the download line and place the file in the same folder as the script.
#!/bin/bash
# Single-node Hadoop 2.8.0 installer: Java 7, Hadoop tarball, SSH prerequisites.
# Safe to re-run: directory creation and key generation are idempotent.

# OpenJDK 7 comes from this PPA (dropped from newer Ubuntu releases).
sudo add-apt-repository ppa:openjdk-r/ppa -y
sudo apt-get update
sudo apt-get install openjdk-7-jdk -y
sudo update-alternatives --config java

# Download the tarball only if it is not already next to this script.
if [ ! -f hadoop-2.8.0.tar.gz ]; then
  wget http://www-us.apache.org/dist/hadoop/common/hadoop-2.8.0/hadoop-2.8.0.tar.gz
fi
mkdir -p "$HOME/hadoop"
tar xfz hadoop-2.8.0.tar.gz
# No sudo here: the tree must stay owned by the current user, who runs Hadoop.
mv hadoop-2.8.0/* "$HOME/hadoop"

sudo apt-get install ssh -y
sudo apt-get install rsync -y

# Passwordless SSH to localhost is required by the Hadoop start scripts.
# -f avoids the interactive prompt; skip generation if a key already exists.
if [ ! -f "$HOME/.ssh/id_rsa" ]; then
  ssh-keygen -t rsa -P '' -f "$HOME/.ssh/id_rsa"
fi
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# Append Hadoop environment variables to ~/.bashrc.
# The block is single-quoted on purpose: $HOME/$HADOOP_HOME/$PATH must be
# written literally and expand each time .bashrc is sourced.
# BUG FIX: HADOOP_HOME was hardcoded to /home/bigdata/hadoop, breaking the
# script for any other user; use $HOME/hadoop like the rest of the script.
# The grep guard keeps re-runs from appending duplicate blocks.
if ! grep -q '#HADOOP VARIABLES START' "$HOME/.bashrc"; then
echo '#HADOOP VARIABLES START
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export HADOOP_HOME=$HOME/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
#HADOOP VARIABLES END' >> $HOME/.bashrc
fi
source ~/.bashrc
# Write core-site.xml: default filesystem URI (local HDFS on port 9000)
# and I/O buffer size. Quoted heredoc delimiter => no shell expansion,
# matching the original single-quoted echo exactly.
cat > "$HOME/hadoop/etc/hadoop/core-site.xml" <<'EOF'
<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
</configuration>
EOF
# Local directories backing the NameNode metadata and DataNode blocks.
mkdir -p "$HOME/workspace/dfs/name"
mkdir -p "$HOME/workspace/dfs/data"
# Write hdfs-site.xml.
# BUG FIX: the original used a single-quoted echo, so the config values were
# the literal text "file:$HOME/..." — Hadoop does not expand shell variables,
# so the NameNode/DataNode paths were invalid. The unquoted heredoc delimiter
# lets the shell expand $HOME into a real path at write time.
cat > "$HOME/hadoop/etc/hadoop/hdfs-site.xml" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:$HOME/workspace/dfs/name</value>
<description>Filesystem path where the NameNode stores its metadata.</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:$HOME/workspace/dfs/data</value>
<description>Filesystem path where the DataNode stores its blocks.</description>
</property>
<property>
<name>dfs.blocksize</name>
<value>65536</value>
<description>Block size into which files are split.</description>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
<description>Replication factor. Set to 1 because this is a
single-machine cluster.</description>
</property>
</configuration>
EOF
# Local directories for the MapReduce system and intermediate data.
# NOTE: the original also copied mapred-site.xml.template first, but that copy
# was immediately overwritten by the write below, so it is dropped.
mkdir -p "$HOME/workspace/mapred/system"
mkdir -p "$HOME/workspace/mapred/local"
# Write mapred-site.xml.
# BUG FIXES: (1) the single-quoted echo wrote the literal "file:$HOME/..."
# into the config (Hadoop does not expand shell variables); (2) the local-dir
# value had a case typo, $Home, which would expand to an empty string even in
# an expanding context. The unquoted heredoc expands $HOME correctly here.
cat > "$HOME/hadoop/etc/hadoop/mapred-site.xml" <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapred.system.dir</name>
<value>file:$HOME/workspace/mapred/system</value>
<final>true</final>
</property>
<property>
<name>mapred.local.dir</name>
<value>file:$HOME/workspace/mapred/local</value>
<final>true</final>
</property>
</configuration>
EOF
# Write yarn-site.xml: enable the MapReduce shuffle service in the
# NodeManager. Quoted delimiter: no shell variables in this payload.
cat > "$HOME/hadoop/etc/hadoop/yarn-site.xml" <<'EOF'
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
EOF
# Format the NameNode. Call the binary by explicit path instead of relying on
# the PATH entries just appended to .bashrc (sourcing .bashrc from a
# non-interactive script is not guaranteed to take effect).
"$HOME/hadoop/bin/hdfs" namenode -format
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment