Check groups
$ compgen -g
prayagupd ...
prayag@prayag$ sudo addgroup hadoop
Adding group `hadoop' (GID 1002) ...
Done.
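To confirm the new group and its GID, getent should print the entry just created:

$ getent group hadoop
hadoop:x:1002: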
Check users
prayag@prayag$ cut -d: -f1 /etc/passwd | grep hd
sshd
hduser
Check the current user (prayag here), then create hduser with hadoop as its primary group:

$ echo $USER
prayag@prayag:~$ sudo adduser --ingroup hadoop hduser
Adding user `hduser' ...
Adding new user `hduser' (1001) with group `hadoop' ...
Creating home directory `/home/hduser' ...
Copying files from `/etc/skel' ...
Enter new UNIX password:
Retype new UNIX password:
passwd: password updated successfully
Changing the user information for hduser
Enter the new value, or press ENTER for the default
Full Name []:
Room Number []:
Work Phone []:
Home Phone []:
Other []:
Is the information correct? [Y/n] Y
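Confirm the account and its primary group; given the output above, id should report uid 1001 and group hadoop:

$ id hduser
uid=1001(hduser) gid=1002(hadoop) groups=1002(hadoop)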
Switch to user hduser
prayag@prayag:~$ su - hduser
Password:
hduser@prayag:~$
Generate an SSH key for the hduser user.
hduser@prayag:~$ ssh-keygen -t rsa -P ""
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hduser/.ssh/id_rsa):
Created directory '/home/hduser/.ssh'.
Your identification has been saved in /home/hduser/.ssh/id_rsa.
Your public key has been saved in /home/hduser/.ssh/id_rsa.pub.
The key fingerprint is:
ed:24:f2:1f:81:08:c5:d1:3e:2e:1c:96:51:66:cd:b4 hduser@prayag
The key's randomart image is:
+--[ RSA 2048]----+
| .o+++. |
| .oo. o. |
| . + E |
| .+.oo |
| oooS.+ |
| oo.+ . |
| .. o |
| . . |
| . |
+-----------------+
hduser@prayag:~$ cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
Hadoop requires SSH access to manage its nodes, which is why the public key was just appended to authorized_keys for passwordless login.
Give hduser sudo privileges: from user prayagupd, add the following line to /etc/sudoers (preferably via visudo, which validates the syntax before saving):
hduser ALL=(ALL) ALL
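From a sudo-capable account you can confirm the new privileges took effect:

prayag@prayag:~$ sudo -l -U hduser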
Then switch back to hduser and make sure SSH is installed:
hduser@prayag:~$ sudo apt-get install ssh
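With SSH installed, verify that key-based login to localhost works before going further (the very first connection will ask you to trust the host key):

hduser@prayag:~$ ssh localhost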
Make hduser the owner of the Hadoop installation:

hduser@prayagupd:~$ sudo chown -R hduser:hadoop /usr/local/hadoop-2.2.0
Hadoop and IPv6 do not get along well on Ubuntu, so prefer IPv4. Edit hadoop-env.sh:

hduser@prayag:~$ sudo vi /usr/local/hadoop-2.2.0/etc/hadoop/hadoop-env.sh
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
OR disable IPv6 system-wide by adding the following to /etc/sysctl.conf:

# disable ipv6
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
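After a reboot (or sudo sysctl -p), check that IPv6 is really disabled; a value of 1 means disabled:

$ cat /proc/sys/net/ipv6/conf/all/disable_ipv6
1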
Add the following Hadoop environment setup to hduser's $HOME/.bashrc:

# Set Hadoop-related environment variables
HADOOP_INSTALL=/usr/local/hadoop-2.2.0
HADOOP_HOME=$HADOOP_INSTALL
HADOOP_MAPRED_HOME=$HADOOP_INSTALL
HADOOP_COMMON_HOME=$HADOOP_INSTALL
HADOOP_HDFS_HOME=$HADOOP_INSTALL
YARN_HOME=$HADOOP_INSTALL
HADOOP_CONF_DIR=${HADOOP_INSTALL}/etc/hadoop
# Set JAVA_HOME (we will also configure JAVA_HOME directly for Hadoop later on)
# export JAVA_HOME=/usr/lib/jvm/java-6-sun
# Some convenient aliases and functions for running Hadoop-related commands
unalias fs &> /dev/null
alias fs="hadoop fs"
unalias hls &> /dev/null
alias hls="fs -ls"
# If you have LZO compression enabled in your Hadoop cluster and
# compress job outputs with LZOP (not covered in this tutorial):
# Conveniently inspect an LZOP compressed file from the command
# line; run via:
#
# $ lzohead /hdfs/path/to/lzop/compressed/file.lzo
#
# Requires installed 'lzop' command.
#
lzohead () {
    hadoop fs -cat "$1" | lzop -dc | head -1000 | less
}
# adding hadoop jars in classpath
for jar in $(find $HADOOP_INSTALL/ -type f -name "*.jar"); do
    HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$jar
done
CLASSPATH=$CLASSPATH:$HADOOP_CLASSPATH
PATH=$PATH:$HADOOP_INSTALL/bin:$HADOOP_INSTALL/sbin
export PATH CLASSPATH HADOOP_CLASSPATH
export HADOOP_INSTALL HADOOP_HOME HADOOP_MAPRED_HOME HADOOP_COMMON_HOME HADOOP_HDFS_HOME
export YARN_HOME HADOOP_CONF_DIR
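Reload the file and sanity-check that the Hadoop binaries are now on the PATH:

hduser@prayag:~$ source ~/.bashrc
hduser@prayag:~$ hadoop version
Hadoop 2.2.0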
Next:
- configure the directory where Hadoop stores its data files, the network ports it listens on, etc.
- set up Hadoop's Distributed File System (HDFS), even though this little "cluster" contains only a single local machine
$ sudo mkdir -p /app/hadoop/tmp
$ sudo chown hduser:hadoop /app/hadoop/tmp
$ sudo chmod 750 /app/hadoop/tmp
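Double-check ownership and permissions; a wrongly owned tmp directory is a common cause of later NameNode/DataNode startup failures:

$ ls -ld /app/hadoop/tmp
drwxr-x--- 2 hduser hadoop 4096 ... /app/hadoop/tmp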
hduser@prayag:~$ sudo vi /usr/local/hadoop-2.2.0/etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/app/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>

  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost:54310</value>
    <description>The name of the default file system. A URI whose
    scheme and authority determine the FileSystem implementation. The
    uri's scheme determines the config property (fs.SCHEME.impl) naming
    the FileSystem implementation class. The uri's authority is used to
    determine the host, port, etc. for a filesystem.</description>
  </property>
</configuration>
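Sanity-check that the value is picked up (fs.default.name still works in 2.2.0 but is deprecated in favour of fs.defaultFS):

hduser@prayag:~$ hdfs getconf -confKey fs.default.name
hdfs://localhost:54310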
Create mapred-site.xml from the bundled template:

hduser@prayag:~$ sudo cp $HADOOP_HOME/etc/hadoop/mapred-site.xml.template $HADOOP_HOME/etc/hadoop/mapred-site.xml
or
hduser@prayag:~$ sudo cp /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml.template /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml
hduser@prayag:~$ sudo vi $HADOOP_HOME/etc/hadoop/mapred-site.xml
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:54311</value>
    <description>The host and port that the MapReduce job tracker runs
    at. If "local", then jobs are run in-process as a single map
    and reduce task.
    </description>
  </property>
</configuration>
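Note that mapred.job.tracker is a Hadoop 1.x property; Hadoop 2.x has no JobTracker, and MapReduce jobs are normally routed to YARN by setting mapreduce.framework.name to yarn in this same file. To see which framework is currently in effect:

hduser@prayag:~$ hdfs getconf -confKey mapreduce.framework.name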
hduser@prayag:~$ sudo vi $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>Default block replication.
    The actual number of replications can be specified when the file is created.
    The default is used if replication is not specified in create time.
    </description>
  </property>
</configuration>
Set JAVA_HOME for the Hadoop scripts (see http://solaimurugan.blogspot.com/2013/11/installing-hadoop-2xx-single-node.html):

hduser@prayag:~$ sudo vi $HADOOP_HOME/libexec/hadoop-config.sh

Add the export at the top of the file, above the existing lines shown here:

export JAVA_HOME=/usr/local/jdk1.7.0

this="${BASH_SOURCE-$0}"
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
script="$(basename -- "$this")"
this="$common_bin/$script"
[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
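Assuming the JDK really lives at /usr/local/jdk1.7.0 (adjust the path to your installation), confirm it resolves before restarting anything:

hduser@prayag:~$ /usr/local/jdk1.7.0/bin/java -version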
Format the HDFS filesystem via the NameNode (the hdfs command is the recommended form):

hduser@prayag:~$ hdfs namenode -format

or, with the older command that 2.x marks as deprecated:

hduser@prayag:~$ hadoop namenode -format
Start the HDFS daemons:

hduser@prayag:~$ /usr/local/hadoop-2.2.0/sbin/start-dfs.sh
14/11/22 16:30:57 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting namenodes on [localhost]
localhost: starting namenode, logging to /usr/local/hadoop-2.2.0/logs/hadoop-hduser-namenode-prayagupd.out
localhost: starting datanode, logging to /usr/local/hadoop-2.2.0/logs/hadoop-hduser-datanode-prayagupd.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /usr/local/hadoop-2.2.0/logs/hadoop-hduser-secondarynamenode-prayagupd.out
14/11/22 16:31:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

(The NativeCodeLoader warning is harmless: Hadoop simply falls back to its built-in Java implementations when no native library matches the platform.)
hduser@prayag:~$ jps
16703 Jps
15696 SecondaryNameNode
15214 NameNode
15424 DataNode
Start the YARN daemons:

hduser@prayag:~$ /usr/local/hadoop-2.2.0/sbin/start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /usr/local/hadoop-2.2.0/logs/yarn-hduser-resourcemanager-prayag.out
localhost: starting nodemanager, logging to /usr/local/hadoop-2.2.0/logs/yarn-hduser-nodemanager-prayag.out
hduser@prayag:~$ jps
16979 NodeManager
17273 Jps
15696 SecondaryNameNode
15214 NameNode
16768 ResourceManager
15424 DataNode
Check ports
hduser@prayag:~$ sudo netstat -plten | grep java
tcp 0 0 0.0.0.0:50090 0.0.0.0:* LISTEN 1001 204029 15696/java
tcp 0 0 127.0.0.1:63342 0.0.0.0:* LISTEN 1417676764 35675 3300/java
tcp 0 0 0.0.0.0:2864 0.0.0.0:* LISTEN 1417676764 34523 3300/java
tcp 0 0 0.0.0.0:50070 0.0.0.0:* LISTEN 1001 200620 15214/java
tcp 0 0 0.0.0.0:50010 0.0.0.0:* LISTEN 1001 200615 15424/java
tcp 0 0 0.0.0.0:50075 0.0.0.0:* LISTEN 1001 200695 15424/java
tcp 0 0 127.0.0.1:6942 0.0.0.0:* LISTEN 1417676764 34209 3300/java
tcp 0 0 0.0.0.0:50020 0.0.0.0:* LISTEN 1001 203884 15424/java
tcp 0 0 127.0.0.1:54310 0.0.0.0:* LISTEN 1001 200627 15214/java
tcp6 0 0 :::8040 :::* LISTEN 1001 215678 16979/java
tcp6 0 0 :::44585 :::* LISTEN 1001 215655 16979/java
tcp6 0 0 :::8042 :::* LISTEN 1001 215682 16979/java
tcp6 0 0 :::8088 :::* LISTEN 1001 209693 16768/java
tcp6 0 0 :::8030 :::* LISTEN 1001 215683 16768/java
tcp6 0 0 :::8031 :::* LISTEN 1001 215663 16768/java
tcp6 0 0 :::8032 :::* LISTEN 1001 211941 16768/java
tcp6 0 0 :::8033 :::* LISTEN 1001 216025 16768/java
Web interfaces (note: the Hadoop 1.x JobTracker and TaskTracker UIs on ports 50030/50060 do not exist under YARN; the ResourceManager and NodeManager UIs replace them, matching ports 8088 and 8042 in the netstat output above):

| NameNode web UI (HDFS layer)        | http://localhost:50070/ | web UI of the NameNode daemon        |
| ResourceManager web UI (YARN layer) | http://localhost:8088/  | web UI of the ResourceManager daemon |
| NodeManager web UI (YARN layer)     | http://localhost:8042/  | web UI of the NodeManager daemon     |
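Before shutting down, a quick HDFS smoke test (the target path is just an example):

hduser@prayag:~$ hdfs dfs -mkdir -p /user/hduser
hduser@prayag:~$ hdfs dfs -copyFromLocal /etc/hosts /user/hduser/hosts
hduser@prayag:~$ hdfs dfs -cat /user/hduser/hosts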
Stop all daemons when done:

hduser@prayag:~$ /usr/local/hadoop-2.2.0/sbin/stop-dfs.sh && /usr/local/hadoop-2.2.0/sbin/stop-yarn.sh
References:
http://www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-single-node-cluster/
http://solaimurugan.blogspot.com/2013/11/installing-hadoop-2xx-single-node.html
http://codesfusion.blogspot.com/2013/10/setup-hadoop-2x-220-on-ubuntu.html?m=1
http://www.ercoppa.org/Linux-Install-Hadoop-220-on-Ubuntu-Linux-1304-Single-Node-Cluster.htm
http://www.fromdev.com/2010/12/interview-questions-hadoop-mapreduce.html
http://stackoverflow.com/a/14573531/432903
http://blog.gopivotal.com/products/usage-and-quirks-of-fs-default-name-in-hadoop-filesystem