check groups
$ compgen -g
prayagupd ...
prayag@prayag$ sudo addgroup hadoop
Adding group `hadoop' (GID 1002) ...
Done.
check users
prayag@prayag$ cut -d: -f1 /etc/passwd | grep hd
sshd
hduser
$ echo $USER
prayag@prayag:~$ sudo adduser --ingroup hadoop hduser
Adding user `hduser' ...
Adding new user `hduser' (1001) with group `hadoop' ...
Creating home directory `/home/hduser' ...
Copying files from `/etc/skel' ...
Enter new UNIX password: 
Retype new UNIX password: 
passwd: password updated successfully
Changing the user information for hduser
Enter the new value, or press ENTER for the default
        Full Name []:   
        Room Number []: 
        Work Phone []: 
        Home Phone []: 
        Other []: 
Is the information correct? [Y/n] Y
Change to user hduser
prayag@prayag:~$ su - hduser
Password: 
hduser@prayag:~$ 
generate an SSH key for the hduser user.
hduser@prayag:~$ ssh-keygen -t rsa -P ""
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hduser/.ssh/id_rsa): 
Created directory '/home/hduser/.ssh'.
Your identification has been saved in /home/hduser/.ssh/id_rsa.
Your public key has been saved in /home/hduser/.ssh/id_rsa.pub.
The key fingerprint is:
ed:24:f2:1f:81:08:c5:d1:3e:2e:1c:96:51:66:cd:b4 hduser@prayag
The key's randomart image is:
+--[ RSA 2048]----+
|     .o+++.      |
|     .oo. o.     |
|    .  +  E      |
|     .+.oo       |
|     oooS.+      |
|      oo.+ .     |
|       .. o      |
|         . .     |
|          .      |
+-----------------+
hduser@prayag:~$ cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
Hadoop requires SSH access to manage its nodes.
Add the following line to /etc/sudoers as user prayagupd:
hduser ALL=(ALL) ALL
Then go back to hduser:
hduser@prayag:~$ sudo apt-get install ssh
hduser@prayagupd:~$ sudo chown -R hduser:hadoop /usr/local/hadoop-2.2.0
hduser@prayag:~$ sudo vi /usr/local/hadoop-2.2.0/etc/hadoop/hadoop-env.sh
# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
OR disable IPv6 system-wide with these sysctl settings (typically placed in /etc/sysctl.conf):
# disable ipv6
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
# Hadoop-related environment variables.
# NOTE(review): this reads like a fragment of hduser's ~/.bashrc — confirm.
# Every *_HOME variable points at the same install root; only HADOOP_CONF_DIR
# points at a subdirectory of it.
HADOOP_INSTALL="/usr/local/hadoop-2.2.0"
HADOOP_HOME="${HADOOP_INSTALL}"
HADOOP_MAPRED_HOME="${HADOOP_INSTALL}"
HADOOP_COMMON_HOME="${HADOOP_INSTALL}"
HADOOP_HDFS_HOME="${HADOOP_INSTALL}"
YARN_HOME="${HADOOP_INSTALL}"
HADOOP_CONF_DIR="${HADOOP_INSTALL}/etc/hadoop"
# JAVA_HOME is left disabled here (it is also configured directly for Hadoop later on)
# export JAVA_HOME=/usr/lib/jvm/java-6-sun
# Shortcuts for running Hadoop-related commands. Earlier definitions are
# dropped first; the message unalias prints when none exists is discarded.
unalias fs hls > /dev/null 2>&1
alias fs="hadoop fs" hls="fs -ls"
# If you have LZO compression enabled in your Hadoop cluster and
# compress job outputs with LZOP (not covered in this tutorial):
# Conveniently inspect an LZOP compressed file from the command
# line; run via:
#
# $ lzohead /hdfs/path/to/lzop/compressed/file.lzo
#
# Requires installed 'lzop' command.
#
# Arguments: $1 - HDFS path of the LZOP-compressed file
# Outputs:   the first 1000 decompressed lines, shown through 'less'
# Returns:   2 (with a usage message on stderr) when no path is given
lzohead () {
    if [ "$#" -lt 1 ]; then
        printf 'usage: lzohead HDFS_PATH\n' >&2
        return 2
    fi
    # Quote the path so names containing spaces or glob characters reach
    # 'hadoop fs -cat' as a single, unmodified argument.
    hadoop fs -cat "$1" | lzop -dc | head -1000 | less
}
# adding hadoop jars in classpath

# Append every *.jar file found under directory $1 to HADOOP_CLASSPATH.
# Globals:   HADOOP_CLASSPATH (read and written)
# Arguments: $1 - root directory to search
# find emits NUL-delimited names, so jar paths containing spaces or glob
# characters are appended intact instead of being word-split.
# An empty root is skipped so that an unset HADOOP_INSTALL never turns
# into a scan of the whole filesystem ("find /").
_hadoop_append_jars() {
    local root=$1 jar
    [[ -n "$root" ]] || return 0
    while IFS= read -r -d '' jar; do
        HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$jar
    done < <(find "$root/" -type f -name '*.jar' -print0)
}

_hadoop_append_jars "${HADOOP_INSTALL:-}"
CLASSPATH=$CLASSPATH:$HADOOP_CLASSPATH
# Make the Hadoop launchers (bin) and daemon scripts (sbin) directly runnable.
PATH=$PATH:$HADOOP_INSTALL/bin:$HADOOP_INSTALL/sbin
export PATH CLASSPATH HADOOP_CLASSPATH
export HADOOP_INSTALL HADOOP_HOME HADOOP_MAPRED_HOME HADOOP_COMMON_HOME HADOOP_HDFS_HOME
export YARN_HOME HADOOP_CONF_DIR
- Configure the directory where Hadoop will store its data files, the network ports it listens on, etc.
- This setup uses Hadoop’s Distributed File System (HDFS), even though our little “cluster” contains only a single local machine.
$ sudo mkdir -p /app/hadoop/tmp
$ sudo chown hduser:hadoop /app/hadoop/tmp
$ sudo chmod 750 /app/hadoop/tmp
hduser@prayag:~$ sudo vi /usr/local/hadoop-2.2.0/etc/hadoop/core-site.xml
<configuration>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/app/hadoop/tmp</value>
  <description>A base for other temporary directories.</description>
</property>
<property>
  <name>fs.default.name</name>
  <value>hdfs://localhost:54310</value>
  <description>The name of the default file system.  A URI whose
  scheme and authority determine the FileSystem implementation.  The
  uri's scheme determines the config property (fs.SCHEME.impl) naming
  the FileSystem implementation class.  The uri's authority is used to
  determine the host, port, etc. for a filesystem.</description>
</property>
</configuration>
hduser@prayag:~$ sudo cp $HADOOP_HOME/etc/hadoop/mapred-site.xml.template $HADOOP_HOME/etc/hadoop/mapred-site.xml
or 
hduser@prayag:~$ sudo cp /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml.template /usr/local/hadoop-2.2.0/etc/hadoop/mapred-site.xml
hduser@prayag:~$ sudo vi $HADOOP_HOME/etc/hadoop/mapred-site.xml
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
  <name>mapred.job.tracker</name>
  <value>localhost:54311</value>
  <description>The host and port that the MapReduce job tracker runs
  at.  If "local", then jobs are run in-process as a single map
  and reduce task.
  </description>
</property>
</configuration>
hduser@prayag:~$ sudo vi $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
  <name>dfs.replication</name>
  <value>1</value>
  <description>Default block replication.
  The actual number of replications can be specified when the file is created.
  The default is used if replication is not specified in create time.
  </description>
</property>
</configuration>
http://solaimurugan.blogspot.com/2013/11/installing-hadoop-2xx-single-node.html
hduser@prayag:~$ sudo vi $HADOOP_HOME/libexec/hadoop-config.sh 
this="${BASH_SOURCE-$0}"
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
script="$(basename -- "$this")"
this="$common_bin/$script"
[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
## at the top of conf file
export JAVA_HOME=/usr/local/jdk1.7.0
Format the HDFS filesystem via the NameNode (recommended form):
hduser@prayag:~$ hdfs namenode -format
or (older form):
hduser@prayag:~$ hadoop namenode -format
hduser@prayag:~$ /usr/local/hadoop-2.2.0/sbin/start-dfs.sh
14/11/22 16:30:57 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting namenodes on [localhost]
localhost: starting namenode, logging to /usr/local/hadoop-2.2.0/logs/hadoop-hduser-namenode-prayagupd.out
localhost: starting datanode, logging to /usr/local/hadoop-2.2.0/logs/hadoop-hduser-datanode-prayagupd.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /usr/local/hadoop-2.2.0/logs/hadoop-hduser-secondarynamenode-prayagupd.out
14/11/22 16:31:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
hduser@prayag:~$ jps
16703 Jps
15696 SecondaryNameNode
15214 NameNode
15424 DataNode
hduser@prayag:~$ /usr/local/hadoop-2.2.0/sbin/start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /usr/local/hadoop-2.2.0/logs/yarn-hduser-resourcemanager-prayag.out
localhost: starting nodemanager, logging to /usr/local/hadoop-2.2.0/logs/yarn-hduser-nodemanager-prayag.out
hduser@prayag:~$ jps
16979 NodeManager
17273 Jps
15696 SecondaryNameNode
15214 NameNode
16768 ResourceManager
15424 DataNode
Check ports
hduser@prayag:~$ sudo netstat -plten | grep java
tcp        0      0 0.0.0.0:50090           0.0.0.0:*               LISTEN      1001       204029      15696/java      
tcp        0      0 127.0.0.1:63342         0.0.0.0:*               LISTEN      1417676764 35675       3300/java       
tcp        0      0 0.0.0.0:2864            0.0.0.0:*               LISTEN      1417676764 34523       3300/java       
tcp        0      0 0.0.0.0:50070           0.0.0.0:*               LISTEN      1001       200620      15214/java      
tcp        0      0 0.0.0.0:50010           0.0.0.0:*               LISTEN      1001       200615      15424/java      
tcp        0      0 0.0.0.0:50075           0.0.0.0:*               LISTEN      1001       200695      15424/java      
tcp        0      0 127.0.0.1:6942          0.0.0.0:*               LISTEN      1417676764 34209       3300/java       
tcp        0      0 0.0.0.0:50020           0.0.0.0:*               LISTEN      1001       203884      15424/java      
tcp        0      0 127.0.0.1:54310         0.0.0.0:*               LISTEN      1001       200627      15214/java      
tcp6       0      0 :::8040                 :::*                    LISTEN      1001       215678      16979/java      
tcp6       0      0 :::44585                :::*                    LISTEN      1001       215655      16979/java      
tcp6       0      0 :::8042                 :::*                    LISTEN      1001       215682      16979/java      
tcp6       0      0 :::8088                 :::*                    LISTEN      1001       209693      16768/java      
tcp6       0      0 :::8030                 :::*                    LISTEN      1001       215683      16768/java      
tcp6       0      0 :::8031                 :::*                    LISTEN      1001       215663      16768/java      
tcp6       0      0 :::8032                 :::*                    LISTEN      1001       211941      16768/java      
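tcp6       0      0 :::8033                 :::*                    LISTEN      1001       216025      16768/java
Web interfaces:
| NameNode Web Interface (HDFS layer) | http://localhost:50070/ | web UI of the NameNode daemon |
| JobTracker Web Interface (MapReduce layer) | http://localhost:50030/ | web UI of the JobTracker daemon |
| TaskTracker Web Interface (MapReduce layer) | http://localhost:50060/ | web UI of the TaskTracker daemon |
Note: ports 50030 and 50060 do not appear in the netstat output above. The ResourceManager (PID 16768) is listening on 8088 and the NodeManager (PID 16979) on 8042, so with YARN the web UIs are presumably at http://localhost:8088/ and http://localhost:8042/ instead.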
hduser@prayag:~$ /usr/local/hadoop-2.2.0/sbin/stop-dfs.sh && /usr/local/hadoop-2.2.0/sbin/stop-yarn.sh
http://www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-single-node-cluster/
http://solaimurugan.blogspot.com/2013/11/installing-hadoop-2xx-single-node.html
http://codesfusion.blogspot.com/2013/10/setup-hadoop-2x-220-on-ubuntu.html?m=1
http://www.ercoppa.org/Linux-Install-Hadoop-220-on-Ubuntu-Linux-1304-Single-Node-Cluster.htm
http://www.fromdev.com/2010/12/interview-questions-hadoop-mapreduce.html
http://stackoverflow.com/a/14573531/432903
http://blog.gopivotal.com/products/usage-and-quirks-of-fs-default-name-in-hadoop-filesystem