0.check-selinux.log
1.install-repo-pkg.log
2.refresh-repo.log
3.install-oracle-j2sdk1.6.log
4.install daemons
sudo ufw enable
root@dhana:/etc/apt/sources.list.d# cat cloudera-cdh4.list
deb [arch=amd64] http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh/ precise-cdh4.0.1 contrib
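If that file does not exist yet, it can be created in one line (same path and contents as shown above):

echo "deb [arch=amd64] http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh/ precise-cdh4.0.1 contrib" | sudo tee /etc/apt/sources.list.d/cloudera-cdh4.list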
apt-get update
Check whether the package is already installed (the same check-then-install pattern repeats for each package; a reusable helper is sketched after these commands):
dpkg -l oracle-j2sdk1.6 | grep -E '^ii[[:space:]]*oracle-j2sdk1.6[[:space:]]*'
If it is not installed:
apt-get -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold -y install oracle-j2sdk1.6
dpkg -l hadoop | grep -E '^ii[[:space:]]*hadoop[[:space:]]*'
apt-get -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold -y install hadoop
--> depends on zookeeper
dpkg -l hadoop-hdfs | grep -E '^ii[[:space:]]*hadoop-hdfs[[:space:]]*'
apt-get -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold -y install hadoop-hdfs
dpkg -l hadoop-mapreduce | grep -E '^ii[[:space:]]*hadoop-mapreduce[[:space:]]*'
apt-get -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold -y install hadoop-mapreduce
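Since the same check-then-install pattern repeats for every package, it can be wrapped in a small shell helper. A minimal sketch (ensure_pkg is a hypothetical name; packages as above):

ensure_pkg() {
  # Install $1 only if dpkg does not already list it as installed ("ii").
  if ! dpkg -l "$1" 2>/dev/null | grep -qE "^ii[[:space:]]*$1[[:space:]]"; then
    apt-get -o Dpkg::Options::=--force-confdef \
            -o Dpkg::Options::=--force-confold -y install "$1"
  fi
}

for pkg in oracle-j2sdk1.6 hadoop hadoop-hdfs hadoop-mapreduce; do
  ensure_pkg "$pkg"
done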
For a standalone setup, you need to install these packages
===========================================================
apt-get install hadoop \
  hadoop-hdfs-namenode hadoop-hdfs-datanode \
  hadoop-0.20-namenode hadoop-0.20-jobtracker \
  hadoop-0.20-datanode hadoop-0.20-tasktracker
root@ubuntu-VirtualBox:/etc/hadoop/conf# cat core-site.xml
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost:8020</value>
  </property>
</configuration>
root@ubuntu-VirtualBox:/etc/hadoop/conf# cat mapred-site.xml
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:8021</value>
  </property>
</configuration>
root@ubuntu-VirtualBox:/etc/hadoop/conf# cat hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <!-- Immediately exit safemode as soon as one DataNode checks in.
       On a multi-node cluster, these configurations must be removed. -->
  <property>
    <name>dfs.safemode.extension</name>
    <value>0</value>
  </property>
  <property>
    <name>dfs.safemode.min.datanodes</name>
    <value>1</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/var/lib/hadoop-hdfs/cache/${user.name}</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/var/lib/hadoop-hdfs/cache/${user.name}/dfs/name</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>/var/lib/hadoop-hdfs/cache/${user.name}/dfs/namesecondary</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/var/lib/hadoop-hdfs/cache/${user.name}/dfs/data</value>
  </property>
</configuration>
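The local paths referenced above must exist and be writable by the hdfs user before formatting. A minimal sketch, assuming the hdfs:hdfs user and group created by the CDH packages:

$ sudo mkdir -p /var/lib/hadoop-hdfs/cache
$ sudo chown -R hdfs:hdfs /var/lib/hadoop-hdfs/cache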
$ sudo -u hdfs hadoop namenode -format
1. Download and install CDH4 repository
$ wget http://archive.cloudera.com/cdh4/one-click-install/precise/amd64/cdh4-repository_1.0_all.deb
$ sudo dpkg -i cdh4-repository_1.0_all.deb
2. Add the Cloudera Public GPG Key to your repository
$ curl -s http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh/archive.key | sudo apt-key add -
$ sudo apt-get update
$ sudo apt-get install hadoop-0.20-conf-pseudo
$ sudo -u hdfs hdfs namenode -format
Start HDFS
$ for service in /etc/init.d/hadoop-hdfs-*; do sudo $service start; done
To verify the services have started, check the NameNode web UI at http://localhost:50070
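For a quick check from the shell as well (assuming curl is installed and the default web port):

$ curl -s -o /dev/null -w "%{http_code}\n" http://localhost:50070/   # expect 200
$ ps -ef | grep -E 'namenode|datanode' | grep -v grep                # daemon processes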
3. Create /tmp directory and set permissions
## Create a bunch of directories
sudo -u hdfs hadoop fs -mkdir /var
sudo -u hdfs hadoop fs -mkdir /var/lib
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred
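The repeated -mkdir calls above can also be written as a loop over the same paths (a sketch equivalent to the commands above):

for d in /var /var/lib /var/lib/hadoop-hdfs /var/lib/hadoop-hdfs/cache \
         /var/lib/hadoop-hdfs/cache/mapred \
         /var/lib/hadoop-hdfs/cache/mapred/mapred \
         /var/lib/hadoop-hdfs/cache/mapred/mapred/staging; do
  sudo -u hdfs hadoop fs -mkdir "$d"
done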
$ sudo -u hdfs hadoop fs -mkdir /tmp
$ sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
## Check the output:
sudo -u hdfs hadoop fs -ls -R /
## Returns
drwxrwxrwt - hdfs supergroup 0 /tmp
drwxr-xr-x - hdfs supergroup 0 /var
drwxr-xr-x - hdfs supergroup 0 /var/lib
drwxr-xr-x - hdfs supergroup 0 /var/lib/hadoop-hdfs
drwxr-xr-x - hdfs supergroup 0 /var/lib/hadoop-hdfs/cache
drwxr-xr-x - mapred supergroup 0 /var/lib/hadoop-hdfs/cache/mapred
drwxr-xr-x - mapred supergroup 0 /var/lib/hadoop-hdfs/cache/mapred/mapred
drwxrwxrwt - mapred supergroup 0 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
$ sudo -u hdfs hadoop fs -mkdir /user/$USER
$ sudo -u hdfs hadoop fs -chown $USER /user/$USER
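With HDFS prepared, the MRv1 daemons can be started the same way as HDFS, and one of the bundled examples makes a quick smoke test. A sketch, assuming the example jar path shipped by the CDH4 hadoop-0.20-mapreduce package:

$ for service in /etc/init.d/hadoop-0.20-mapreduce-*; do sudo $service start; done
$ hadoop jar /usr/lib/hadoop-0.20-mapreduce/hadoop-examples.jar pi 2 10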