- Download tarballs of hadoop and oozie from http://www.cloudera.com/content/dev-center/en/home/developer-admin-resources/cdh-components.html
- Extract them into ~/opt/ so that you end up with ~/opt/hadoop-2.0.0-cdh4.4.0 and ~/opt/oozie-3.3.2-cdh4.4.0
Add the following to ~/.bash_login or ~/.bash_profile:
# Locations of the unpacked Hadoop and Oozie tarballs under ~/opt.
export HDP_HOME="${HOME}/opt/hadoop-2.0.0-cdh4.4.0"
export HADOOP_HOME="${HDP_HOME}/share/hadoop/mapreduce1"
export HADOOP_CONF_DIR="${HDP_HOME}/etc/hadoop"
export OOZIE_HOME="${HOME}/opt/oozie-3.3.2-cdh4.4.0"
# Oozie settings: local server URL and time zone.
export OOZIE_URL="http://localhost:11000/oozie/"
export OOZIE_TIMEZONE="America/New_York"
# Oozie's bin directory ends up ahead of Hadoop's bin-mapreduce1 on PATH.
export PATH="${OOZIE_HOME}/bin:${HDP_HOME}/bin-mapreduce1:${PATH}"
# Run the hadoop CLI with the etc/hadoop-node02 configuration directory under
# HDP_HOME instead of the default one. All arguments are forwarded unchanged.
function hadoop-node02() {
  # Quoted so an HDP_HOME containing spaces stays a single --config argument.
  hadoop --config "${HDP_HOME}/etc/hadoop-node02" "$@"
}
# Run the oozie CLI with the caller's arguments, followed by an -oozie option
# that points at the production Oozie server.
function oozie-node02() {
  local -r server_url='http://production.cluster.host:11000/oozie/'
  oozie "$@" -oozie "${server_url}"
}
# Clone the stock configuration directory as etc/hadoop-node02, then create
# mapred-site.xml from the shipped template in both configuration directories.
# Paths are quoted so an HDP_HOME containing spaces is not word-split.
cp -R "${HDP_HOME}/etc/hadoop" "${HDP_HOME}/etc/hadoop-node02"
cp "${HDP_HOME}/etc/hadoop-node02/mapred-site.xml.template" "${HDP_HOME}/etc/hadoop-node02/mapred-site.xml"
cp "${HDP_HOME}/etc/hadoop/mapred-site.xml.template" "${HDP_HOME}/etc/hadoop/mapred-site.xml"
<!-- File system URI pointing at the production cluster host.
     NOTE(review): presumably belongs in etc/hadoop-node02/core-site.xml - confirm. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://production.cluster.host:8020</value>
</property>
</configuration>
<!-- Job tracker address on the production cluster host.
     NOTE(review): presumably belongs in etc/hadoop-node02/mapred-site.xml - confirm. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>production.cluster.host:8021</value>
</property>
</configuration>
<!-- File system URI pointing at localhost.
     NOTE(review): presumably belongs in etc/hadoop/core-site.xml (the sandbox config) - confirm. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:8020</value>
</property>
</configuration>
<!-- Job tracker address on localhost.
     NOTE(review): presumably belongs in etc/hadoop/mapred-site.xml (the sandbox config) - confirm. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:8021</value>
</property>
</configuration>
# List files in HDFS of your sandbox hadoop
hadoop fs -ls /
# List files from namenode02
hadoop-node02 fs -ls /tapad-data
# List 5 scheduled jobs in oozie at namenode02
# (the helper function is named oozie-node02, not oozie-node2)
oozie-node02 jobs -len 5 -jobtype coord
- Download the sandbox VM from http://www.cloudera.com/content/support/en/downloads.html
- If anything asks for a login/password, try cloudera/cloudera
# Put the settings below into nat.conf on ***your laptop***.
# Make sure '192.168.170.128' is the IP of your box in VMware (run ifconfig).
sudo vi "/Library/Preferences/VMware Fusion/vmnet8/nat.conf"
[incomingtcp]
# Each entry has the form: <port> = <VM IP>:<port on the VM>
# NOTE(review): the 500xx entries are presumably Hadoop daemon / web UI ports - confirm.
50010 = 192.168.170.128:50010
50020 = 192.168.170.128:50020
50030 = 192.168.170.128:50030
50060 = 192.168.170.128:50060
50070 = 192.168.170.128:50070
50075 = 192.168.170.128:50075
# 8020: port used by fs.default.name (hdfs://localhost:8020)
8020 = 192.168.170.128:8020
# 8021: port used by mapred.job.tracker (localhost:8021)
8021 = 192.168.170.128:8021
# 8888: port of the HUE web UI (http://localhost:8888)
8888 = 192.168.170.128:8888
# 11000: port in OOZIE_URL (http://localhost:11000/oozie/)
11000 = 192.168.170.128:11000
# Now restart VMware's NAT: stop it, then start it again.
for vmnet_action in --stop --start; do
  sudo "/Applications/VMware Fusion.app/Contents/Library/vmnet-cli" "${vmnet_action}"
done
You can follow the instructions here: https://github.com/cloudera/cdk-examples/
The user name on your laptop and on the sandbox should match. Add the user to sudoers if you want.
# Become root; this opens a root login shell, and the next two commands are typed there.
# NOTE(review): presumably run on the sandbox, not on your laptop - confirm.
sudo su -
# Create the account; replace USERNAME with the user name you use on your laptop.
useradd USERNAME
# Set a password for the new account.
passwd USERNAME
Open HUE at http://localhost:8888/useradmin/ (log in as cloudera/cloudera) and create an HDFS account for yourself via User Admin -> Add User. Make sure 'Create user dir' is selected.
# On the virtual machine (the sandbox), execute:
# Switch to the hdfs user; the commands up to 'exit' are typed in that shell.
sudo su - hdfs
# Create the deployments directory in HDFS.
hadoop fs -mkdir /oozie/deployments
# Recursively grant read/write/execute on it to every user.
hadoop fs -chmod -R 777 /oozie/deployments
# Leave the hdfs user's shell.
exit
# On your laptop, as yourself, execute:
hadoop fs -mkdir /oozie/deployments/lib-scalding
awesome stuff, marked!