These steps have been tested with:
- Oracle Linux 6.4
- RHEL 6.5
- CDH 5.1
Note: I wish this were parceled up.
# Open a root shell; the rpm/yum steps that follow are run without sudo.
sudo bash
cd /tmp
# Fetch and install the epel-release 6-8 package (EL6, x86_64).
wget http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
rpm -ivh epel-release-6-8.noarch.rpm
# Install compilers (gcc-c++, gcc-gfortran), -devel headers and the quoted
# shared-library / perl capabilities. Presumably these are the prerequisites
# for building R from source -- confirm against the R configure output.
yum install -y xdg-utils bzip2-devel gcc-c++ gcc-gfortran libX11-devel pcre-devel \
tcl-devel tk-devel zlib-devel readline-devel libXt-devel libpng-devel cairo-devel \
pango-devel 'libXmu.so.6()(64bit)' 'libgfortran.so.1()(64bit)' 'perl(File::Copy::Recursive)'
# Download and unpack the R 2.15.3 source tarball under /tmp.
cd /tmp/
wget http://cran.rstudio.com/src/base/R-2/R-2.15.3.tar.gz
tar xvfz R-2.15.3.tar.gz
cd R-2.15.3
# Configure with the R shared library enabled, X disabled and cairo enabled,
# then build and install. The && chain stops at the first failing step.
./configure --enable-R-shlib --without-x --with-cairo && make && sudo make install
# Symlink /usr/local/bin/Rscript and /usr/local/bin/R into /usr/bin.
sudo ln -s /usr/local/bin/Rscript /usr/bin/
sudo ln -s /usr/local/bin/R /usr/bin/
cd ..
# Register java, javac, jar and javah from the jdk1.7.0_45-cloudera JDK with
# the alternatives system, each at priority 2000.
alternatives --install /usr/bin/java java /usr/java/jdk1.7.0_45-cloudera/bin/java 2000
alternatives --install /usr/bin/javac javac /usr/java/jdk1.7.0_45-cloudera/bin/javac 2000
alternatives --install /usr/bin/jar jar /usr/java/jdk1.7.0_45-cloudera/bin/jar 2000
alternatives --install /usr/bin/javah javah /usr/java/jdk1.7.0_45-cloudera/bin/javah 2000
# Optional check (left commented out): run these by hand to verify which
# java / javac versions are now selected.
# java -version
# javac -version
# Java settings exported before running `R CMD javareconf`.
# JAVA_HOME carries no trailing slash so derived paths contain no "//".
export JAVA_HOME=/usr/java/jdk1.7.0_45-cloudera
export JAVA_CPPFLAGS="-I$JAVA_HOME/include -I$JAVA_HOME/include/linux"
# JVM library search path: defined once, then reused for LD_LIBRARY_PATH so
# the two values cannot drift apart.
export JAVA_LD_LIBRARY_PATH="$JAVA_HOME/jre/lib/amd64/server:$JAVA_HOME/jre/lib/amd64:$JAVA_HOME/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib"
export LD_LIBRARY_PATH="$JAVA_LD_LIBRARY_PATH"
# Linker flags mirror the directories of JAVA_LD_LIBRARY_PATH one-for-one
# (including jre/../lib/amd64) and link against libjvm.
export JAVA_LIBS="-L$JAVA_HOME/jre/lib/amd64/server -L$JAVA_HOME/jre/lib/amd64 -L$JAVA_HOME/jre/../lib/amd64 -L/usr/java/packages/lib/amd64 -L/usr/lib64 -L/lib64 -L/lib -L/usr/lib -ljvm"
# Run R's javareconf step so R's Java configuration is regenerated; run it in
# the same shell as the JAVA_* exports so they are visible to it.
R CMD javareconf
# Inside R: install rJava, load it, start a JVM and print the Java version
# that JVM reports.
install.packages(c("rJava"), repos="http://cran.us.r-project.org/")
library(rJava)
# Initialise the JVM before making any Java calls.
.jinit()
# Static call to java.lang.System.getProperty("java.version"); "S" is the
# rJava return-type code for a string.
.jcall("java/lang/System","S","getProperty","java.version")
# From the shell: download specific archived source releases of Rcpp (0.9.15),
# plyr (1.8) and reshape2 (1.2.2) and install each with R CMD INSTALL.
wget http://cran.us.r-project.org/src/contrib/Archive/Rcpp/Rcpp_0.9.15.tar.gz
R CMD INSTALL Rcpp_0.9.15.tar.gz
wget http://cran.revolutionanalytics.com/src/contrib/Archive/plyr/plyr_1.8.tar.gz
R CMD INSTALL plyr_1.8.tar.gz
wget http://cran.revolutionanalytics.com/src/contrib/Archive/reshape2/reshape2_1.2.2.tar.gz
R CMD INSTALL reshape2_1.2.2.tar.gz
# Inside R: install the remaining packages from CRAN mirrors.
install.packages(c("RJSONIO", "bitops", "digest", "RImpala"), repos="http://cran.us.r-project.org/")
# Packages installed with byte-compilation enabled. "functional" is installed
# only here, so it is not fetched and built twice.
install.packages(c('itertools', 'functional', 'stringr'), repos="http://cran.revolutionanalytics.com", INSTALL_opts=c('--byte-compile') )
install.packages(c('randomForest'), repos="http://cran.revolutionanalytics.com" )
install.packages(c("caTools"), repos="http://cran.us.r-project.org/")
# Install git, then build rmr2 and rhdfs from their GitHub sources.
yum install -y git
# Locations of the hadoop launcher and the streaming jar inside the CDH parcel.
export HADOOP_CMD=/usr/bin/hadoop
export HADOOP_STREAMING=/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-streaming.jar
# Clone over https: GitHub no longer serves the unauthenticated git:// protocol.
git clone https://github.com/RevolutionAnalytics/rmr2.git
sudo R CMD INSTALL --byte-compile rmr2/pkg/
git clone https://github.com/RevolutionAnalytics/rhdfs.git
# HADOOP_CMD is passed on the sudo command line for the rhdfs install, since
# sudo may not preserve the variable exported above.
sudo HADOOP_CMD=/usr/bin/hadoop R CMD INSTALL --byte-compile rhdfs/pkg/
# Download RStudio Server 0.98.490, install the libssl.so.6 capability and
# shared-mime-info via yum, then install the rpm with dependency checks
# skipped (--nodeps).
wget http://download2.rstudio.org/rstudio-server-0.98.490-x86_64.rpm
yum install -y 'libssl.so.6()(64bit)' shared-mime-info
rpm -i --nodeps rstudio-server-0.98.490-x86_64.rpm
# Stop the server while its configuration is changed.
sudo /usr/sbin/rstudio-server stop
# Reuse the system login PAM profile for the rstudio PAM service.
cp /etc/pam.d/login /etc/pam.d/rstudio
# Point RStudio at the R built under /usr/local. The rsession-which-r option
# is read from rserver.conf; append so existing settings are kept.
echo rsession-which-r=/usr/local/bin/R | sudo tee -a /etc/rstudio/rserver.conf
sudo /usr/sbin/rstudio-server start
Go to "http://edge-host:8787" in a web browser.
# In RStudio: smoke-test rmr2 with a small map-only job.
# Tell rmr2 where the hadoop launcher and the streaming jar live.
Sys.setenv(
  HADOOP_CMD = "/usr/bin/hadoop",
  HADOOP_STREAMING = "/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-streaming.jar"
)
library(rmr2)

# Write the integers 1..1000 to the DFS as the job input.
small.ints <- to.dfs(1:1000)

# The mapper pairs every value with its square. The hadoop backend parameters
# set mapreduce.map.memory.mb and mapreduce.reduce.memory.mb to 8192.
result <- mapreduce(
  input = small.ints,
  map = function(k, v) cbind(v, v^2),
  backend.parameters = list(
    hadoop = list(
      D = "mapreduce.map.memory.mb=8192",
      D = "mapreduce.reduce.memory.mb=8192"
    )
  )
)

# Read the job output back into the R session.
from.dfs(result)