install spark
#!/bin/bash
sudo apt-get -y update
sudo apt-get -y install software-properties-common
# Installation of Java. Install only one JDK!
#default JDK
sudo apt-get -y install default-jdk
java -version
#Optional: OpenJDK
#sudo apt-get install openjdk-7-jdk
#Optional: Oracle JDK
#sudo add-apt-repository -y ppa:webupd8team/java
#sudo apt-get -y update
#sudo apt-get -y install oracle-java7-installer
#Manually run this later if you want to change the JDK
#sudo update-alternatives --config java|javac|...
# Installation of commonly used python scipy tools
#sudo apt-get -y install python-numpy python-scipy python-matplotlib ipython ipython-notebook python-pandas python-sympy python-nose
#Add a new group "hadoop" and a dedicated hadoop user "hduser". This is not mandatory, but keeping the Hadoop installation separate is recommended:
#sudo addgroup hadoop
#sudo adduser --ingroup hadoop hduser
#sudo adduser hduser sudo
#Password-less SSH access to the local machine
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
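# Optional sanity check (a sketch, not part of the original script): confirm
# password-less SSH really works; StrictHostKeyChecking=no just skips the
# interactive host-key prompt on the first connect.
# ssh -o StrictHostKeyChecking=no localhost 'echo passwordless ssh OK'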
#wget http://mirror.tcpdiag.net/apache/hive/stable/apache-hive-1.2.1-bin.tar.gz
wget http://apache.osuosl.org/hadoop/common/hadoop-2.7.1/hadoop-2.7.1.tar.gz
tar zxvf hadoop-2.7.1.tar.gz
cat >>~/.bashrc <<EOF
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export HADOOP_INSTALL=\$HOME/hadoop-2.7.1
export HADOOP_HOME=\$HOME/hadoop-2.7.1
export PATH=\$PATH:\$HADOOP_INSTALL/bin:\$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=\$HADOOP_INSTALL
export HADOOP_COMMON_HOME=\$HADOOP_INSTALL
export HADOOP_HDFS_HOME=\$HADOOP_INSTALL
export YARN_HOME=\$HADOOP_INSTALL
export HADOOP_COMMON_LIB_NATIVE_DIR=\$HADOOP_INSTALL/lib/native
export HADOOP_OPTS="-Djava.library.path=\$HADOOP_INSTALL/lib/native"
export HADOOP_CONF_DIR=\$HADOOP_INSTALL/etc/hadoop
EOF
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export HADOOP_INSTALL=$HOME/hadoop-2.7.1
export HADOOP_HOME=$HOME/hadoop-2.7.1
export PATH=$PATH:$HADOOP_INSTALL/bin:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_INSTALL/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib/native"
export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
#install snappy compression
sudo apt-get install -y libsnappy-dev
cp /usr/lib/libsnappy* $HADOOP_HOME/lib/native/
sed -i 's/\${JAVA_HOME}/\/usr\/lib\/jvm\/java-7-openjdk-amd64/g' $HADOOP_CONF_DIR/hadoop-env.sh
cat >$HADOOP_CONF_DIR/core-site.xml <<EOF
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
EOF
cat >$HADOOP_CONF_DIR/yarn-site.xml <<EOF
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
EOF
cat >$HADOOP_CONF_DIR/hdfs-site.xml <<EOF
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>file:///home/vagrant/hadoopinfra/hdfs/namenode</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>file:///home/vagrant/hadoopinfra/hdfs/datanode</value>
</property>
</configuration>
EOF
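# Optional (not in the original script): pre-create the local directories that
# hdfs-site.xml points at. "hadoop namenode -format" and the datanode normally
# create these on demand, so this is only a belt-and-braces step.
# mkdir -p /home/vagrant/hadoopinfra/hdfs/namenode /home/vagrant/hadoopinfra/hdfs/datanode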
cat >$HADOOP_CONF_DIR/mapred-site.xml <<EOF
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:54311</value>
<description>The host and port that the MapReduce job tracker runs
at. If "local", then jobs are run in-process as a single map
and reduce task.
</description>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
EOF
#format the HDFS
hadoop namenode -format
# start Hadoop. Note that start-all.sh is deprecated.
#start-dfs.sh
#start-yarn.sh
#Accessing Hadoop in the browser
#The NameNode web UI listens on port 50070. Use the following URL to reach the Hadoop services:
#http://localhost:50070/
#check the Yarn UI: http://localhost:8088/cluster
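# Optional smoke test (a sketch; run only after start-dfs.sh and start-yarn.sh):
# the examples jar ships with the Hadoop 2.7.1 binary distribution, and a tiny
# pi job confirms that HDFS and YARN are wired up.
# hadoop jar $HADOOP_INSTALL/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar pi 2 10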
# Installation of scala
wget http://www.scala-lang.org/files/archive/scala-2.10.6.deb
sudo dpkg -i scala-2.10.6.deb
sudo apt-get install -y -f
# Installation of sbt
wget https://dl.bintray.com/sbt/debian/sbt-0.13.8.deb
sudo dpkg -i sbt-0.13.8.deb
# Downloading spark
#wget http://d3kbcqa49mib13.cloudfront.net/spark-1.5.1.tgz
#tar -zxf spark-1.5.1.tgz
#cd spark-1.5.1
wget http://d3kbcqa49mib13.cloudfront.net/spark-1.5.2-bin-hadoop2.6.tgz
tar -zxf spark-1.5.2-bin-hadoop2.6.tgz
cp spark-1.5.2-bin-hadoop2.6/conf/spark-env.sh.template spark-1.5.2-bin-hadoop2.6/conf/spark-env.sh
cp spark-1.5.2-bin-hadoop2.6/conf/spark-defaults.conf.template spark-1.5.2-bin-hadoop2.6/conf/spark-defaults.conf
cp spark-1.5.2-bin-hadoop2.6/conf/log4j.properties.template spark-1.5.2-bin-hadoop2.6/conf/log4j.properties
cp spark-1.5.2-bin-hadoop2.6/conf/slaves.template spark-1.5.2-bin-hadoop2.6/conf/slaves
mkdir spark-1.5.2-bin-hadoop2.6/logs
cat >>~/.bashrc <<EOF
export SPARK_HOME=\$HOME/spark-1.5.2-bin-hadoop2.6
export SPARK_CONF_DIR=\$SPARK_HOME/conf
export PATH=\$PATH:\$SPARK_HOME/bin
export LIVY_HOME=\$HOME/hue/apps/spark/java
EOF
export SPARK_HOME=$HOME/spark-1.5.2-bin-hadoop2.6
export SPARK_CONF_DIR=$SPARK_HOME/conf
export PATH=$PATH:$SPARK_HOME/bin
export LIVY_HOME=$HOME/hue/apps/spark/java
cat >>~/spark-1.5.2-bin-hadoop2.6/conf/spark-env.sh <<EOF
export SPARK_MASTER_IP=localhost
export SPARK_WORKER_CORES=1
export SPARK_WORKER_MEMORY=800m
export SPARK_WORKER_INSTANCES=1
EOF
cat >>~/spark-1.5.2-bin-hadoop2.6/conf/spark-defaults.conf <<EOF
spark.driver.extraClassPath $HOME/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-azure-2.7.1.jar:$HOME/hadoop-2.7.1/share/hadoop/tools/lib/azure-storage-2.0.0.jar
EOF
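# Optional smoke test (a sketch, not part of the original script): the bundled
# SparkPi example verifies the Spark install; the examples jar name below is
# assumed from the 1.5.2 binary layout and may differ.
# $SPARK_HOME/bin/run-example SparkPi 10
# or, once HDFS/YARN are up:
# $SPARK_HOME/bin/spark-submit --master yarn --class org.apache.spark.examples.SparkPi \
#   $SPARK_HOME/lib/spark-examples-1.5.2-hadoop2.6.0.jar 10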
#mvn -Pyarn -Phadoop-2.6 -Dhadoop.version=2.6.0 -Phive -Phive-thriftserver -Dscala-2.10.0 -DskipTests clean package
#Livy server
sudo apt-get install -y maven
sudo apt-get install -y git
git clone https://github.com/cloudera/hue.git
cd hue/apps/spark/java
mvn -DskipTests -Dspark.version=1.5.2 clean package
#./bin/livy-server &
cd
cp ~/hue/apps/spark/java/conf/livy-defaults.conf.template ~/hue/apps/spark/java/conf/livy-defaults.conf
cat >>~/hue/apps/spark/java/conf/livy-defaults.conf <<EOF
livy.server.session.factory = yarn
EOF
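# Optional check (assumed defaults; Livy listens on port 8998): once livy-server
# is running, a Spark session can be created and inspected over REST, e.g.
# curl -s -X POST -H 'Content-Type: application/json' -d '{"kind":"spark"}' http://localhost:8998/sessions
# curl -s http://localhost:8998/sessions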
sudo apt-get install -y zookeeperd
wget http://mirrors.sonic.net/apache/kafka/0.8.2.2/kafka_2.10-0.8.2.2.tgz
tar zxvf kafka_2.10-0.8.2.2.tgz
cat >>~/kafka_2.10-0.8.2.2/config/server.properties <<EOF
delete.topic.enable = true
EOF
#start kafka as a background daemon
# ~/kafka_2.10-0.8.2.2/bin/kafka-server-start.sh -daemon ~/kafka_2.10-0.8.2.2/config/server.properties
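# Optional smoke test (a sketch; assumes zookeeperd and the Kafka broker above
# are running): create a test topic and list it back.
# ~/kafka_2.10-0.8.2.2/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
# ~/kafka_2.10-0.8.2.2/bin/kafka-topics.sh --list --zookeeper localhost:2181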
wget http://apache.claz.org/hive/stable/apache-hive-1.2.1-bin.tar.gz
tar zxvf apache-hive-1.2.1-bin.tar.gz
cat >>~/.bashrc <<EOF
export HIVE_HOME=\$HOME/apache-hive-1.2.1-bin
export HCAT_HOME=~/apache-hive-1.2.1-bin/hcatalog
export PATH=\$PATH:\$HIVE_HOME/bin:\$HCAT_HOME/bin
#export CLASSPATH=\$CLASSPATH:\$HADOOP_HOME/lib/*:\$HIVE_HOME/lib/*
EOF
export HIVE_HOME=$HOME/apache-hive-1.2.1-bin
export HCAT_HOME=~/apache-hive-1.2.1-bin/hcatalog
export PATH=$PATH:$HIVE_HOME/bin:$HCAT_HOME/bin
#export CLASSPATH=$CLASSPATH:$HADOOP_HOME/lib/*:$HIVE_HOME/lib/*
cp $HIVE_HOME/conf/hive-env.sh.template $HIVE_HOME/conf/hive-env.sh
cat >>$HIVE_HOME/conf/hive-env.sh <<EOF
export HADOOP_HOME=\$HADOOP_HOME
EOF
#use the default derby db
wget http://archive.apache.org/dist/db/derby/db-derby-10.10.2.0/db-derby-10.10.2.0-bin.tar.gz
tar zxvf db-derby-10.10.2.0-bin.tar.gz
cat >>~/.bashrc <<EOF
export DERBY_HOME=~/db-derby-10.10.2.0-bin
export PATH=\$PATH:\$DERBY_HOME/bin
export CLASSPATH=\$CLASSPATH:\$DERBY_HOME/lib/derbyclient.jar:\$DERBY_HOME/lib/derbytools.jar
EOF
export DERBY_HOME=~/db-derby-10.10.2.0-bin
export PATH=$PATH:$DERBY_HOME/bin
export CLASSPATH=$CLASSPATH:$DERBY_HOME/lib/derbyclient.jar:$DERBY_HOME/lib/derbytools.jar
mkdir $DERBY_HOME/data
#copy the Derby JDBC client driver to Hive
cp $DERBY_HOME/lib/derbyclient.jar $HIVE_HOME/lib/
#configure the Hive metastore to use derby
cat >$HIVE_HOME/conf/hive-site.xml <<EOF
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby://localhost:1527/metastore_db;create=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
</configuration>
EOF
#create a symlink for spark to hive-site.xml
ln -s $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf/hive-site.xml
cat >$HIVE_HOME/conf/jpox.properties <<EOF
javax.jdo.PersistenceManagerFactoryClass = org.jpox.PersistenceManagerFactoryImpl
org.jpox.autoCreateSchema = false
org.jpox.validateTables = false
org.jpox.validateColumns = false
org.jpox.validateConstraints = false
org.jpox.storeManagerType = rdbms
org.jpox.autoCreateSchema = true
org.jpox.autoStartMechanismMode = checked
org.jpox.transactionIsolation = read_committed
javax.jdo.option.DetachAllOnCommit = true
javax.jdo.option.NontransactionalRead = true
javax.jdo.option.ConnectionDriverName = org.apache.derby.jdbc.ClientDriver
javax.jdo.option.ConnectionURL = jdbc:derby://localhost:1527/metastore_db;create=true
javax.jdo.option.ConnectionUserName = APP
javax.jdo.option.ConnectionPassword = mine
EOF
#Run these manually if something goes wrong with Hive; Hive should auto-create them.
# $HADOOP_HOME/bin/hadoop fs -mkdir -p /tmp
# $HADOOP_HOME/bin/hadoop fs -mkdir -p /user/hive/warehouse
# $HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp
# $HADOOP_HOME/bin/hadoop fs -chmod g+w /user/hive/warehouse
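# Optional check (not in the original script): with HDFS up and the Derby
# network server running (startNetworkServer -h 0.0.0.0 &), a trivial query
# confirms the metastore connection works end to end.
# hive -e 'show databases;'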
wget http://mirrors.ocf.berkeley.edu/apache/hbase/0.98.17/hbase-0.98.17-hadoop2-bin.tar.gz
tar zxvf hbase-0.98.17-hadoop2-bin.tar.gz
#Set JAVA_HOME for HBase and do not let HBase manage or start ZooKeeper; we already have zookeeperd.
cat >>~/hbase-0.98.17-hadoop2/conf/hbase-env.sh <<EOF
export JAVA_HOME=$JAVA_HOME
export HBASE_MANAGES_ZK=false
EOF
cat >>~/.bashrc <<EOF
export HBASE_HOME=~/hbase-0.98.17-hadoop2
export PATH=\$PATH:\$HBASE_HOME/bin
EOF
export HBASE_HOME=~/hbase-0.98.17-hadoop2
export PATH=$PATH:$HBASE_HOME/bin
cat >~/hbase-0.98.17-hadoop2/conf/hbase-site.xml <<EOF
<configuration>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- Set the path where you want HBase to store its files. -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://localhost:9000/hbase</value>
</property>
<!-- Set the path where you want HBase to store its built-in ZooKeeper files. -->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/vagrant/zookeeperdata</value>
</property>
</configuration>
EOF
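# Optional smoke test (a sketch; run after start-hbase.sh with HDFS up):
# echo "status" | ~/hbase-0.98.17-hadoop2/bin/hbase shell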
wget https://dist.apache.org/repos/dist/release/kylin/apache-kylin-1.2/apache-kylin-1.2-bin.tar.gz
tar zxvf apache-kylin-1.2-bin.tar.gz
cat >>~/.bashrc <<EOF
export KYLIN_HOME=~/apache-kylin-1.2-bin
export PATH=\$PATH:\$KYLIN_HOME/bin
EOF
export KYLIN_HOME=~/apache-kylin-1.2-bin
export PATH=$PATH:$KYLIN_HOME/bin
#Add this to kylin.properties for 1.2 because the HBase binary dist is built against Hadoop 2.2, not 2.4+.
#Kylin uses this URL to check the cube build status. Got this from their forum.
cat >>$KYLIN_HOME/conf/kylin.properties <<EOF
kylin.job.yarn.app.rest.check.status.url=http://localhost:8088/ws/v1/cluster/apps/\${job_id}?anonymous=true
EOF
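# Optional (assumed helper; the Kylin binary package ships an environment check
# script): sanity-check the Hadoop/HBase/Hive setup before starting Kylin.
# $KYLIN_HOME/bin/check-env.sh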
#Need to create /kylin manually in HDFS if it's not there
#create handy startall.sh and stopall.sh. Modify as needed
cat >>$HOME/startall.sh <<EOF
start-dfs.sh
start-yarn.sh
startNetworkServer -h 0.0.0.0 &
start-hbase.sh
EOF
chmod a+x $HOME/startall.sh
cat >>$HOME/stopall.sh <<EOF
stop-dfs.sh
stop-yarn.sh
stopNetworkServer -h 0.0.0.0
stop-hbase.sh
EOF
chmod a+x $HOME/stopall.sh
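# Optional (not in the original script): after running ~/startall.sh, "jps"
# from the JDK should list NameNode, DataNode, SecondaryNameNode,
# ResourceManager, NodeManager and HMaster if everything came up.
# jps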
#Useful info:
#ports
#Livy - 8998
#hdfs name node - 9000
#namenode UI - 50070
#Yarn cluster UI - 8088
#zookeeper - 2181
#kafka - 9092
#hbase UI - http://localhost:60010
#kylin UI - 7070
#start services
#start-dfs.sh
#start-yarn.sh
#local mode for livy:
#./hue/apps/spark/java/bin/livy-server &
#yarn mode for livy:
#env \
# LIVY_SERVER_JAVA_OPTS="-Dlivy.server.session.factory=yarn" \
# CLASSPATH=`hadoop classpath` \
# $LIVY_HOME/bin/livy-server &
# ~/kafka_2.10-0.8.2.2/bin/kafka-server-start.sh -daemon ~/kafka_2.10-0.8.2.2/config/server.properties
#start derby
#startNetworkServer -h 0.0.0.0
# ./hbase-0.98.17-hadoop2/bin/start-hbase.sh
#run $KYLIN_HOME/bin/sample.sh to create a sample kylin project
# $KYLIN_HOME/bin/kylin.sh start