Created
November 9, 2017 11:25
-
-
Save dice89/533e1883e277ec3c01182308ccdd6326 to your computer and use it in GitHub Desktop.
Proof of Concept Spark, PySpark Cassandra Setup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Proof-of-concept installer: Scala 2.11.8, Spark 2.2.0, a pyenv virtualenv
# with Jupyter, and PySpark wired to Cassandra.
#
# Run with bash on a Debian/Ubuntu host as a user with sudo rights.
# Prerequisites: wget, pyenv with the pyenv-virtualenv plugin, and Python
# 3.4.3 already installed in pyenv.
#
# Optional environment overrides:
#   SPARK_MASTER_HOST  host name of the Spark master   (default: sparkmaster)
#   CASSANDRA_HOSTS    comma-separated contact points  (default: node1,node2,node3)
#   PYENV_ROOT         pyenv installation directory    (default: ~/.pyenv)
set -euo pipefail

readonly SCALA_VER="2.11.8"
readonly SPARK_VER="2.2.0"
readonly SPARK_DIST="spark-${SPARK_VER}-bin-hadoop2.7"
readonly SPARK_INSTALL_DIR="/opt/spark-${SPARK_VER}"
readonly SPARK_LINK="/opt/spark"
readonly SPARK_CONF="${SPARK_LINK}/conf/spark-defaults.conf"
# One master URL for both spark-defaults.conf and the worker start-up, so the
# two can no longer disagree.
readonly SPARK_MASTER_URL="spark://${SPARK_MASTER_HOST:-sparkmaster}:7077"
readonly CASSANDRA_HOSTS="${CASSANDRA_HOSTS:-node1,node2,node3}"
readonly PYTHON_VER="3.4.3"
readonly VENV_NAME="sparkpython3"
readonly VENV_DIR="${PYENV_ROOT:-${HOME}/.pyenv}/versions/${VENV_NAME}"

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Install the pinned Scala release from the upstream .deb package.
install_scala() {
  local deb="${WORK_DIR}/scala-${SCALA_VER}.deb"

  # Best effort: remove a distro-packaged Scala if one is present. A failure
  # here (e.g. nothing to remove) must not stop the installation.
  sudo apt-get -y remove scala-library scala || true

  wget -O "${deb}" \
    "https://www.scala-lang.org/files/archive/scala-${SCALA_VER}.deb"
  sudo apt-get -y update
  # dpkg does not resolve dependencies; if it stops on missing ones, let apt
  # pull them in and finish configuring the package.
  sudo dpkg -i "${deb}" || sudo apt-get -y -f install
}

# Download Spark, unpack it under /opt and point /opt/spark at it.
install_spark() {
  local tarball="${WORK_DIR}/${SPARK_DIST}.tgz"

  # Skip the download when this version is already unpacked; moving onto an
  # existing directory would nest the new tree inside the old one.
  if [[ ! -d "${SPARK_INSTALL_DIR}" ]]; then
    wget -O "${tarball}" \
      "https://archive.apache.org/dist/spark/spark-${SPARK_VER}/${SPARK_DIST}.tgz"
    tar -xzf "${tarball}" -C "${WORK_DIR}"
    sudo mv -- "${WORK_DIR}/${SPARK_DIST}" "${SPARK_INSTALL_DIR}"
  fi
  # -f/-n replace an existing link instead of failing or descending into it.
  sudo ln -sfn "${SPARK_INSTALL_DIR}" "${SPARK_LINK}"
}

# Create spark-defaults.conf from the template and append the cluster settings.
configure_spark() {
  sudo cp -- "${SPARK_CONF}.template" "${SPARK_CONF}"
  # tee runs under sudo because the redirection target lives in /opt.
  sudo tee -a "${SPARK_CONF}" >/dev/null <<EOF

spark.executor.memory 10g
spark.driver.memory 2g
spark.master ${SPARK_MASTER_URL}
spark.jars.packages datastax:spark-cassandra-connector:2.0.0-s_2.11
spark.cassandra.connection.host ${CASSANDRA_HOSTS}
EOF
}

# Create the pyenv virtualenv (if missing) and install Jupyter into it.
setup_python_env() {
  command -v pyenv >/dev/null || die "pyenv not found on PATH"

  if [[ ! -d "${VENV_DIR}" ]]; then
    pyenv virtualenv "${PYTHON_VER}" "${VENV_NAME}"
  fi
  # Use the virtualenv's interpreter directly: `pyenv activate` depends on
  # pyenv's shell integration, which a non-interactive script may not have.
  "${VENV_DIR}/bin/python" -m pip install --upgrade pip
  "${VENV_DIR}/bin/python" -m pip install jupyter
}

# Make pyspark start a Jupyter notebook by default, both in future login
# shells (via ~/.bashrc) and in the remainder of this script.
persist_environment() {
  local rc="${HOME}/.bashrc"
  local marker="# spark/pyspark proof-of-concept environment"

  # Append only once so repeated runs do not pile up duplicate exports.
  # grep's stderr is silenced for the case where ~/.bashrc does not exist yet.
  if ! grep -qF -- "${marker}" "${rc}" 2>/dev/null; then
    # \$SPARK_HOME and \$PATH are escaped so they are expanded when ~/.bashrc
    # is read, not frozen to the values they have while this script runs.
    cat >>"${rc}" <<EOF

${marker}
export SPARK_HOME="${SPARK_LINK}"
export PYSPARK_PYTHON="${VENV_DIR}/bin/python3"
export PATH="\$SPARK_HOME/bin:\$PATH"
export PYSPARK_DRIVER_PYTHON="${VENV_DIR}/bin/jupyter"
export PYSPARK_DRIVER_PYTHON_OPTS='notebook --no-browser --allow-root --ip 127.0.0.1'
EOF
  fi

  # Export directly instead of sourcing ~/.bashrc: many distributions make
  # ~/.bashrc return early in non-interactive shells, and its contents are
  # not written with `set -eu` in mind.
  export SPARK_HOME="${SPARK_LINK}"
  export PYSPARK_PYTHON="${VENV_DIR}/bin/python3"
  export PATH="${SPARK_HOME}/bin:${PATH}"
  export PYSPARK_DRIVER_PYTHON="${VENV_DIR}/bin/jupyter"
  export PYSPARK_DRIVER_PYTHON_OPTS='notebook --no-browser --allow-root --ip 127.0.0.1'
}

# Start the Spark master and one worker on this host.
start_cluster() {
  sudo "${SPARK_LINK}/sbin/start-master.sh" --properties-file "${SPARK_CONF}"
  sudo "${SPARK_LINK}/sbin/start-slave.sh" --properties-file "${SPARK_CONF}" \
    "${SPARK_MASTER_URL}"
}

main() {
  # Downloads go to a private scratch directory that is removed on any exit.
  WORK_DIR=$(mktemp -d) || die "mktemp failed"
  readonly WORK_DIR
  trap 'rm -rf -- "${WORK_DIR}"' EXIT

  install_scala
  install_spark
  configure_spark
  setup_python_env
  persist_environment
  start_cluster

  # Start pyspark with pyspark-cassandra support (opens a Jupyter notebook
  # because of the PYSPARK_DRIVER_PYTHON settings exported above).
  "${SPARK_LINK}/bin/pyspark" --packages anguenot:pyspark-cassandra:0.6.0
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment