Last active
January 4, 2021 12:33
-
-
Save ghafran/19d0067d88dc074413422d4cae4cc344 to your computer and use it in GitHub Desktop.
Setup Spark 2.0 with Cassandra Connector
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Install Java 8 (required by Spark 2.0) ---
sudo apt-get update -y
sudo apt-get install software-properties-common -y
sudo add-apt-repository -y ppa:openjdk-r/ppa
sudo apt-get install wget -y
sudo apt-get install openjdk-8-jdk -y
sudo apt-get update -y

# --- Create the serve directory that will hold the Spark install ---
sudo mkdir -p /srv
cd /srv

# --- Install Scala 2.11 (Spark 2.0 is built against Scala 2.11) ---
sudo wget http://downloads.lightbend.com/scala/2.11.7/scala-2.11.7.deb
sudo dpkg -i scala-2.11.7.deb

# --- Download Spark 2.0.0 with Hadoop 2.7 bindings ---
# FIX(review): the original CloudFront mirror (d3kbcqa49mib13.cloudfront.net)
# was retired; Apache's archive hosts all historical Spark releases.
sudo wget https://archive.apache.org/dist/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz
sudo tar -zxf spark-2.0.0-bin-hadoop2.7.tgz
sudo mv spark-2.0.0-bin-hadoop2.7 spark
# --- Build the DataStax spark-cassandra-connector from source ---
# FIX(review): Bintray (dl.bintray.com/sbt/debian) was shut down in 2021;
# sbt's official Debian repository is now repo.scala-sbt.org.
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee -a /etc/apt/sources.list.d/sbt.list
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 642AC823
sudo apt-get install apt-transport-https -y
sudo apt-get update -y
sudo apt-get install sbt -y
# FIX(review): original URL "github.com" is a scraping artifact of github.com.
git clone https://github.com/datastax/spark-cassandra-connector.git
cd spark-cassandra-connector
# Connector release line that matches Spark 2.0 / Scala 2.11.
git checkout v2.0.0-M2
sudo sbt assembly -Dscala-2.11=true

# --- Copy the assembled connector jar(s) onto Spark's default classpath ---
find . -iname "*.jar" -type f -exec /bin/cp {} /srv/spark/jars/ \;
# --- Start a standalone Spark master, bound to all interfaces ---
/srv/spark/sbin/start-master.sh --host 0.0.0.0

# --- Start a worker and register it with the local master ---
/srv/spark/sbin/start-slave.sh --host 0.0.0.0 spark://localhost:7077

# --- Launch an interactive shell with the connector jars on the driver classpath ---
# FIX(review): spark-shell lives in bin/, not sbin/ (sbin/ only holds the
# cluster daemon start/stop scripts); the original path does not exist.
/srv/spark/bin/spark-shell --driver-class-path $(echo /srv/spark/jars/*.jar | sed 's/ /:/g')
# test | |
// Smoke test — paste into the spark-shell started above to verify the
// Cassandra connector is on the classpath and can reach the cluster.

// Stop the shell's default SparkContext so we can rebuild one that carries
// the Cassandra connection settings.
sc.stop

// FIX(review): the original listed every import twice (plain + wildcard) and
// pulled in an unused SQLContext; the wildcard forms below cover everything used.
import org.apache.spark._
import org.apache.spark.sql.cassandra._
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql._

// Point the connector at the Cassandra node.
// NOTE(review): "cassandraserver" is a placeholder hostname — replace with
// your Cassandra contact point before running.
val conf = new SparkConf(true).set("spark.cassandra.connection.host", "cassandraserver")

// Attach to the standalone master started earlier in this guide.
val sc = new SparkContext("spark://localhost:7077", "test", conf)

// Read a table through the connector and count its rows.
// NOTE(review): assumes a keyspace named "keyspace" with a "users" table
// exists — adjust to your schema.
val table = sc.cassandraTable("keyspace", "users")
println(table.count)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment