cd $SPARK_HOME
./bin/spark-submit --packages TargetHolding/pyspark-cassandra:0.3.5 /Users/drehman/Apps/workspace/spark_cassandra_example.py
Last active
July 4, 2016 14:23
-
-
Save danish-rehman/5bb65971f145ff4dfc403073655f3f16 to your computer and use it in GitHub Desktop.
Cassandra spark connector
Repo - https://github.com/TargetHolding/pyspark-cassandra
git clone https://github.com/TargetHolding/pyspark-cassandra.git
git submodule update --init --recursive
Add to ~/.bash_profile
export PYTHONPATH="/Users/drehman/Apps/pyspark-cassandra/python:$PYTHONPATH"
cd /Users/drehman/Apps/pyspark-cassandra
sbt compile
sbt spPublishLocal
make dist
Compile or use spark package https://spark-packages.org/package/TargetHolding/pyspark-cassandra
./bin/pyspark --packages TargetHolding:pyspark-cassandra:0.3.2
>> sc.stop()
# In the pyspark shell there is already an instance of sc running, so stop it before creating a newly configured context.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark import SparkConf, SparkContext
import pyspark_cassandra


def stream_from_cas():
    """Read a Cassandra table through Spark and print its first row.

    Builds a SparkConf that targets the standalone master at
    ``spark://SANM-MBP01L.local:7077`` with Cassandra on ``localhost``,
    opens a ``CassandraSparkContext``, loads ``mykeyspace.tweets`` and
    prints the result of ``first()`` on it.

    The context is always stopped on exit, even if the read fails, so
    the application does not stay registered with the master.
    """
    keyspace, table = "mykeyspace", "tweets"
    conf = (
        SparkConf()
        .setAppName("PySpark Cassandra Test")
        .setMaster("spark://SANM-MBP01L.local:7077")
        .set("spark.cassandra.connection.host", "localhost")
        # Pulls in the connector package so no separate --packages flag
        # should be needed -- NOTE(review): confirm against your Spark version.
        .set("spark.jars.packages", 'TargetHolding/pyspark-cassandra:0.3.5')
    )
    sc = pyspark_cassandra.CassandraSparkContext(conf=conf)
    try:
        rdds = sc.cassandraTable(keyspace, table)
        # Single-argument print(...) is valid on both Python 2 and 3.
        print(rdds.first())
    finally:
        sc.stop()


if __name__ == "__main__":
    stream_from_cas()
Download the zip - https://spark-packages.org/package/TargetHolding/pyspark-cassandra
export PYTHONPATH="/Users/drehman/Apps/pyspark-cassandra/python:$PYTHONPATH"
python
>>> import pyspark_cassandra
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "pyspark_cassandra/__init__.py", line 24, in <module>
import pyspark_cassandra.context
File "pyspark_cassandra/context.py", line 16, in <module>
from pyspark_cassandra.rdd import CassandraTableScanRDD
File "pyspark_cassandra/rdd.py", line 21, in <module>
from pyspark_cassandra.types import Row
#!/usr/bin/env python
File "pyspark_cassandra/types.py", line 46
SyntaxError: Non-ASCII character '\xc2' in file pyspark_cassandra/types.py on line 46, but no encoding declared; see http://python.org/dev/peps/pep-0263/ for details
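If you get the above error, follow the instructions in PEP 263 (http://python.org/dev/peps/pep-0263/): add an encoding declaration such as `# -*- coding: utf-8 -*-` as the first or second line of pyspark_cassandra/types.py.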
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment