Skip to content

Instantly share code, notes, and snippets.

@gabhi
Created February 1, 2015 02:50
Show Gist options
  • Save gabhi/088927912e1c5cad3e77 to your computer and use it in GitHub Desktop.
Save gabhi/088927912e1c5cad3e77 to your computer and use it in GitHub Desktop.
cassandra spark cluster example
Start Cassandra.
Start the Spark master (./sbin/start-master.sh).
Then run the following statements in cqlsh:
-- Keyspace for the example, keeping one replica of each row.
-- IF NOT EXISTS lets this setup be re-run without an "already exists" error.
CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
-- Key/value table that the Spark snippet reads from and writes to.
CREATE TABLE IF NOT EXISTS test.kv(key text PRIMARY KEY, value int);
-- Seed rows.
INSERT INTO test.kv(key, value) VALUES ('key1', 1);
INSERT INTO test.kv(key, value) VALUES ('key2', 2);
Next, open the Spark shell (with the spark-cassandra-connector jar on its classpath) and run:
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import com.datastax.spark.connector._;
// spark-shell has already created a SparkContext and bound it to `sc`.
// Stop it before building the replacement below: the new context is created
// from a conf that carries the Cassandra host, and Spark does not allow two
// active contexts in the same JVM by default.
// Run this as its own statement, before `val sc` is redefined further down.
sc.stop()

// Address of the Cassandra node the connector contacts first.
val conf = new SparkConf(true).set("spark.cassandra.connection.host", "127.0.0.1")

// Standalone master URL. This value is machine-specific; replace it with the
// URL of your own Spark master.
val master = "spark://L-SB8F10VFFT-M.local:7077"
val sc = new SparkContext(master, "test", conf)

// Expose the test.kv table as an RDD of rows.
val rdd = sc.cassandraTable("test", "kv")
println(rdd.count) // number of rows in the table
println(rdd.first) // first row returned
println(rdd.map(_.getInt("value")).sum) // sum of the `value` column

// Write two more (key, value) pairs back into test.kv.
val collection = sc.parallelize(Seq(("key3", 3), ("key4", 4)))
collection.saveToCassandra("test", "kv", SomeColumns("key", "value"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment