Skip to content

Instantly share code, notes, and snippets.

Created September 15, 2016 20:19
Show Gist options
  • Save anonymous/ea81f46057560d34f09040beeed13024 to your computer and use it in GitHub Desktop.
Save anonymous/ea81f46057560d34f09040beeed13024 to your computer and use it in GitHub Desktop.
#!/usr/local/bin/python
"""Round-trip demo for the Riak Spark connector driven from PySpark.

Generates a batch of small key/value records, saves them to a freshly named
Riak bucket, reads the whole bucket back and prints the result.

The script talks to the Riak address configured in ``conf`` below and needs
``findspark``, ``pyspark`` and ``pyspark_riak`` to be importable.
"""
import os
import time

# Set before the SparkContext is created, as in the original script.
# NOTE(review): presumably this is what makes Spark fetch the connector
# package at launch -- confirm against the PySpark documentation.
os.environ["PYSPARK_SUBMIT_ARGS"] = (
    "--packages com.basho.riak:spark-riak-connector_2.10:1.6.0 "
    "pyspark-shell"
)

import findspark

# Called before the pyspark import below, preserving the original ordering.
findspark.init()

from pyspark import SparkConf, SparkContext

# Number of demo records written and the spacing between generated key
# suffixes; both values are taken unchanged from the original loop.
RECORD_COUNT = 99
KEY_STEP = 10

conf = SparkConf()
conf.set("spark.riak.connection.host", "127.0.0.1:8087")
sc = SparkContext(conf=conf)

try:
    import pyspark_riak

    # NOTE(review): presumably this is what adds saveToRiak / riakBucket,
    # which are used below -- confirm against the connector docs.
    pyspark_riak.riak_context(sc)

    # Millisecond timestamp: used as the bucket-name suffix and as the base
    # for the generated keys, so every run writes to a new bucket.
    kvt = int(time.time() * 1000)
    bucket_name = "python-test-bucket-%s" % kvt
    bucket_type = "default"

    # One single-entry dict per record: {key: {"demo_key": value}}.
    # Suffixes run kvt + 10, kvt + 20, ..., matching the original loop,
    # which bumped the counter before building each record.
    data_to_store = [
        {"demo_key_%s" % stamp: {"demo_key": "demo_value_%s" % stamp}}
        for stamp in (kvt + KEY_STEP * i for i in range(1, RECORD_COUNT + 1))
    ]

    source_rdd = sc.parallelize(data_to_store)
    source_rdd.saveToRiak(bucket_name, bucket_type)

    data = sc.riakBucket(bucket_name, bucket_type).queryAll()
    # Parenthesised single-argument form prints identically on Python 2
    # and is also valid on Python 3.
    print(data.collect())
finally:
    # Always shut the context down, even when the Riak write/read fails.
    sc.stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment