Skip to content

Instantly share code, notes, and snippets.

@ericmoritz
Created March 15, 2011 17:03
Show Gist options
  • Select an option

  • Save ericmoritz/871045 to your computer and use it in GitHub Desktop.

Select an option

Save ericmoritz/871045 to your computer and use it in GitHub Desktop.
import riak
from pprint import pprint
# Script: compute the highest "High" value per month across the "goog" bucket
# with a JavaScript map/reduce job, then pretty-print each result.
client = riak.RiakClient(port=8098)
query = client.add("goog") # Add the goog bucket

# Map phase: for each stored object emit a single-entry object
# {"YYYY-MM": data.High}. The month is the first two '-'-separated parts of
# the object's key.
query.map("""function(value, keyData, arg){
var data = Riak.mapValuesJson(value)[0];
var month = value.key.split('-').slice(0,2).join('-');
var obj = {};
obj[month] = data.High;
return [obj];
}""")

# Reduce phase: merge the per-object results into one object that keeps the
# largest value seen for every month.
# The fold is seeded with {} because Array.prototype.reduce throws a TypeError
# when called on an empty array without an initial value; seeding also avoids
# using (and mutating) the first input element as the accumulator.
query.reduce("""function(values, arg){
return [values.reduce(
function(acc, item){
for(var month in item){
if(acc[month]) {
acc[month] = (acc[month] < item[month]) ? item[month] : acc[month];
} else {
acc[month] = item[month];
}
}
return acc;
}, {})
];
}""", options={"keep":True})

for result in query.run():
    pprint(result)
import riak
from pprint import pprint
import time
# Script: compare the wall-clock cost of fetching 30 keys from the "goog"
# bucket through one MapReduce job versus 30 individual bucket.get() calls.
client = riak.RiakClient(port=8087, transport_class=riak.RiakPbcTransport)
bucket = client.bucket("goog")
query = riak.RiakMapReduce(client)

# Keys "2010-04-01" .. "2010-04-30". Built once, outside the timed sections,
# so both measurements cover only the Riak round trips.
keys = ["2010-04-%02d" % day for day in range(1, 31)]

# Fetch by mapred
for key in keys:
    query.add("goog", key)
# Riak.mapValues returns the data
query.map("Riak.mapValues")
# Riak.filterNotFound removes the not_found values
query.reduce("Riak.filterNotFound")

# Run and time the query execution, roughly around 30ms on my 3 node dev cluster on a single MBP
start = time.time()
query.run()
# The whole expression is parenthesised so the elapsed milliseconds are
# printed identically by the Python 2 print statement and the Python 3
# print() function.
print((time.time() - start) * 1000)

# Fetch by key, roughly around 30ms as well on my 3 node dev cluster on a single MBP
start = time.time()
for key in keys:
    bucket.get(key)
print((time.time() - start) * 1000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment