Skip to content

Instantly share code, notes, and snippets.

@BYVoid
Created August 2, 2012 04:31
Show Gist options
  • Save BYVoid/3233582 to your computer and use it in GitHub Desktop.
Save BYVoid/3233582 to your computer and use it in GitHub Desktop.
Cassandra Scan
import pycassa
import time
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
# Cassandra nodes handed to the connection pool. The single entry is an
# empty placeholder in this file -- presumably a real server address must be
# filled in before running (TODO confirm expected format against pycassa docs).
servers = ['']

# Connection pool of size 1 bound to the 'Keyspace1' keyspace.
pool = ConnectionPool(keyspace='Keyspace1', server_list=servers, pool_size=1)

# Handle on the 'Standard1' column family; main() reads from it.
cf = ColumnFamily(pool, 'Standard1')
def main(column_family=None, batch=5000, read_consistency_level=None):
    """Scan all row keys of a column family in batches and print throughput.

    After every batch one line is printed with the last key seen, the running
    row total, the batch's elapsed seconds, the batch rows-per-second and the
    overall rows-per-second.

    Args:
        column_family: Object exposing ``get_range(start=, column_count=,
            row_count=, read_consistency_level=)`` and yielding rows whose
            first element is the row key. Defaults to the module-level ``cf``.
        batch: Number of new rows to fetch per request.
        read_consistency_level: Consistency level passed through to
            ``get_range``. Defaults to ``pycassa.ConsistencyLevel.ONE``.

    Returns:
        The total number of distinct rows visited.
    """
    if column_family is None:
        column_family = cf
    if read_consistency_level is None:
        read_consistency_level = pycassa.ConsistencyLevel.ONE

    total = 0
    last_key = ''
    elapsed_total = 0
    first_batch = True
    print('last_key, total, elapsed, qps, qps_overall')
    while True:
        st = time.time()
        # The range start is inclusive, so every batch after the first comes
        # back beginning with the row we already counted. Ask for one extra
        # row to compensate, and skip that boundary row below; otherwise it
        # is double-counted and the scan never sees an empty batch.
        rows = column_family.get_range(
            start=last_key,
            column_count=1,
            row_count=batch if first_batch else batch + 1,
            read_consistency_level=read_consistency_level)
        resume_key = last_key
        count = 0
        for row in rows:
            key = row[0]
            # Compare instead of blindly dropping the first row: the boundary
            # row may have been deleted since the previous batch.
            if not first_batch and key == resume_key:
                continue
            last_key = key
            count += 1
        if count == 0:
            # No new rows past the resume point: the scan is complete.
            break
        first_batch = False
        total += count
        elapsed = time.time() - st
        elapsed_total += elapsed
        # Rate is based on rows actually received, not the requested size;
        # guard against a zero reading from a coarse clock.
        qps = count / elapsed if elapsed > 0 else float('inf')
        qps_overall = (total / elapsed_total
                       if elapsed_total > 0 else float('inf'))
        # Single-argument print keeps the space-separated output identical
        # on Python 2 while remaining valid syntax on Python 3.
        print('%s %s %s %s %s' % (last_key, total, elapsed, qps, qps_overall))
    return total
# Run the scan only when this file is executed as a script, so importing the
# module does not start a full scan as a side effect.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment