Created
August 23, 2012 21:43
-
-
Save timtadh/3442267 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Benchmark basic HBase operations through the Thrift gateway.

For each power of ten from 10**0 up to 10**7 rows the script:

1. deletes rows keyed "0" .. "num-1" from ``test_table``,
2. re-creates them with one random ``meta:a``/``meta:b``/``meta:c`` cell,
3. looks up a single random row, and
4. scans the whole table for rows that have a ``meta:a`` cell,

printing the elapsed wall-clock time of every step.

The script expects an HBase Thrift server listening on localhost:9090.
"""
import sys
import random
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *
from datetime import datetime

try:
    # Python 2: xrange is lazy, so the 10**7-row passes do not first build a
    # ten-million-element list in memory.
    row_range = xrange
except NameError:
    # Python 3: range is already lazy.
    row_range = range

transport = TSocket.TSocket('localhost', 9090)
# Buffering is critical. Raw sockets are very slow
transport = TTransport.TBufferedTransport(transport)
# Wrap in a protocol
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()
try:
    try:  # Try to create the table if it doesn't exist
        client.createTable('test_table', [ColumnDescriptor(name='meta')])
    except AlreadyExists as tx:
        # The table survives from an earlier run; report it and carry on.
        print("Thrift exception")
        print('%s' % (tx.message))

    # range(8) goes up to 10,000,000 rows, takes about 3hrs on my 2GB linode
    for benchmark in range(8):
        num = 10**benchmark
        print("Benchmark #%d: %d rows" % (benchmark, num))

        # Step 1: delete the rows this round is about to (re)create.
        start = datetime.now()
        for i in row_range(num):
            client.deleteAllRow('test_table', "%d" % i)
        print(' Rows deleted: %s' % (datetime.now() - start))

        # Step 2: write one cell per row into a randomly chosen column of
        # the 'meta' family, with a random value below 100.
        create_start = datetime.now()
        for i in row_range(num):
            client.mutateRow('test_table', "%d" % i,
                             [Mutation(column="meta:%s" % random.choice(['a', 'b', 'c']),
                                       value="%d" % random.randrange(100))])
        print(' Rows created: %s' % (datetime.now() - create_start))

        # Step 3: time a single random point lookup; only the latency
        # matters, so the returned row is discarded.
        get_start = datetime.now()
        client.getRow('test_table', "%s" % random.randrange(num))
        print(' Row lookup: %s' % (datetime.now() - get_start))

        # Step 4: scan from the first row, restricted to column meta:a, and
        # count what comes back.  Every batch returned by scannerGet is
        # counted exactly once (the first batch must not be thrown away),
        # and an empty batch signals the end of the scan.
        scan_start = datetime.now()
        scanner = client.scannerOpen('test_table', '', ['meta:a'])
        try:
            count = 0
            batch = client.scannerGet(scanner)
            while batch:
                count += len(batch)
                batch = client.scannerGet(scanner)
            scan_elapsed = datetime.now() - scan_start
        finally:
            # Release the server-side scanner even if the scan fails.
            client.scannerClose(scanner)
        print(' Fetched %s rows with meta:A: %s' % (count, scan_elapsed))

        print(' Total Benchmark Time: %s' % (datetime.now() - start))
finally:
    # Always release the socket, including when a benchmark step raises.
    transport.close()
# Sample output -- | |
#Benchmark #0: 1 rows | |
# Rows deleted: 0:00:00.000816 | |
# Rows created: 0:00:00.001384 | |
# Row lookup: 0:00:00.001078 | |
# Fetched 319 rows with meta:A: 0:00:00.176335 | |
# Total Benchmark Time: 0:00:00.179796 | |
#Benchmark #1: 10 rows | |
# Rows deleted: 0:00:00.004934 | |
# Rows created: 0:00:00.006587 | |
# Row lookup: 0:00:00.000725 | |
# Fetched 316 rows with meta:A: 0:00:00.171574 | |
# Total Benchmark Time: 0:00:00.183974 | |
#Benchmark #2: 100 rows | |
# Rows deleted: 0:00:00.051719 | |
# Rows created: 0:00:00.064601 | |
# Row lookup: 0:00:00.000687 | |
# Fetched 328 rows with meta:A: 0:00:00.176993 | |
# Total Benchmark Time: 0:00:00.294160 | |
#Benchmark #3: 1000 rows | |
# Rows deleted: 0:00:00.488865 | |
# Rows created: 0:00:00.585270 | |
# Row lookup: 0:00:00.000675 | |
# Fetched 312 rows with meta:A: 0:00:00.168604 | |
# Total Benchmark Time: 0:00:01.243603 | |
#Benchmark #4: 10000 rows | |
# Rows deleted: 0:00:04.546011 | |
# Rows created: 0:00:05.570421 | |
# Row lookup: 0:00:00.000995 | |
# Fetched 3372 rows with meta:A: 0:00:01.810908 | |
# Total Benchmark Time: 0:00:11.928531 | |
#Benchmark #5: 100000 rows | |
# Rows deleted: 0:00:41.258448 | |
# Rows created: 0:00:55.093361 | |
# Row lookup: 0:00:00.000818 | |
# Fetched 33274 rows with meta:A: 0:00:17.005304 | |
# Total Benchmark Time: 0:01:53.358131 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment