"""MySQL benchmark tool, for comparing different table schemas. | |
Some notes: | |
* run like: python mysqlbench.py my_config_file.yaml | |
* there's a few different options to change things up, invoke with -h to see | |
them | |
* you should run this with with a total of at least a quarter million rows or | |
so (default is one million) to ensure that you see the slowdown from MySQL | |
checking uniqueness contraints; i.e., make sure you see a slow down as | |
iterations increase for the schemas that create tables with a unique uuid | |
column | |
* there's some overhead from having to generate uuids (which is done by reading | |
16 bytes from /dev/urandom); IME the benchmark is still very much | |
MySQL-bound, but if you're concerned you can pre-allocate an array of uuids | |
in the bench() function, at the cost of using gobs of memory | |
* if you're on a machine without dedicated hardware (e.g. a VPS), you'll | |
probably see interesting things with transaction times fluctuating wildly as | |
your instance gets access to hardware | |
An example yaml config file (ignore the lines starting with ---): | |
--- start yaml file --- | |
user: test | |
passwd: test | |
db: test | |
--- end yaml file --- | |
""" | |
import os
import csv
import math
import time
import yaml
import optparse

import MySQLdb

OVERALL_TIMES = []


def drop_test_entities(conn):
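    """Drop the test_entities table if it exists (checked via information_schema)."""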
    c = conn.cursor()
    c.execute('SELECT table_name FROM information_schema.tables WHERE table_schema = DATABASE() AND table_name = %s', ('test_entities',))
    if c.fetchone():
        c.execute('DROP TABLE test_entities')


def increment_worker(c):
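    """Insert one row, letting MySQL assign the auto_increment added_id."""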
    os.urandom(16)  # ensure that this has the same overhead as uuid_worker;
                    # comment out if you don't like this fairness
    c.execute('INSERT INTO test_entities (added_id) VALUES (NULL)')


def uuid_worker(c):
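    """Insert one row keyed by a random 16-byte uuid read from /dev/urandom."""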
    c.execute('INSERT INTO test_entities (id) VALUES (%s)', (os.urandom(16),))


def bench(name, opts, conn, schema, worker=uuid_worker):
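    """Create test_entities using `schema`, then run opts.num_iterations
    transactions of opts.batch_size inserts via `worker`, printing the
    elapsed time for each transaction plus summary statistics."""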
    drop_test_entities(conn)
    if opts.sleep:
        time.sleep(opts.sleep)
    print name
    print '=' * len(name)
    times = []
    c = conn.cursor()
    c.execute(schema)
    for x in xrange(opts.num_iterations):
        if not opts.autocommit:
            c.execute('SET TRANSACTION ISOLATION LEVEL REPEATABLE READ')
        ts = time.time()
        for y in xrange(opts.batch_size):
            worker(c)
        conn.commit()
        elapsed = time.time() - ts
        times.append(elapsed)
        print '% 4d %f' % (x + 1, elapsed)
    OVERALL_TIMES.append((name, times))
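    # summary statistics over the per-transaction commit times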
    sorted_times = sorted(times)
    total = sum(times)
    avg = total / len(times)
    if len(times) % 2 == 0:
        idx = len(times) / 2
        med = (sorted_times[idx - 1] + sorted_times[idx]) / 2
    else:
        med = sorted_times[len(sorted_times) / 2]
    dev = math.sqrt(sum((x - avg)**2 for x in times) / len(times))
    print 'average = %1.3f' % (avg,)
    print 'median = %1.3f' % (med,)
    print 'std dev = %1.3f' % (dev,)
    return times


if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('-a', '--autocommit', action='store_true', default=False, help='Enable auto-commit')
    parser.add_option('-n', '--num-iterations', type='int', default=100, help='How many iterations to run')
    parser.add_option('-b', '--batch-size', type='int', default=10000, help='How many rows to insert per txn')
    parser.add_option('-c', '--csv', default=None, help='Store benchmark output in the specified CSV file')
    parser.add_option('-s', '--sleep', type='int', default=10, help='How long to sleep between tests')
    opts, args = parser.parse_args()
    start = time.time()

    if len(args) != 1:
        parser.error('must pass exactly one argument, the path to the mysql config file')
    cfg = yaml.load(open(args[0]).read())
    conn = MySQLdb.connect(**cfg)

    opsys, host, kernel, dt, arch = os.uname()
    print '%s %s' % (opsys, kernel)
    print 'MySQL ' + conn.get_server_info()
    print 'running %d iterations of %d inserts per txn (%d rows total)' % (opts.num_iterations, opts.batch_size, opts.num_iterations * opts.batch_size)

    if opts.autocommit:
        conn.cursor().execute('SET autocommit = 1')
        print 'autocommit is ON'
    else:
        conn.cursor().execute('SET autocommit = 0')
        print 'autocommit is OFF'
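    # run the insert benchmark against each candidate schema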
    bench('just auto_increment', opts, conn, """
        CREATE TABLE test_entities (
            added_id INTEGER NOT NULL AUTO_INCREMENT,
            PRIMARY KEY (added_id)
        ) ENGINE=InnoDB
        """, increment_worker)

    bench('auto_increment, no key', opts, conn, """
        CREATE TABLE test_entities (
            added_id INTEGER NOT NULL AUTO_INCREMENT,
            id BINARY(16) NOT NULL,
            PRIMARY KEY (added_id)
        ) ENGINE=InnoDB
        """)

    bench('auto_increment, key', opts, conn, """
        CREATE TABLE test_entities (
            added_id INTEGER NOT NULL AUTO_INCREMENT,
            id BINARY(16) NOT NULL,
            PRIMARY KEY (added_id),
            KEY (id)
        ) ENGINE=InnoDB
        """)

    bench('auto_increment, unique key', opts, conn, """
        CREATE TABLE test_entities (
            added_id INTEGER NOT NULL AUTO_INCREMENT,
            id BINARY(16) NOT NULL,
            PRIMARY KEY (added_id),
            UNIQUE KEY (id)
        ) ENGINE=InnoDB
        """)

    bench('w/o auto-increment, key', opts, conn, """
        CREATE TABLE test_entities (
            id BINARY(16) NOT NULL,
            KEY (id)
        ) ENGINE=InnoDB
        """)

    bench('w/o auto-increment, unique key', opts, conn, """
        CREATE TABLE test_entities (
            id BINARY(16) NOT NULL,
            UNIQUE KEY (id)
        ) ENGINE=InnoDB
        """)

    bench('w/o auto-increment, primary key', opts, conn, """
        CREATE TABLE test_entities (
            id BINARY(16) NOT NULL,
            PRIMARY KEY (id)
        ) ENGINE=InnoDB
        """)
    drop_test_entities(conn)

    if opts.csv:
        writer = csv.writer(open(opts.csv, 'w'))
        names = ['cumulative'] + [name for name, _ in OVERALL_TIMES]
        writer.writerow(names)
        writer.writerow([0 for x in xrange(len(OVERALL_TIMES) + 1)])
        for x in xrange(opts.num_iterations):
            tot = (x + 1) * opts.batch_size
            writer.writerow([tot] + [t[x] for _, t in OVERALL_TIMES])
        print 'csv output is in %r' % (opts.csv,)

    print 'total time was %1.3f seconds' % (time.time() - start)