Last active
August 29, 2015 14:04
-
-
Save kevinlondon/fd4b36851abc9eafa296 to your computer and use it in GitHub Desktop.
MySQL UUID Field Benchmarks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import MySQLdb | |
import logging | |
import uuid | |
import time | |
import random | |
from contextlib import contextmanager | |
from binascii import unhexlify, hexlify | |
# Modified from http://iops.io/blog/storing-billions-uuid-fields-mysql-innodb/ | |
def configure_logging(): | |
FORMAT = "[%(asctime)-15s] [%(filename)s:%(lineno)d] [%(process)d/%(processName)s] %(message)s" | |
logging.basicConfig(format=FORMAT, level=logging.DEBUG) | |
configure_logging() | |
def flatten_list(seq): | |
merged = [] | |
for s in seq: | |
for x in s: | |
merged.append(x) | |
return merged | |
class Benchmark(object): | |
def __init__(self): | |
self.start = time.time() | |
self.marks = [] | |
self.times = [] | |
@contextmanager | |
def benchmark(self): | |
t = time.time() | |
yield | |
now = time.time() | |
elapsed = now - t | |
o = (t, now, elapsed) | |
self.times.append(o) | |
@contextmanager | |
def mark(self, data): | |
assert isinstance(data, dict) | |
total_elapsed = sum(map(lambda x: x[2], self.times)) | |
o = (time.time(), total_elapsed, data) | |
self.marks.append(o) | |
self.times = [] | |
def dump(self): | |
h = "%-20s %-20s %-20s" | |
print h % ( "total_rows", "chunk_size", "time_taken") | |
for x in self.marks: | |
d = x[2] | |
print h % ( d['total_rows'], d['chunk_size'], x[1]) | |
class UUIDTest(object): | |
# The amount of UUIDs pre-generated for each database commit | |
# Setting this too high may result in "MySQL server has gone away" errors | |
COMMIT_CHUNK_SIZE = 20000 | |
# The amount of rows to insert at any given time in a single | |
# commit using executemany(). | |
LOOP_SIZE = 100000 | |
# The amount of rows to insert in total for each test | |
TOTAL_ROWS = 400000 | |
# The amount of UUIDs to select against | |
SELECT_CHUNK_SIZE = 2000 | |
def __init__(self, dbconn): | |
self.db = dbconn | |
def start(self): | |
self.create_database() | |
self.run_test('char32') | |
self.run_test('bin16') | |
self.run_test('longblob') | |
def generate_uuid(self, total): | |
"""Generate a list of random UUIDs. Time spent generating | |
these is not taken into consideration when comparing performance | |
between each test. This is because we are only interested in the | |
db select/insert performance""" | |
x = map(lambda x: uuid.uuid4(), range(self.COMMIT_CHUNK_SIZE)) | |
return x | |
###################################################### | |
# CHAR32 TESTS | |
###################################################### | |
def char32_insert(self, uuidlist): | |
# convert list into necessary format | |
ui = map(lambda x: (str(x).replace("-", ""), ), uuidlist) | |
# Insert UUIDs into database | |
c = self.db.cursor() | |
with self.ib.benchmark(): | |
c.executemany(""" | |
INSERT INTO char32 (uuid) | |
VALUES (%s) | |
""", ui) | |
self.db.commit() | |
c.close() | |
def char32_select(self, uuidlist): | |
# convert list into necessary format | |
ui = map(lambda x: (str(x).replace("-", ""), ), uuidlist) | |
selectui_values = ui | |
selectui_sql = [ '%s', ] * len(selectui_values) | |
selectui_sql = ",".join(selectui_sql) | |
# select | |
c = self.db.cursor() | |
with self.sb.benchmark(): | |
sql = """ | |
SELECT | |
id | |
FROM | |
`char32` | |
WHERE | |
uuid IN (%s) | |
""" % ( selectui_sql, ) | |
c.execute(sql, selectui_values) | |
r = c.fetchall() | |
assert len(r) == self.SELECT_CHUNK_SIZE | |
c.close() | |
###################################################### | |
# BIN16 TESTS | |
###################################################### | |
def bin16_insert(self, uuidlist): | |
# convert list into necessary format | |
ui = map(lambda x: (x.bytes, ), uuidlist) | |
# Insert UUIDs into database | |
c = self.db.cursor() | |
with self.ib.benchmark(): | |
c.executemany(""" | |
INSERT INTO `bin16` (uuid) | |
VALUES (%s) | |
""", ui) | |
self.db.commit() | |
c.close() | |
def bin16_select(self, uuidlist): | |
# convert list into necessary format | |
ui = map(lambda x: (x.bytes, ), uuidlist) | |
selectui_values = ui | |
selectui_sql = [ '%s', ] * len(selectui_values) | |
selectui_sql = ",".join(selectui_sql) | |
# select | |
c = self.db.cursor() | |
with self.sb.benchmark(): | |
sql = """ | |
SELECT | |
id | |
FROM | |
`bin16` | |
WHERE | |
uuid IN (%s) | |
""" % ( selectui_sql, ) | |
c.execute(sql, selectui_values) | |
r = c.fetchall() | |
assert len(r) == self.SELECT_CHUNK_SIZE | |
c.close() | |
###################################################### | |
# LongBlob TESTS | |
###################################################### | |
def longblob_insert(self, uuidlist): | |
# convert list into necessary format | |
ui = map(lambda x: (x.bytes, ), uuidlist) | |
# Insert UUIDs into database | |
c = self.db.cursor() | |
with self.ib.benchmark(): | |
c.executemany(""" | |
INSERT INTO `longblob` (uuid) | |
VALUES (%s) | |
""", ui) | |
self.db.commit() | |
c.close() | |
def longblob_select(self, uuidlist): | |
# convert list into necessary format | |
ui = map(lambda x: (x.bytes, ), uuidlist) | |
selectui_values = ui | |
selectui_sql = [ '%s', ] * len(selectui_values) | |
selectui_sql = ",".join(selectui_sql) | |
# select | |
c = self.db.cursor() | |
with self.sb.benchmark(): | |
sql = """ | |
SELECT | |
id | |
FROM | |
`longblob` | |
WHERE | |
uuid IN (%s) | |
""" % ( selectui_sql, ) | |
c.execute(sql, selectui_values) | |
r = c.fetchall() | |
assert len(r) == self.SELECT_CHUNK_SIZE | |
c.close() | |
def run_test(self, test_name): | |
# create benchmark object | |
self.ib = Benchmark() | |
self.sb = Benchmark() | |
# Fetch some UUIDs | |
cnt=0 | |
loopcnt=0 | |
selectcnt = 0 | |
while cnt < self.TOTAL_ROWS: | |
# incr | |
loopcnt += self.COMMIT_CHUNK_SIZE | |
cnt += self.COMMIT_CHUNK_SIZE | |
selectcnt += self.SELECT_CHUNK_SIZE | |
print "currently at", cnt | |
# Generate a list of UUIDs | |
uuidlist = self.generate_uuid(self.COMMIT_CHUNK_SIZE) | |
# insert | |
insert_fn = "%s_insert" % ( test_name, ) | |
getattr(self, insert_fn)(uuidlist) | |
# select | |
selectui = random.sample(uuidlist, self.SELECT_CHUNK_SIZE) | |
select_fn = "%s_select" % ( test_name, ) | |
getattr(self, select_fn)(selectui) | |
# to avoid skewing the graphs, group into | |
if loopcnt >= self.LOOP_SIZE: | |
self.ib.mark({ | |
'total_rows' : cnt, | |
'chunk_size' : loopcnt | |
}) | |
self.sb.mark({ | |
'total_rows' : cnt, | |
'chunk_size' : selectcnt | |
}) | |
loopcnt = 0 | |
selectcnt = 0 | |
print "TEST: INSERT %s" % ( test_name, ) | |
self.ib.dump() | |
print "TEST: SELECT %s" % ( test_name, ) | |
self.sb.dump() | |
def create_database(self): | |
"""Create databases and tables""" | |
c = self.db.cursor() | |
c.execute("DROP DATABASE IF EXISTS `uuidtest`") | |
c.execute("CREATE DATABASE `uuidtest`") | |
self.db.select_db("uuidtest") | |
c.execute(""" | |
CREATE TABLE `longblob` ( | |
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, | |
`uuid` longblob NOT NULL, | |
PRIMARY KEY (`id`), | |
UNIQUE KEY `uuid` (`uuid`(16)) | |
) ENGINE=InnoDB DEFAULT CHARSET=utf8; | |
""") | |
c.execute(""" | |
CREATE TABLE `bin16` ( | |
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, | |
`uuid` binary(16) NOT NULL, | |
PRIMARY KEY (`id`), | |
UNIQUE KEY `uuid` (`uuid`) | |
) ENGINE=InnoDB DEFAULT CHARSET=utf8; | |
""") | |
c.execute(""" | |
CREATE TABLE `char32` ( | |
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, | |
`uuid` char(32) DEFAULT NULL, | |
PRIMARY KEY (`id`), | |
UNIQUE KEY `uuid` (`uuid`) | |
) ENGINE=InnoDB DEFAULT CHARSET=utf8; | |
""") | |
self.db.commit() | |
c = MySQLdb.connect('localhost', user='uuidtest', passwd='uuidtest') | |
u = UUIDTest(c) | |
u.start() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results with a slightly larger data set: