Skip to content

Instantly share code, notes, and snippets.

@Mytherin
Created November 28, 2019 13:12
Show Gist options
  • Save Mytherin/d2daac8f1565bd5c0b4530952a9094c1 to your computer and use it in GitHub Desktop.
Save Mytherin/d2daac8f1565bd5c0b4530952a9094c1 to your computer and use it in GitHub Desktop.
import sys, time, numpy, os
if len(sys.argv) < 2:
print("Usage: python3 benchmark-mvcc [hyper|monetdb|sqlite|duckdb]")
exit(1)
try:
os.remove('test.db')
except:
pass
system = sys.argv[1].lower()
if system == 'hyper':
import psycopg2
con = psycopg2.connect(port=7483, host="localhost", user="hannes")
c = con.cursor()
elif system == 'duckdb':
import duckdb
con = duckdb.connect('test.db')
c = con.cursor()
elif system == 'sqlite':
import sqlite3
con = sqlite3.connect('test.db')
c = con.cursor()
elif system == 'monetdb':
import pymonetdb
con = pymonetdb.connect(username='monetdb', password='monetdb', hostname='localhost', port=50000, database='demo')
c = con.cursor()
else:
print("Unrecognized system " + system)
exit(1)
def commit_connection():
global con, c
if system != 'duckdb':
# duckdb is on autocommit mode
con.commit()
def load_data(many_columns=False):
global con, c
rows = 100
target_rows = 10000000
if system == 'duckdb':
os.remove('test.db')
con = duckdb.connect('test.db')
c = con.cursor()
else:
c.execute("DROP TABLE IF EXISTS mvcc_test")
if many_columns:
c.execute("CREATE TABLE mvcc_test(i INTEGER, j1 INTEGER, j2 INTEGER, j3 INTEGER, j4 INTEGER, j5 INTEGER, j6 INTEGER, j7 INTEGER, j8 INTEGER, j9 INTEGER, j10 INTEGER, j11 INTEGER, j12 INTEGER, j13 INTEGER, j14 INTEGER, j15 INTEGER, j16 INTEGER, j17 INTEGER, j18 INTEGER, j19 INTEGER, j20 INTEGER, j21 INTEGER, j22 INTEGER, j23 INTEGER, j24 INTEGER, j25 INTEGER, j26 INTEGER, j27 INTEGER, j28 INTEGER, j29 INTEGER, j30 INTEGER, j31 INTEGER, j32 INTEGER, j33 INTEGER, j34 INTEGER, j35 INTEGER, j36 INTEGER, j37 INTEGER, j38 INTEGER, j39 INTEGER, j40 INTEGER, j41 INTEGER, j42 INTEGER, j43 INTEGER, j44 INTEGER, j45 INTEGER, j46 INTEGER, j47 INTEGER, j48 INTEGER, j49 INTEGER, j50 INTEGER, j51 INTEGER, j52 INTEGER, j53 INTEGER, j54 INTEGER, j55 INTEGER, j56 INTEGER, j57 INTEGER, j58 INTEGER, j59 INTEGER, j60 INTEGER, j61 INTEGER, j62 INTEGER, j63 INTEGER, j64 INTEGER, j65 INTEGER, j66 INTEGER, j67 INTEGER, j68 INTEGER, j69 INTEGER, j70 INTEGER, j71 INTEGER, j72 INTEGER, j73 INTEGER, j74 INTEGER, j75 INTEGER, j76 INTEGER, j77 INTEGER, j78 INTEGER, j79 INTEGER, j80 INTEGER, j81 INTEGER, j82 INTEGER, j83 INTEGER, j84 INTEGER, j85 INTEGER, j86 INTEGER, j87 INTEGER, j88 INTEGER, j89 INTEGER, j90 INTEGER, j91 INTEGER, j92 INTEGER, j93 INTEGER, j94 INTEGER, j95 INTEGER, j96 INTEGER, j97 INTEGER, j98 INTEGER, j99 INTEGER)")
else:
c.execute("CREATE TABLE mvcc_test(i INTEGER)")
commit_connection()
c.execute("INSERT INTO mvcc_test (i) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (13), (14), (15), (16), (17), (18), (19), (20), (21), (22), (23), (24), (25), (26), (27), (28), (29), (30), (31), (32), (33), (34), (35), (36), (37), (38), (39), (40), (41), (42), (43), (44), (45), (46), (47), (48), (49), (50), (51), (52), (53), (54), (55), (56), (57), (58), (59), (60), (61), (62), (63), (64), (65), (66), (67), (68), (69), (70), (71), (72), (73), (74), (75), (76), (77), (78), (79), (80), (81), (82), (83), (84), (85), (86), (87), (88), (89), (90), (91), (92), (93), (94), (95), (96), (97), (98), (99), (100)")
while rows < target_rows:
c.execute("INSERT INTO mvcc_test SELECT * FROM mvcc_test")
rows *= 2
commit_connection()
# make data hot
c.execute("SELECT SUM(i) FROM mvcc_test")
commit_connection()
# perform single update and single scan after update
def perform_single_update(f, selectivity, many_columns=False):
print("Run single update (selectivity %d, many columns: %s)" % (selectivity, str(many_columns)))
global con, c
load_data(many_columns)
# run the test
start = time.time()
if selectivity >= 100:
c.execute("UPDATE mvcc_test SET i=i+1")
elif selectivity > 0:
c.execute("UPDATE mvcc_test SET i=i+1 WHERE i <= " + str(selectivity))
end = time.time()
update_time = end - start
start = time.time()
c.execute("SELECT SUM(i) FROM mvcc_test")
result = c.fetchall()[0][0]
end = time.time()
select_time = end - start
commit_connection()
f.write("single_update%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, selectivity, result, update_time))
f.write("select_after_update%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, selectivity, result, select_time))
# perform 10 updates in a row
def perform_many_updates(f, many_columns=False):
print("Run many updates (many columns: %s)" % (str(many_columns), ))
global con, c
load_data(many_columns)
# run the test
start = time.time()
for i in range(0, 10):
c.execute("UPDATE mvcc_test SET i=i+1")
commit_connection()
end = time.time()
update_time = end - start
c.execute("SELECT SUM(i) FROM mvcc_test")
result = c.fetchall()[0][0]
f.write("many_updates%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, 0, result, update_time))
commit_connection()
with open(system + '.csv', 'w+') as f:
f.write("benchmark\tsystem\tselectivity\tresult\ttime\n")
perform_single_update(f, 0)
perform_single_update(f, 1)
perform_single_update(f, 10)
perform_single_update(f, 100)
perform_many_updates(f)
# many columns
perform_single_update(f, 0, True)
perform_single_update(f, 1, True)
perform_single_update(f, 10, True)
perform_single_update(f, 100, True)
perform_many_updates(f, True)
os.system('cat ' + system + '.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment