Mytherin · November 28, 2019 13:12
diff --git a/benchmark_mvcc.py b/benchmark_mvcc.py

 import sys, time, numpy, os
 if len(sys.argv) < 2:
 	print("Usage: python3 benchmark-mvcc [hyper|monetdb|sqlite|duckdb]")
 	exit(1)

 try:
 	os.remove('test.db')
 except:
 	pass
 system = sys.argv[1].lower()
 if system == 'hyper':
 	import psycopg2
 	con = psycopg2.connect(port=7483, host="localhost", user="hannes")
 	c = con.cursor()
 elif system == 'duckdb':
 	import duckdb
 	con = duckdb.connect('test.db')
 	c = con.cursor()
 elif system == 'sqlite':
 	import sqlite3
 	con = sqlite3.connect('test.db')
 	c = con.cursor()
 elif system == 'monetdb':
 	import pymonetdb
 	con = pymonetdb.connect(username='monetdb', password='monetdb', hostname='localhost', port=50000, database='demo')
 	c = con.cursor()
 else:
 	print("Unrecognized system " + system)
 	exit(1)

 def commit_connection():
 	global con, c
 	if system != 'duckdb':
 		# duckdb is on autocommit mode
 		con.commit()

 def load_data(many_columns=False):
 	global con, c
 	rows = 100
 	target_rows = 10000000
 	if system == 'duckdb':
 		os.remove('test.db')
 		con = duckdb.connect('test.db')
 		c = con.cursor()
 	else:
 		c.execute("DROP TABLE IF EXISTS mvcc_test")
 	if many_columns:
 		c.execute("CREATE TABLE mvcc_test(i INTEGER, j1 INTEGER, j2 INTEGER, j3 INTEGER, j4 INTEGER, j5 INTEGER, j6 INTEGER, j7 INTEGER, j8 INTEGER, j9 INTEGER, j10 INTEGER, j11 INTEGER, j12 INTEGER, j13 INTEGER, j14 INTEGER, j15 INTEGER, j16 INTEGER, j17 INTEGER, j18 INTEGER, j19 INTEGER, j20 INTEGER, j21 INTEGER, j22 INTEGER, j23 INTEGER, j24 INTEGER, j25 INTEGER, j26 INTEGER, j27 INTEGER, j28 INTEGER, j29 INTEGER, j30 INTEGER, j31 INTEGER, j32 INTEGER, j33 INTEGER, j34 INTEGER, j35 INTEGER, j36 INTEGER, j37 INTEGER, j38 INTEGER, j39 INTEGER, j40 INTEGER, j41 INTEGER, j42 INTEGER, j43 INTEGER, j44 INTEGER, j45 INTEGER, j46 INTEGER, j47 INTEGER, j48 INTEGER, j49 INTEGER, j50 INTEGER, j51 INTEGER, j52 INTEGER, j53 INTEGER, j54 INTEGER, j55 INTEGER, j56 INTEGER, j57 INTEGER, j58 INTEGER, j59 INTEGER, j60 INTEGER, j61 INTEGER, j62 INTEGER, j63 INTEGER, j64 INTEGER, j65 INTEGER, j66 INTEGER, j67 INTEGER, j68 INTEGER, j69 INTEGER, j70 INTEGER, j71 INTEGER, j72 INTEGER, j73 INTEGER, j74 INTEGER, j75 INTEGER, j76 INTEGER, j77 INTEGER, j78 INTEGER, j79 INTEGER, j80 INTEGER, j81 INTEGER, j82 INTEGER, j83 INTEGER, j84 INTEGER, j85 INTEGER, j86 INTEGER, j87 INTEGER, j88 INTEGER, j89 INTEGER, j90 INTEGER, j91 INTEGER, j92 INTEGER, j93 INTEGER, j94 INTEGER, j95 INTEGER, j96 INTEGER, j97 INTEGER, j98 INTEGER, j99 INTEGER)")
 	else:
 		c.execute("CREATE TABLE mvcc_test(i INTEGER)")
 	commit_connection()

 	c.execute("INSERT INTO mvcc_test (i) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (13), (14), (15), (16), (17), (18), (19), (20), (21), (22), (23), (24), (25), (26), (27), (28), (29), (30), (31), (32), (33), (34), (35), (36), (37), (38), (39), (40), (41), (42), (43), (44), (45), (46), (47), (48), (49), (50), (51), (52), (53), (54), (55), (56), (57), (58), (59), (60), (61), (62), (63), (64), (65), (66), (67), (68), (69), (70), (71), (72), (73), (74), (75), (76), (77), (78), (79), (80), (81), (82), (83), (84), (85), (86), (87), (88), (89), (90), (91), (92), (93), (94), (95), (96), (97), (98), (99), (100)")

 	while rows < target_rows:
 		c.execute("INSERT INTO mvcc_test SELECT * FROM mvcc_test")
 		rows *= 2
 	commit_connection()
 	# make data hot
 	c.execute("SELECT SUM(i) FROM mvcc_test")
 	commit_connection()


 # perform single update and single scan after update
 def perform_single_update(f, selectivity, many_columns=False):
 	print("Run single update (selectivity %d, many columns: %s)" % (selectivity, str(many_columns)))
 	global con, c
 	load_data(many_columns)
 	# run the test
 	start = time.time()
 	if selectivity >= 100:
 		c.execute("UPDATE mvcc_test SET i=i+1")
 	elif selectivity > 0:
 		c.execute("UPDATE mvcc_test SET i=i+1 WHERE i <= " + str(selectivity))
 	end = time.time()
 	update_time = end - start
 	start = time.time()
 	c.execute("SELECT SUM(i) FROM mvcc_test")
 	result = c.fetchall()[0][0]
 	end = time.time()
 	select_time = end - start
 	commit_connection()
 	f.write("single_update%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, selectivity, result, update_time))
 	f.write("select_after_update%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, selectivity, result, select_time))


 # perform 10 updates in a row
 def perform_many_updates(f, many_columns=False):
 	print("Run many updates (many columns: %s)" % (str(many_columns), ))
 	global con, c
 	load_data(many_columns)
 	# run the test
 	start = time.time()
 	for i in range(0, 10):
 		c.execute("UPDATE mvcc_test SET i=i+1")
 		commit_connection()
 	end = time.time()
 	update_time = end - start
 	c.execute("SELECT SUM(i) FROM mvcc_test")
 	result = c.fetchall()[0][0]
 	f.write("many_updates%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, 0, result, update_time))
 	commit_connection()


 with open(system + '.csv', 'w+') as f:
 	f.write("benchmark\tsystem\tselectivity\tresult\ttime\n")
 	perform_single_update(f, 0)
 	perform_single_update(f, 1)
 	perform_single_update(f, 10)
 	perform_single_update(f, 100)
 	perform_many_updates(f)
 	# many columns
 	perform_single_update(f, 0, True)
 	perform_single_update(f, 1, True)
 	perform_single_update(f, 10, True)
 	perform_single_update(f, 100, True)
 	perform_many_updates(f, True)



 os.system('cat ' + system + '.csv')

	import sys, time, numpy, os
	if len(sys.argv) < 2:
	print("Usage: python3 benchmark-mvcc [hyper\|monetdb\|sqlite\|duckdb]")
	exit(1)

	try:
	os.remove('test.db')
	except:
	pass
	system = sys.argv[1].lower()
	if system == 'hyper':
	import psycopg2
	con = psycopg2.connect(port=7483, host="localhost", user="hannes")
	c = con.cursor()
	elif system == 'duckdb':
	import duckdb
	con = duckdb.connect('test.db')
	c = con.cursor()
	elif system == 'sqlite':
	import sqlite3
	con = sqlite3.connect('test.db')
	c = con.cursor()
	elif system == 'monetdb':
	import pymonetdb
	con = pymonetdb.connect(username='monetdb', password='monetdb', hostname='localhost', port=50000, database='demo')
	c = con.cursor()
	else:
	print("Unrecognized system " + system)
	exit(1)

	def commit_connection():
	global con, c
	if system != 'duckdb':
	# duckdb is on autocommit mode
	con.commit()

	def load_data(many_columns=False):
	global con, c
	rows = 100
	target_rows = 10000000
	if system == 'duckdb':
	os.remove('test.db')
	con = duckdb.connect('test.db')
	c = con.cursor()
	else:
	c.execute("DROP TABLE IF EXISTS mvcc_test")
	if many_columns:
	c.execute("CREATE TABLE mvcc_test(i INTEGER, j1 INTEGER, j2 INTEGER, j3 INTEGER, j4 INTEGER, j5 INTEGER, j6 INTEGER, j7 INTEGER, j8 INTEGER, j9 INTEGER, j10 INTEGER, j11 INTEGER, j12 INTEGER, j13 INTEGER, j14 INTEGER, j15 INTEGER, j16 INTEGER, j17 INTEGER, j18 INTEGER, j19 INTEGER, j20 INTEGER, j21 INTEGER, j22 INTEGER, j23 INTEGER, j24 INTEGER, j25 INTEGER, j26 INTEGER, j27 INTEGER, j28 INTEGER, j29 INTEGER, j30 INTEGER, j31 INTEGER, j32 INTEGER, j33 INTEGER, j34 INTEGER, j35 INTEGER, j36 INTEGER, j37 INTEGER, j38 INTEGER, j39 INTEGER, j40 INTEGER, j41 INTEGER, j42 INTEGER, j43 INTEGER, j44 INTEGER, j45 INTEGER, j46 INTEGER, j47 INTEGER, j48 INTEGER, j49 INTEGER, j50 INTEGER, j51 INTEGER, j52 INTEGER, j53 INTEGER, j54 INTEGER, j55 INTEGER, j56 INTEGER, j57 INTEGER, j58 INTEGER, j59 INTEGER, j60 INTEGER, j61 INTEGER, j62 INTEGER, j63 INTEGER, j64 INTEGER, j65 INTEGER, j66 INTEGER, j67 INTEGER, j68 INTEGER, j69 INTEGER, j70 INTEGER, j71 INTEGER, j72 INTEGER, j73 INTEGER, j74 INTEGER, j75 INTEGER, j76 INTEGER, j77 INTEGER, j78 INTEGER, j79 INTEGER, j80 INTEGER, j81 INTEGER, j82 INTEGER, j83 INTEGER, j84 INTEGER, j85 INTEGER, j86 INTEGER, j87 INTEGER, j88 INTEGER, j89 INTEGER, j90 INTEGER, j91 INTEGER, j92 INTEGER, j93 INTEGER, j94 INTEGER, j95 INTEGER, j96 INTEGER, j97 INTEGER, j98 INTEGER, j99 INTEGER)")
	else:
	c.execute("CREATE TABLE mvcc_test(i INTEGER)")
	commit_connection()

	c.execute("INSERT INTO mvcc_test (i) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (13), (14), (15), (16), (17), (18), (19), (20), (21), (22), (23), (24), (25), (26), (27), (28), (29), (30), (31), (32), (33), (34), (35), (36), (37), (38), (39), (40), (41), (42), (43), (44), (45), (46), (47), (48), (49), (50), (51), (52), (53), (54), (55), (56), (57), (58), (59), (60), (61), (62), (63), (64), (65), (66), (67), (68), (69), (70), (71), (72), (73), (74), (75), (76), (77), (78), (79), (80), (81), (82), (83), (84), (85), (86), (87), (88), (89), (90), (91), (92), (93), (94), (95), (96), (97), (98), (99), (100)")

	while rows < target_rows:
	c.execute("INSERT INTO mvcc_test SELECT * FROM mvcc_test")
	rows *= 2
	commit_connection()
	# make data hot
	c.execute("SELECT SUM(i) FROM mvcc_test")
	commit_connection()


	# perform single update and single scan after update
	def perform_single_update(f, selectivity, many_columns=False):
	print("Run single update (selectivity %d, many columns: %s)" % (selectivity, str(many_columns)))
	global con, c
	load_data(many_columns)
	# run the test
	start = time.time()
	if selectivity >= 100:
	c.execute("UPDATE mvcc_test SET i=i+1")
	elif selectivity > 0:
	c.execute("UPDATE mvcc_test SET i=i+1 WHERE i <= " + str(selectivity))
	end = time.time()
	update_time = end - start
	start = time.time()
	c.execute("SELECT SUM(i) FROM mvcc_test")
	result = c.fetchall()[0][0]
	end = time.time()
	select_time = end - start
	commit_connection()
	f.write("single_update%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, selectivity, result, update_time))
	f.write("select_after_update%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, selectivity, result, select_time))


	# perform 10 updates in a row
	def perform_many_updates(f, many_columns=False):
	print("Run many updates (many columns: %s)" % (str(many_columns), ))
	global con, c
	load_data(many_columns)
	# run the test
	start = time.time()
	for i in range(0, 10):
	c.execute("UPDATE mvcc_test SET i=i+1")
	commit_connection()
	end = time.time()
	update_time = end - start
	c.execute("SELECT SUM(i) FROM mvcc_test")
	result = c.fetchall()[0][0]
	f.write("many_updates%s\t%s\t%d\t%d\t%lf\n" % ("_many_columns" if many_columns else "", system, 0, result, update_time))
	commit_connection()


	with open(system + '.csv', 'w+') as f:
	f.write("benchmark\tsystem\tselectivity\tresult\ttime\n")
	perform_single_update(f, 0)
	perform_single_update(f, 1)
	perform_single_update(f, 10)
	perform_single_update(f, 100)
	perform_many_updates(f)
	# many columns
	perform_single_update(f, 0, True)
	perform_single_update(f, 1, True)
	perform_single_update(f, 10, True)
	perform_single_update(f, 100, True)
	perform_many_updates(f, True)



	os.system('cat ' + system + '.csv')