Created
December 28, 2020 18:52
-
-
Save Mytherin/77d54f2242453d7612f953644c5d18a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import duckdb | |
import sqlite3 | |
import time, random, string | |
base_tuples = 200*1000 | |
repeat_insert = 8 | |
def run_benchmark_strings(db, dbname, print_tuple_count, count_distinct=True): | |
db.execute("""CREATE TABLE data(id INTEGER, dt INTEGER, shop TEXT, product TEXT, aa TEXT, bb INTEGER, | |
customer TEXT);""") | |
db.execute("begin transaction") | |
for i in range(base_tuples): | |
dt = random.randint(1600000000, 1600100000) # random datetime (unix timestamp) | |
shop = ''.join(random.choices(string.ascii_uppercase, k=1)) # 26 possibilities | |
product = ''.join(random.choices(string.ascii_uppercase, k=2)) # 676 possibilities | |
aa = ''.join(random.choices(string.ascii_uppercase, k=2)) # 676 possibilities | |
customer = ''.join(random.choices(string.ascii_uppercase, k=2)) # 676 possibilities | |
db.execute("INSERT INTO data(id, dt, shop, product, aa, bb, customer) VALUES (?, ?, ?, ?, ?, ?, ?)", | |
[i, dt, shop, product, aa, 0, customer]) | |
for i in range(repeat_insert): | |
db.execute('INSERT INTO data SELECT * FROM data') | |
db.commit() | |
if print_tuple_count: | |
tuple_count = db.execute('SELECT COUNT(*) FROM data').fetchall()[0][0] | |
print("Total tuples: %d" % (tuple_count,)) | |
start = time.time() | |
if not count_distinct: | |
db.execute("SELECT product, COUNT(customer) count FROM data WHERE shop == 'A' GROUP BY product").fetchall() | |
else: | |
db.execute("SELECT product, COUNT(DISTINCT customer) count FROM data WHERE shop == 'A' GROUP BY product").fetchall() | |
print("%s: %.1f ms" % (dbname, (time.time() - start)*1000)) | |
def run_benchmark_integers(db, dbname, print_tuple_count, count_distinct=True): | |
db.execute("""CREATE TABLE data(id INTEGER, dt INTEGER, shop INTEGER, product INTEGER, aa INTEGER, bb INTEGER, | |
customer INTEGER);""") | |
db.execute("begin transaction") | |
for i in range(base_tuples): | |
dt = random.randint(1600000000, 1600100000) # random datetime (unix timestamp) | |
shop = random.randint(0, 26) # 26 possibilities | |
product = random.randint(0, 676) # 676 possibilities | |
aa = random.randint(0, 676) # 676 possibilities | |
customer = random.randint(0, 676) # 676 possibilities | |
db.execute("INSERT INTO data(id, dt, shop, product, aa, bb, customer) VALUES (?, ?, ?, ?, ?, ?, ?)", | |
[i, dt, shop, product, aa, 0, customer]) | |
for i in range(repeat_insert): | |
db.execute('INSERT INTO data SELECT * FROM data') | |
db.commit() | |
if print_tuple_count: | |
tuple_count = db.execute('SELECT COUNT(*) FROM data').fetchall()[0][0] | |
print("Total tuples: %d" % (tuple_count,)) | |
start = time.time() | |
if not count_distinct: | |
db.execute("SELECT product, COUNT(customer) count FROM data WHERE shop == 1 GROUP BY product").fetchall() | |
else: | |
db.execute("SELECT product, COUNT(DISTINCT customer) count FROM data WHERE shop == 1 GROUP BY product").fetchall() | |
print("%s: %.1f ms" % (dbname, (time.time() - start)*1000)) | |
duck_db = duckdb.connect(':memory:') | |
sqlite_db = sqlite3.connect(':memory:') | |
print("COUNT(DISTINCT) - VARCHAR") | |
run_benchmark_strings(duck_db, 'DuckDB', True) | |
run_benchmark_strings(sqlite_db, 'SQLite', False) | |
duck_db = duckdb.connect(':memory:') | |
sqlite_db = sqlite3.connect(':memory:') | |
print("COUNT(DISTINCT) - INTEGER") | |
run_benchmark_integers(duck_db, 'DuckDB', True) | |
run_benchmark_integers(sqlite_db, 'SQLite', False) | |
print("COUNT - VARCHAR") | |
duck_db = duckdb.connect(':memory:') | |
sqlite_db = sqlite3.connect(':memory:') | |
run_benchmark_strings(duck_db, 'DuckDB', True, False) | |
run_benchmark_strings(sqlite_db, 'SQLite', False, False) | |
duck_db = duckdb.connect(':memory:') | |
sqlite_db = sqlite3.connect(':memory:') | |
print("COUNT - INTEGER") | |
run_benchmark_integers(duck_db, 'DuckDB', True, False) | |
run_benchmark_integers(sqlite_db, 'SQLite', False, False) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment