Last active
January 1, 2021 13:51
-
-
Save josephernest/2c02f7627b83a32fd2086fe9dde15215 to your computer and use it in GitHub Desktop.
Quick benchmark of DuckDB / Sqlite3 (disclaimer: it doesn't necessarily showcase the power of both solutions, there might be better ways I'm not aware of)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Quick benchmark of DuckDB / Sqlite3 (disclaimer: it doesn't necessarily showcase the power of both solutions, there might be better ways I'm not aware of) | |
https://gist.github.com/josephernest/2c02f7627b83a32fd2086fe9dde15215.js | |
https://github.com/cwida/duckdb/issues/1249 | |
1M rows 10M rows 20M rows 50M rows | |
duckdb 13ms ? 130ms ? 285ms ? ? | |
sqlite 13ms 26 MB 113ms 260 MB 221ms 527 MB ? | |
""" | |
import time, random, string, sqlite3, duckdb, pandas, numpy | |
class elapsed: | |
def __enter__(self): self.start = time.time() | |
def __exit__(self, *args): print("%.1f ms" % ((time.time() - self.start)*1000)) | |
#################### QUERIES | |
CREATE = """CREATE TABLE data(id INTEGER, dt INTEGER, shop INTEGER, product INTEGER, aa INTEGER, bb INTEGER, customer INTEGER, PRIMARY KEY(shop, product, customer, id))""" | |
SELECT = "SELECT product, COUNT(DISTINCT customer) count FROM data WHERE shop == 2 GROUP BY product" | |
N = 10*1000*1000 | |
#################### DUCKDB | |
db = duckdb.connect(':memory:') | |
db.execute(CREATE) | |
db.execute(f"""INSERT INTO data SELECT i id, | |
round((random()*100000),0)::int + 1600000000 dt, | |
round((random()*26),0)::int shop, | |
round((random()*676),0)::int product, | |
round((random()*676),0)::int aa, 0 bb, | |
round((random()*676),0)::int customer FROM range(0,{N}) tbl(i);""") | |
db.commit() | |
with elapsed(): | |
db.execute(SELECT).fetchnumpy() | |
# how to measure the DB size of a :memory: DuckDB? | |
#################### SQLITE | |
db = sqlite3.connect(':memory:') | |
db.execute(CREATE + ' WITHOUT ROWID') # https://sqlite.org/withoutrowid.html optimization | |
for i in range(N): | |
dt = random.randint(1600000000, 1600100000) # random timestamp | |
shop = random.randrange(26) | |
product = random.randrange(676) | |
aa = random.randrange(676) | |
bb = 0 | |
customer = random.randrange(676) | |
db.execute("INSERT INTO data(id, dt, shop, product, aa, bb, customer) VALUES (?, ?, ?, ?, ?, ?, ?)", (i, dt, shop, product, aa, bb, customer)) | |
db.commit() | |
with elapsed(): | |
for _ in db.execute(SELECT): | |
pass | |
print('size (MB):', next(db.execute('PRAGMA page_count;'))[0] * next(db.execute('PRAGMA page_size;'))[0] / 1024**2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment