Last active
December 17, 2020 14:10
-
-
Save karenc/0a043511ea6812bb85f4b424e9a98779 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import functools | |
import os | |
import sqlite3 | |
import time | |
import psycopg2 | |
import sqlalchemy | |
def setup_sqlite(): | |
sqlite3_conn = sqlite3.connect(sqlite_db) | |
sqlite3_cursor = sqlite3_conn.cursor() | |
sqlite3_cursor.execute( | |
"select tbl_name from sqlite_master where tbl_name = ? and type = 'table'", | |
(table_name,)) | |
if sqlite3_cursor.fetchall(): | |
sqlite3_cursor.execute(f'select count(*) from {table_name}') | |
count = sqlite3_cursor.fetchone()[0] | |
print(f'Use existing table {table_name} ({count} rows) in sqlite database') | |
else: | |
print(f'Create new table {table_name} in sqlite database') | |
created_table['sqlite'] = True | |
sqlite3_cursor.execute( | |
f'create table {table_name} (annot_rowid integer primary key, image_rowid integer not null)') | |
for i in range(queries): | |
sqlite3_cursor.execute( | |
f'insert into {table_name} (annot_rowid, image_rowid) values (?, ?)', | |
(i, i * 10)) | |
sqlite3_conn.close() | |
def setup_postgres(): | |
with psycopg2.connect(postgres_db) as conn: | |
with conn.cursor() as cursor: | |
cursor.execute( | |
'select table_name from information_schema.tables where table_name = %s and table_schema = %s', | |
(table_name, postgres_schema) | |
) | |
if cursor.fetchall(): | |
cursor.execute(f'select count(*) from {table_name}') | |
count = cursor.fetchone()[0] | |
print(f'Use existing table {table_name} ({count} rows) in postgres database') | |
else: | |
created_table['postgres'] = True | |
print(f'Create new table {table_name} in postgres database') | |
cursor.execute( | |
f'create table {table_name} (annot_rowid integer primary key, image_rowid integer not null)') | |
for i in range(queries): | |
cursor.execute( | |
f'insert into {table_name} (annot_rowid, image_rowid) values (%s, %s)', | |
(i, i * 10)) | |
def clean_up(): | |
if created_table['sqlite']: | |
print(f'Drop table {table_name} in sqlite') | |
sqlite_conn = sqlite3.connect(sqlite_db) | |
sqlite_cursor = sqlite_conn.cursor() | |
sqlite_cursor.execute(f'drop table {table_name}') | |
if created_table['postgres']: | |
print(f'Drop table {table_name} in postgres') | |
with psycopg2.connect(postgres_db) as conn: | |
with conn.cursor() as cursor: | |
cursor.execute(f'drop table {postgres_schema}.{table_name}') | |
print('---') | |
def average_time(func): | |
times = [] | |
@functools.wraps(func) | |
def wrapper(*args, **kwargs): | |
for i in range(iterations): | |
start = time.time() | |
func(*args, **kwargs) | |
times.append(time.time() - start) | |
print(f'Average time for running {func}: {sum(times) / iterations}') | |
return wrapper | |
@average_time | |
def run_sqlite(): | |
stmt = f'select image_rowid from {table_name} where annot_rowid = ?' | |
for rowid in range(queries): | |
sqlite3_cursor.execute(stmt, (rowid,)) | |
sqlite3_cursor.fetchall() | |
@average_time | |
def run_psycopg2(): | |
stmt = f'select image_rowid from {table_name} where annot_rowid = %s' | |
for rowid in range(queries): | |
psycopg2_cursor.execute(stmt, (rowid,)) | |
psycopg2_cursor.fetchall() | |
@average_time | |
def run_sqlalchemy_sqlite(): | |
stmt = f'select image_rowid from {table_name} where annot_rowid = ?' | |
for rowid in range(queries): | |
sqlalchemy_conn['sqlite'].execute(stmt, (rowid,)).fetchall() | |
@average_time | |
def run_sqlalchemy_postgres(): | |
stmt = f'select image_rowid from {table_name} where annot_rowid = %s' | |
for rowid in range(queries): | |
sqlalchemy_conn['postgres'].execute(stmt, (rowid,)).fetchall() | |
@average_time | |
def run_sqlalchemy_text_sqlite(): | |
stmt = sqlalchemy.sql.text(f'select image_rowid from {table_name} where annot_rowid = :rowid') | |
for rowid in range(queries): | |
sqlalchemy_conn['sqlite'].execute(stmt, rowid=rowid).fetchall() | |
@average_time | |
def run_sqlalchemy_text_postgres(): | |
stmt = sqlalchemy.sql.text(f'select image_rowid from {table_name} where annot_rowid = :rowid') | |
for rowid in range(queries): | |
sqlalchemy_conn['postgres'].execute(stmt, rowid=rowid).fetchall() | |
@average_time | |
def run_sqlalchemy_table_text_sqlite(): | |
annot_rowid = annotations['sqlite'].c['annot_rowid'] | |
image_rowid = annotations['sqlite'].c['image_rowid'] | |
stmt = sqlalchemy.select([image_rowid]).\ | |
where(annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type)) | |
stmt = sqlalchemy.sql.text(str(stmt)) | |
for rowid in range(queries): | |
sqlalchemy_conn['sqlite'].execute(stmt, rowid=rowid).fetchall() | |
@average_time | |
def run_sqlalchemy_table_text_postgres(): | |
annot_rowid = annotations['postgres'].c['annot_rowid'] | |
image_rowid = annotations['postgres'].c['image_rowid'] | |
stmt = sqlalchemy.select([image_rowid]).\ | |
where(annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type)) | |
stmt = sqlalchemy.sql.text(str(stmt)) | |
for rowid in range(queries): | |
sqlalchemy_conn['postgres'].execute(stmt, rowid=rowid).fetchall() | |
@average_time | |
def run_sqlalchemy_table_sqlite(): | |
annot_rowid = annotations['sqlite'].c['annot_rowid'] | |
image_rowid = annotations['sqlite'].c['image_rowid'] | |
stmt = sqlalchemy.select([image_rowid]).\ | |
where(annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type)) | |
for rowid in range(queries): | |
sqlalchemy_conn['sqlite'].execute(stmt, rowid=rowid).fetchall() | |
@average_time | |
def run_sqlalchemy_table_postgres(): | |
annot_rowid = annotations['postgres'].c['annot_rowid'] | |
image_rowid = annotations['postgres'].c['image_rowid'] | |
stmt = sqlalchemy.select([image_rowid]).\ | |
where(annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type)) | |
for rowid in range(queries): | |
sqlalchemy_conn['postgres'].execute(stmt, rowid=rowid).fetchall() | |
@average_time | |
def run_sqlalchemy_reduced_queries_sqlite(): | |
annot_rowid = annotations['sqlite'].c['annot_rowid'] | |
image_rowid = annotations['sqlite'].c['image_rowid'] | |
params_iter = list(range(queries)) | |
result_map = {} | |
BATCH_SIZE = 250000 | |
stmt = sqlalchemy.select([annot_rowid, image_rowid]) | |
where_clause = annot_rowid.in_(sqlalchemy.sql.bindparam('value', expanding=True)) | |
stmt = stmt.where(where_clause) | |
for batch in range(queries // BATCH_SIZE + 1): | |
val_list = sqlalchemy_conn['sqlite'].execute(stmt, value=params_iter[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]) | |
for val in val_list: | |
existing = result_map.setdefault(val[0], []) | |
if val[1:] not in existing: | |
existing.append(val[1:]) | |
@average_time | |
def run_sqlalchemy_reduced_queries_postgres(): | |
annot_rowid = annotations['postgres'].c['annot_rowid'] | |
image_rowid = annotations['postgres'].c['image_rowid'] | |
params_iter = list(range(queries)) | |
result_map = {} | |
stmt = sqlalchemy.select([annot_rowid, image_rowid]) | |
where_clause = annot_rowid.in_(sqlalchemy.sql.bindparam('value', expanding=True)) | |
stmt = stmt.where(where_clause) | |
val_list = sqlalchemy_conn['postgres'].execute(stmt, value=params_iter) | |
for val in val_list: | |
existing = result_map.setdefault(val[0], []) | |
if val[1:] not in existing: | |
existing.append(val[1:]) | |
# Experimental: https://docs.sqlalchemy.org/en/14/core/connections.html#sql-compilation-caching | |
# Couldn't get it to be quick | |
# @average_time | |
# def run_sqlalchemy_table_lambda(): | |
# annot_rowid = annotations.c['image_rowid'] | |
# for rowid in range(queries): | |
# stmt = sqlalchemy.lambda_stmt(lambda: sqlalchemy.select([annot_rowid])) | |
# stmt += lambda s: s.where(annot_rowid == rowid) #sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type)) | |
# sqlalchemy_conn.execute(stmt) | |
if __name__ == '__main__': | |
sqlite_db = os.getenv('SQLITE_DB', 'benchmark.sqlite3') | |
postgres_db = os.getenv('POSTGRES_DB', 'postgresql://wbia:wbia@db/testdb1') | |
postgres_schema = os.getenv('POSTGRES_SCHEMA', 'public') | |
queries = int(os.getenv('QUERIES', 10000)) | |
iterations = int(os.getenv('ITERATIONS', 10)) | |
table_name = os.getenv('TABLE_NAME', 'annotations') | |
print(f'''\ | |
SQLITE_DB={sqlite_db} | |
POSTGRES_DB={postgres_db} | |
POSTGRES_SCHEMA={postgres_schema} | |
QUERIES={queries} | |
ITERATIONS={iterations} | |
TABLE_NAME={table_name} | |
sqlalchemy_version={sqlalchemy.__version__} | |
''') | |
engine = { | |
'sqlite': sqlalchemy.create_engine(f'sqlite:///{sqlite_db}'), | |
'postgres': sqlalchemy.create_engine(postgres_db), | |
} | |
sqlalchemy_conn = { | |
'sqlite': engine['sqlite'].connect(), | |
'postgres': engine['postgres'].connect(), | |
} | |
created_table = { | |
'sqlite': None, | |
'postgres': None, | |
} | |
setup_sqlite() | |
sqlite3_conn = sqlite3.connect(sqlite_db) | |
sqlite3_cursor = sqlite3_conn.cursor() | |
setup_postgres() | |
psycopg2_conn = psycopg2.connect(postgres_db) | |
psycopg2_cursor = psycopg2_conn.cursor() | |
psycopg2_cursor.execute('set schema %s', (postgres_schema,)) | |
sqlalchemy_md = { | |
'sqlite': sqlalchemy.MetaData(), | |
'postgres': sqlalchemy.MetaData(), | |
} | |
sqlalchemy_md['sqlite'].reflect(bind=engine['sqlite']) | |
sqlalchemy_md['postgres'].reflect(bind=engine['postgres'], | |
schema=postgres_schema) | |
annotations = { | |
'sqlite': sqlalchemy_md['sqlite'].tables[table_name], | |
'postgres': sqlalchemy_md['postgres'].tables[ | |
f'{postgres_schema}.{table_name}'], | |
} | |
try: | |
run_sqlite() | |
run_psycopg2() | |
run_sqlalchemy_sqlite() | |
run_sqlalchemy_postgres() | |
run_sqlalchemy_text_sqlite() | |
run_sqlalchemy_text_postgres() | |
run_sqlalchemy_table_text_sqlite() | |
run_sqlalchemy_table_text_postgres() | |
run_sqlalchemy_table_sqlite() | |
run_sqlalchemy_table_postgres() | |
run_sqlalchemy_reduced_queries_sqlite() | |
run_sqlalchemy_reduced_queries_postgres() | |
finally: | |
sqlalchemy_conn.clear() | |
sqlite3_conn.close() | |
psycopg2_cursor.close() | |
psycopg2_conn.close() | |
clean_up() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Using sqlalchemy version
1.4.0b1
, the average time for doing10000
queries in seconds: