Created
August 17, 2016 16:21
-
-
Save romuald/b15bd02857f37a902c286c51ea8231d9 to your computer and use it in GitHub Desktop.
POC showing the difference in CPU/memory use of SQLAlchemy IN clause for large lists
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from time import time | |
from itertools import imap | |
from sqlalchemy import create_engine, Column, Integer, String, literal_column | |
from sqlalchemy.orm import sessionmaker | |
from sqlalchemy.ext.declarative import declarative_base | |
SIZE = 80000 | |
LITERAL = True | |
def getrss(): | |
""" | |
Utility method to retrieve memory usage of current process | |
Linux-only, returns a string with memory suffix (usualy kB) | |
""" | |
for line in file('/proc/%d/status' % os.getpid()): | |
if line.startswith('VmRSS:'): | |
return line[6:].strip('\n\t ') | |
return '' | |
Base = declarative_base() | |
class MyClass(Base): | |
__tablename__ = 'my_table' | |
id = Column(Integer, primary_key=True) | |
name = Column(String(50)) | |
def __repr__(self): | |
return '<MyClass %(id)d: %(name)s>' % self.__dict__ | |
engine = create_engine('mysql://root@localhost/test', echo=0) | |
#engine = create_engine('sqlite://') | |
Base.metadata.create_all(engine) | |
Session = sessionmaker(bind=engine) | |
session = Session() | |
if session.query(MyClass).count() == 0: | |
me = MyClass() | |
me.name = 'John' | |
session.add(me) | |
thy = MyClass() | |
thy.name = 'Doe' | |
session.add(thy) | |
session.flush() | |
print 'Before init list', getrss() # ~20 Mb | |
ids = range(SIZE) | |
print 'Before query SQLAlchemy', getrss() # ~22 Mb | |
query = session.query(MyClass) | |
start = time() | |
try: | |
if LITERAL: | |
sids = ','.join(imap(session.connection().connection.connection.escape, ids)) | |
in_ = [literal_column(sids)] | |
else: | |
in_ = ids | |
res = query.filter(MyClass.id.in_(in_)).all() | |
print res | |
finally: | |
# SQLLite driver reports "too many variables" | |
# MySQL correctly processes hundred of thousands of parameters | |
end = time() | |
print 'Executed in %.3fs' % (end - start) | |
# ~180 Mb (in list) | |
# ~27 Mb (literal IN) | |
print 'After query', getrss() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment