Skip to content

Instantly share code, notes, and snippets.

@romuald
Created August 17, 2016 16:21
Show Gist options
  • Save romuald/b15bd02857f37a902c286c51ea8231d9 to your computer and use it in GitHub Desktop.
Save romuald/b15bd02857f37a902c286c51ea8231d9 to your computer and use it in GitHub Desktop.
POC showing the difference in CPU/memory use of SQLAlchemy IN clause for large lists
import os
from time import time
from itertools import imap
from sqlalchemy import create_engine, Column, Integer, String, literal_column
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
SIZE = 80000
LITERAL = True
def getrss():
"""
Utility method to retrieve memory usage of current process
Linux-only, returns a string with memory suffix (usualy kB)
"""
for line in file('/proc/%d/status' % os.getpid()):
if line.startswith('VmRSS:'):
return line[6:].strip('\n\t ')
return ''
Base = declarative_base()
class MyClass(Base):
__tablename__ = 'my_table'
id = Column(Integer, primary_key=True)
name = Column(String(50))
def __repr__(self):
return '<MyClass %(id)d: %(name)s>' % self.__dict__
engine = create_engine('mysql://root@localhost/test', echo=0)
#engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
if session.query(MyClass).count() == 0:
me = MyClass()
me.name = 'John'
session.add(me)
thy = MyClass()
thy.name = 'Doe'
session.add(thy)
session.flush()
print 'Before init list', getrss() # ~20 Mb
ids = range(SIZE)
print 'Before query SQLAlchemy', getrss() # ~22 Mb
query = session.query(MyClass)
start = time()
try:
if LITERAL:
sids = ','.join(imap(session.connection().connection.connection.escape, ids))
in_ = [literal_column(sids)]
else:
in_ = ids
res = query.filter(MyClass.id.in_(in_)).all()
print res
finally:
# SQLLite driver reports "too many variables"
# MySQL correctly processes hundred of thousands of parameters
end = time()
print 'Executed in %.3fs' % (end - start)
# ~180 Mb (in list)
# ~27 Mb (literal IN)
print 'After query', getrss()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment