Forked from levidyrek/django_orm_optimization_cheat_sheet.py
Created
November 4, 2018 17:11
-
-
Save subhajeet2107/da9c09f36626f2953221db7157d7c36d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Django ORM Optimization Tips | |
Caveats: | |
* Only use optimizations that obfuscate the code if you need to. | |
* Not all of these tips are hard and fast rules. | |
* Use your judgement to determine what improvements are appropriate for your code. | |
""" | |
# --------------------------------------------------------------------------- | |
# 1. Profile | |
# --------------------------------------------------------------------------- | |
## Use these tools: | |
## * django-debug-toolbar | |
## * QuerySet.explain() | |
# --------------------------------------------------------------------------- | |
# 2. Be aware of QuerySet's lazy evaluation. | |
# --------------------------------------------------------------------------- | |
## 2a. When QuerySets are evaluated | |
# Iteration | |
for person in Person.objects.all(): | |
# Some logic | |
# Slicing/Indexing | |
Person.objects.all()[0] | |
# Pickling (i.e. serialization) | |
pickle.dumps(Person.objects.all()) | |
# Evaluation functions | |
repr(Person.objects.all()) | |
len(Person.objects.all()) | |
list(Person.objects.all()) | |
bool(Person.objects.all()) | |
# Other | |
[person for person in Person.objects.all()] # List comprehensions | |
person in Person.objects.all() # `in` checks | |
## 2b. When QuerySets are cached/not cached | |
### Not Cached | |
# Not reusing evaluated QuerySets | |
print([p.name for p in Person.objects.all()]) # QuerySet evaluated and cached | |
print([p.name for p in Person.objects.all()]) # New QuerySet is evaluated and cached | |
# Slicing/indexing unevaluated QuerySets | |
queryset = Person.objects.all() | |
print(queryset[0]) # Queries the database | |
print(queryset[0]) # Queries the database again | |
# Printing | |
print(Person.objects.all()) | |
### Cached | |
# Reusing an evaluated QuerySet | |
queryset = Person.objects.all() | |
print([p.name for p in queryset]) # QuerySet evaluated and cached | |
print([p.name for p in queryset]) # Cached results are used | |
# Slicing/indexing evaluated QuerySets | |
queryset = Person.objects.all() | |
list(queryset) # Queryset evaluated and cached | |
print(queryset[0]) # Cache used | |
print(queryset[0]) # Cache used | |
# --------------------------------------------------------------------------- | |
# 3. Be aware of which attributes are not cached. | |
# --------------------------------------------------------------------------- | |
## Not initially retrieved/cached | |
# Foreign-key related objects | |
person = Person.objects.get(id=1) | |
person.father # foreign object is retrieved and cached | |
person.father # cached version is used | |
## Never cached | |
# Callable attributes | |
person = Person.objects.get(id=1) | |
person.children.all() # Database hit | |
person.children.all() # Another database hit | |
# --------------------------------------------------------------------------- | |
# 4. Use select_related() and prefetch_related() when you will need everything. | |
# --------------------------------------------------------------------------- | |
# DON'T | |
queryset = Person.objects.all() | |
for person in queryset: | |
person.father # Foreign key relationship results in a database hit each iteration | |
# DO | |
queryset = Person.objects.all().select_related('father') # Foreign key object is included in query and cached | |
for person in queryset: | |
person.father # Hits the cache instead of the database | |
# --------------------------------------------------------------------------- | |
# 5. Try to avoid database queries in a loop. | |
# --------------------------------------------------------------------------- | |
# DON'T (contrived example) | |
filtered = Person.objects.filter(first_name='Shallan', last_name='Davar') | |
for age in range(18): | |
person = filtered.get(age=age) # Database query on each iteration | |
# DO (contrived example) | |
filtered = Person.objects.filter( # Narrow down the QuerySet to only what you need | |
first_name='Shallan', | |
last_name='Davar', | |
age_gte=0, | |
age_lte=18, | |
) | |
lookup = {person.age: person for person in filtered} # Evaluate the QuerySet and construct lookup | |
for age in range(18): | |
person = lookup[age] # No database query | |
# --------------------------------------------------------------------------- | |
# 6. Use iterator() to iterate through a very large QuerySet only once. | |
# --------------------------------------------------------------------------- | |
# Save memory by not caching anything | |
for person in Person.objects.iterator(): | |
# Some logic | |
# --------------------------------------------------------------------------- | |
# 7. Do work in the database rather than in Python. | |
# --------------------------------------------------------------------------- | |
## 7a. Use filter() and exclude() | |
# DON'T | |
for person in Person.objects.all(): | |
if person.age >= 18: | |
# Do something | |
# DO | |
for person in Person.objects.filter(age__gte=18): | |
# Do something | |
## 7b. Use F expressions | |
# DON'T | |
for person in Person.objects.all(): | |
person.age += 1 | |
person.save() | |
# DO | |
Person.objects.update(age=F('age') + 1) | |
## 7c. Do aggregation in the database, if possible | |
# DON'T | |
max_age = 0 | |
for person in Person.objects.all(): | |
if person.age > max_age: | |
max_age = person.age | |
# DO | |
max_age = Person.objects.all().aggregate(Max('age'))['age__max'] | |
# --------------------------------------------------------------------------- | |
# 8. Use values() and values_list() to get only the things you need. | |
# --------------------------------------------------------------------------- | |
## 8a. Use values() | |
# DON'T | |
age_lookup = { | |
person.name: person.age | |
for person in Person.objects.all() | |
} | |
# DO | |
age_lookup = { | |
person['name']: person['age'] | |
for person in Person.objects.values('name', 'age') | |
} | |
## 8b. Use values_list() | |
# DON'T | |
person_ids = [person.id for person in Person.objects.all()] | |
# DO | |
person_ids = Person.objects.values_list('id', flat=True) | |
# --------------------------------------------------------------------------- | |
# 9. Use defer() and only() when you know you won't need certain fields. | |
# | |
# * Use when you need a QuerySet instead of a list of dicts from values(). | |
# * Really only useful to defer fields that require significant processing to convert to a python object. | |
# --------------------------------------------------------------------------- | |
## 9a. Use defer() | |
queryset = Person.objects.defer('age') # Imagine age is computationally expensive | |
for person in queryset: | |
print(person.id) | |
print(person.name) | |
## 9b. Use only() | |
queryset = Person.objects.only('name') | |
for person in queryset: | |
print(person.name) | |
# --------------------------------------------------------------------------- | |
# 10. Use count() and exists() when you don't need the contents of the QuerySet. | |
# | |
# * Caveat: Only use these when you don't need to evaluate the QuerySet. | |
# --------------------------------------------------------------------------- | |
## 10a. Use count() | |
# DON'T | |
count = len(Person.objects.all()) # Evaluates the entire queryset | |
# DO | |
count = Person.objects.count() # Executes more efficient SQL to determine count | |
## 10b. Use exists() | |
# DON'T | |
exists = len(Person.objects.all()) > 0 | |
# DO | |
exists = Person.objects.exists() | |
# --------------------------------------------------------------------------- | |
# 11. Use delete() and update() when possible. | |
# --------------------------------------------------------------------------- | |
## 11a. Use delete() | |
# DON'T | |
for person in Person.objects.all(): | |
person.delete() | |
# DO | |
Person.objects.all().delete() | |
## 11b. Use update() | |
# DON'T | |
for person in Person.objects.all(): | |
person.age = 0 | |
person.save() | |
# DO | |
Person.objects.update(age=0) | |
# --------------------------------------------------------------------------- | |
# 12. Use bulk_create() when possible. | |
# | |
# * Caveats: https://docs.djangoproject.com/en/2.1/ref/models/querysets/#django.db.models.query.QuerySet.bulk_create | |
# --------------------------------------------------------------------------- | |
# Bulk Create | |
names = ['Jeff', 'Beth', 'Tim'] | |
creates = [] | |
for name in names: | |
creates.append( | |
Person(name=name, age=0) | |
) | |
Person.objects.bulk_create(creates) | |
# Bulk add to many-to-many fields | |
person = Person.objects.get(id=1) | |
person.jobs.add(job1, job2, job3) | |
# --------------------------------------------------------------------------- | |
# 13. Use foreign key values directly. | |
# --------------------------------------------------------------------------- | |
# DON'T | |
father_id = Person.objects.get(id=1).father.id # Causes a needless database query | |
# DO | |
father_id = Person.objects.get(id=1).father_id # The foreign key is already cached. No query |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment