Last active
January 17, 2017 23:23
-
-
Save jdunck/c1734dd7737729732dac to your computer and use it in GitHub Desktop.
Chunked bulk_create under mysql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from itertools import izip_longest | |
from django.db import connections | |
def get_cursor(alias='default'): | |
""" | |
Get a raw cursor out of django's DB connection | |
""" | |
wrapped_conn = connections[alias] | |
# hack to ensure connection is immediately opened: | |
if wrapped_conn.connection is None: | |
cursor = wrapped_conn.cursor() | |
raw_conn = wrapped_conn.connection | |
return raw_conn.cursor() | |
mysql_max_allowed_packet = None | |
def _get_statement_limit(): | |
""" | |
Get the max statement size for mysql. | |
""" | |
global mysql_max_allowed_packet | |
if mysql_max_allowed_packet is None: | |
cursor = get_cursor() | |
cursor.execute("select @@session.max_allowed_packet") | |
mysql_max_allowed_packet = cursor.fetchone()[0] | |
cursor.close() | |
return mysql_max_allowed_packet | |
def chunker(iterable, n, fillvalue=None): | |
""" | |
Chunk any iterable by size `n`, filling the last chunk with | |
`fillvalue`. | |
""" | |
args = [iter(iterable)] * n | |
return izip_longest(*args, fillvalue=fillvalue) | |
def chunked_by_limit(iterable, limit, size_per): | |
""" | |
Split an iterable of `size_per` objects by the `limit` | |
per chunk. | |
""" | |
chunk_size = int(math.floor(limit / size_per)) | |
for chunk in chunker(iterable, chunk_size): | |
yield filter(None, chunk) # filter removes fillvalues. | |
limit = _get_statement_limit() | |
# 1024 is a stand-in for the estimated size of a values (...) clause | |
# in the generated sql statement | |
# insert into x (a, b, c) values (1,2,3),(4,5,6),... | |
for chunk in chunked_by_limit(bulk_objs, limit, 1024): | |
Obj.objects.bulk_create(chunk) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment