Skip to content

Instantly share code, notes, and snippets.

@jdunck
Last active January 17, 2017 23:23
Show Gist options
  • Save jdunck/c1734dd7737729732dac to your computer and use it in GitHub Desktop.
Save jdunck/c1734dd7737729732dac to your computer and use it in GitHub Desktop.
Chunked bulk_create under mysql
import math
from itertools import izip_longest
from django.db import connections
def get_cursor(alias='default'):
"""
Get a raw cursor out of django's DB connection
"""
wrapped_conn = connections[alias]
# hack to ensure connection is immediately opened:
if wrapped_conn.connection is None:
cursor = wrapped_conn.cursor()
raw_conn = wrapped_conn.connection
return raw_conn.cursor()
mysql_max_allowed_packet = None
def _get_statement_limit():
"""
Get the max statement size for mysql.
"""
global mysql_max_allowed_packet
if mysql_max_allowed_packet is None:
cursor = get_cursor()
cursor.execute("select @@session.max_allowed_packet")
mysql_max_allowed_packet = cursor.fetchone()[0]
cursor.close()
return mysql_max_allowed_packet
def chunker(iterable, n, fillvalue=None):
"""
Chunk any iterable by size `n`, filling the last chunk with
`fillvalue`.
"""
args = [iter(iterable)] * n
return izip_longest(*args, fillvalue=fillvalue)
def chunked_by_limit(iterable, limit, size_per):
"""
Split an iterable of `size_per` objects by the `limit`
per chunk.
"""
chunk_size = int(math.floor(limit / size_per))
for chunk in chunker(iterable, chunk_size):
yield filter(None, chunk) # filter removes fillvalues.
limit = _get_statement_limit()
# 1024 is a stand-in for the estimated size of a values (...) clause
# in the generated sql statement
# insert into x (a, b, c) values (1,2,3),(4,5,6),...
for chunk in chunked_by_limit(bulk_objs, limit, 1024):
Obj.objects.bulk_create(chunk)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment