eastside · October 26, 2021 17:59 · eastside · Oct 26, 2021
diff --git a/django_mysql_sscursor.py b/django_mysql_sscursor.py
 from contextlib import contextmanager

 import MySQLdb
 import MySQLdb.cursors
 import logging

 from django.conf import settings


 log = logging.getLogger(__name__)


 @contextmanager
 def streaming_cursor(
        db_name='default',
 ):
    """
    Uses MySQLdb's server side cursor, instead of the default,
    allowing us to stream results rather than load them all into memory at once.
    There's a lot of old discussion online, warning us not to use this but uh,
    I don't see why we shouldn't. The Django community seems to just not have
    that much exposure to MySQL.
    """
    # FIXME: Could cache the connection?
    connection = MySQLdb.connect(
        user=settings.DATABASES[db_name]['USER'],
        password=settings.DATABASES[db_name]['PASSWORD'] or '',
        host=settings.DATABASES[db_name]['HOST'],
        port=settings.DATABASES[db_name]['PORT'],
        database=settings.DATABASES[db_name]['NAME'],
        cursorclass=MySQLdb.cursors.SSCursor,
    )
    cursor = connection.cursor()
    try:
        yield cursor
    finally:
        cursor.close()
 
 
 def stream_queryset(
    model,
    field_names,
    other_field_names,
    sql,
    params,
    db_name='default',
 ):
     """
     Accepts an SQL query that ought to resolve to model instances.
     
     This is sort of like QuerySet.raw(), but I'm not going to implement all of the other 
     fancy bullshit raw() does.
     
     Provide all field names for the model instance in field names.
     Provide any other field names (Django would call these annotated field names) in other_field_names.
     
     SQL should produce results that match. Columns produced should be ordered like [*field_names, *other_field_names].
     """
     with streaming_cursor(db_name) as cursor:
         cursor.execute(sql, params)
         while rows := cursor.fetchmany(size=2000):
             for row in rows:
                 field_columns = row[0:len(field_names)]
                 instance = model.from_db(db=db_name, field_names=field_names, values=field_columns)
                 annotated_columns = row[len(field_names):len(field_names) + len(other_field_names)]
                 for (other_field_name, annotated_column) in zip(other_field_names, annotated_columns):
                     setattr(instance, other_field_name, annotated_column)
                 yield instance
	from contextlib import contextmanager

	import MySQLdb
	import MySQLdb.cursors
	import logging

	from django.conf import settings


	log = logging.getLogger(__name__)


	@contextmanager
	def streaming_cursor(
	db_name='default',
	):
	"""
	Uses MySQLdb's server side cursor, instead of the default,
	allowing us to stream results rather than load them all into memory at once.
	There's a lot of old discussion online, warning us not to use this but uh,
	I don't see why we shouldn't. The Django community seems to just not have
	that much exposure to MySQL.
	"""
	# FIXME: Could cache the connection?
	connection = MySQLdb.connect(
	user=settings.DATABASES[db_name]['USER'],
	password=settings.DATABASES[db_name]['PASSWORD'] or '',
	host=settings.DATABASES[db_name]['HOST'],
	port=settings.DATABASES[db_name]['PORT'],
	database=settings.DATABASES[db_name]['NAME'],
	cursorclass=MySQLdb.cursors.SSCursor,
	)
	cursor = connection.cursor()
	try:
	yield cursor
	finally:
	cursor.close()


	def stream_queryset(
	model,
	field_names,
	other_field_names,
	sql,
	params,
	db_name='default',
	):
	"""
	Accepts an SQL query that ought to resolve to model instances.

	This is sort of like QuerySet.raw(), but I'm not going to implement all of the other
	fancy bullshit raw() does.

	Provide all field names for the model instance in field names.
	Provide any other field names (Django would call these annotated field names) in other_field_names.

	SQL should produce results that match. Columns produced should be ordered like [field_names, other_field_names].
	"""
	with streaming_cursor(db_name) as cursor:
	cursor.execute(sql, params)
	while rows := cursor.fetchmany(size=2000):
	for row in rows:
	field_columns = row[0:len(field_names)]
	instance = model.from_db(db=db_name, field_names=field_names, values=field_columns)
	annotated_columns = row[len(field_names):len(field_names) + len(other_field_names)]
	for (other_field_name, annotated_column) in zip(other_field_names, annotated_columns):
	setattr(instance, other_field_name, annotated_column)
	yield instance