vernondcole · May 22, 2015 18:45 · vernondcole · May 22, 2015
diff --git a/0008_data_migrate.py b/0008_data_migrate.py
 #!/usr/bin/python3
 # this migration will call itself as a command-line program.
 from __future__ import unicode_literals, print_function
 import datetime, subprocess, psycopg2, sys, time

 from django.db import migrations
 from django.conf import settings

 CHUNK_SIZE = 1000
 def call_yourself(apps, schema_editor):
    my_name = __file__
    connection_string = 'host={} dbname={} user={} password={}'.format(
        settings.DATABASES['default']['HOST'],
        settings.DATABASES['default']['NAME'],
        settings.DATABASES['default']['USER'],
        settings.DATABASES['default']['PASSWORD']
    )
    subprocess.call(['chmod', '+x', my_name])  # must have the execute bit set
    print('\n    Calling myself as a subprocess...')
    subprocess.Popen(['{} "{}"'.format(my_name, connection_string)], shell=True)
    time.sleep(5.0)  # a short pause, so that a test database will have time to be actually processed before proceeding

 def do_nothing(apps, schema_editor):
    pass  # we need a callable for the reverse direction, so we use this.


 class Migration(migrations.Migration):

    dependencies = [
        ('phx', '0007_cdr_call_end_utc'),
    ]

    operations = [
        migrations.RunPython(call_yourself, do_nothing, atomic=False)
    ]


 # Command-line code to perform our data migration, because we can't do it in a migration using an Atomic transaction
 # this operates as a command-line program, not as a migrate script
 # The python program repeatedly calls an SQL function which does the migration one small chunk at a time.
 # this works only in PostgreSQL
 #  -- note: Python will fill in the CHUNK_SIZE in the LOOP statement using a .format() call
 FUNC_SQL = """create or replace function calc_end_times() returns int as $$
 declare
 n int;
    begin
        for loopid in 0..{} loop
          update phx_cdr set call_end_utc = call_utc + (exact_call_duration || ' second')::interval
           where id = (select id from phx_cdr where call_end_utc is null limit 1);
          exit when not FOUND;
        end loop;
        get diagnostics n = ROW_COUNT;
        return n;
    end;
 $$ LANGUAGE plpgsql;
 """
 RUN_SQL = "select calc_end_times();"  # the data migration is done as a side-effect of this SELECT statement. (Yech!)

 if __name__ == '__main__':
    print('Migration now running as a command line program to convert table phx_cdr...')
    conn = psycopg2.connect(sys.argv[1])  # create our own SQL connection without using django ORM.
    print('    Connected to', conn.dsn)
    c = conn.cursor()
    c.execute(FUNC_SQL.format(CHUNK_SIZE))  # note: this intentional SQL injection is safe from external attack.
    print('    Processing {:,} records per iteration.'.format(CHUNK_SIZE))
    conn.commit()
    c.execute('select 1')  # create a True value for the "while" statement below
    count = 0
    tick = datetime.datetime.now()
    while c.fetchone()[0] == 1:
        c.execute(RUN_SQL)
        conn.commit()
        now = datetime.datetime.now()
        elapsed = now - tick
        count += CHUNK_SIZE
        print('{:%H:%M:%S} last elapsed time={:6.2f} seconds.  now done={:,}'.format(
            now, elapsed.seconds + elapsed.microseconds/1000000.0, count))  # keep our user informed
        time.sleep(0.2)  # be nice. Let someone else have some system access for a moment.
        tick = datetime.datetime.now()
    c.close()
    conn.close()
    print('operation complete.')
	#!/usr/bin/python3
	# this migration will call itself as a command-line program.
	from __future__ import unicode_literals, print_function
	import datetime, subprocess, psycopg2, sys, time

	from django.db import migrations
	from django.conf import settings

	CHUNK_SIZE = 1000
	def call_yourself(apps, schema_editor):
	my_name = __file__
	connection_string = 'host={} dbname={} user={} password={}'.format(
	settings.DATABASES['default']['HOST'],
	settings.DATABASES['default']['NAME'],
	settings.DATABASES['default']['USER'],
	settings.DATABASES['default']['PASSWORD']
	)
	subprocess.call(['chmod', '+x', my_name]) # must have the execute bit set
	print('\n Calling myself as a subprocess...')
	subprocess.Popen(['{} "{}"'.format(my_name, connection_string)], shell=True)
	time.sleep(5.0) # a short pause, so that a test database will have time to be actually processed before proceeding

	def do_nothing(apps, schema_editor):
	pass # we need a callable for the reverse direction, so we use this.


	class Migration(migrations.Migration):

	dependencies = [
	('phx', '0007_cdr_call_end_utc'),
	]

	operations = [
	migrations.RunPython(call_yourself, do_nothing, atomic=False)
	]


	# Command-line code to perform our data migration, because we can't do it in a migration using an Atomic transaction
	# this operates as a command-line program, not as a migrate script
	# The python program repeatedly calls an SQL function which does the migration one small chunk at a time.
	# this works only in PostgreSQL
	# -- note: Python will fill in the CHUNK_SIZE in the LOOP statement using a .format() call
	FUNC_SQL = """create or replace function calc_end_times() returns int as $$
	declare
	n int;
	begin
	for loopid in 0..{} loop
	update phx_cdr set call_end_utc = call_utc + (exact_call_duration \|\| ' second')::interval
	where id = (select id from phx_cdr where call_end_utc is null limit 1);
	exit when not FOUND;
	end loop;
	get diagnostics n = ROW_COUNT;
	return n;
	end;
	$$ LANGUAGE plpgsql;
	"""
	RUN_SQL = "select calc_end_times();" # the data migration is done as a side-effect of this SELECT statement. (Yech!)

	if __name__ == '__main__':
	print('Migration now running as a command line program to convert table phx_cdr...')
	conn = psycopg2.connect(sys.argv[1]) # create our own SQL connection without using django ORM.
	print(' Connected to', conn.dsn)
	c = conn.cursor()
	c.execute(FUNC_SQL.format(CHUNK_SIZE)) # note: this intentional SQL injection is safe from external attack.
	print(' Processing {:,} records per iteration.'.format(CHUNK_SIZE))
	conn.commit()
	c.execute('select 1') # create a True value for the "while" statement below
	count = 0
	tick = datetime.datetime.now()
	while c.fetchone()[0] == 1:
	c.execute(RUN_SQL)
	conn.commit()
	now = datetime.datetime.now()
	elapsed = now - tick
	count += CHUNK_SIZE
	print('{:%H:%M:%S} last elapsed time={:6.2f} seconds. now done={:,}'.format(
	now, elapsed.seconds + elapsed.microseconds/1000000.0, count)) # keep our user informed
	time.sleep(0.2) # be nice. Let someone else have some system access for a moment.
	tick = datetime.datetime.now()
	c.close()
	conn.close()
	print('operation complete.')