Skip to content

Instantly share code, notes, and snippets.

@gregplaysguitar
Last active June 23, 2017 01:50
Show Gist options
  • Save gregplaysguitar/87464b5d604153eb373675fed34bae79 to your computer and use it in GitHub Desktop.
Save gregplaysguitar/87464b5d604153eb373675fed34bae79 to your computer and use it in GitHub Desktop.
MySQL to PostgreSQL migration script for Django. Turns a MySQL export into a matching PostgreSQL database.
"""
Usage
=====
1. Install Fabric - http://www.fabfile.org/
2. Obtain db_converter.py from
https://github.com/lanyrd/mysql-postgresql-converter - put it in the same
directory as this fabfile.
3. Fill out config in this file, and make sure your migrations for each
application match the mysql schema.
4. To do a dry run on a test server:
fab test_server initial_migrate dump_mysql fetch_mysql process_mysql \
import_data
for deployment, change test_server to prod_server.
5. Make sure you do the manual "reset sequences" step at the end.
"""
from fabric.api import run, env
from fabric.decorators import hosts
from fabric.context_managers import cd, prefix
# CONFIGURATION

# Django apps whose migrations must run before everything else because
# other apps' migrations depend on them.
MIGRATE_FIRST = ()

# Credentials for the old server and its mysql database.
OLD_SERVER = ''
OLD_DB_USER = ''
OLD_DB_PASS = ''
OLD_DB_NAME = ''

# SQL run against the new postgres database prior to data insertion.
PRE_QUERIES = [
    'ALTER TABLE django_content_type ADD COLUMN name varchar(100)',
]

# SQL run against the new postgres database after data insertion.
POST_QUERIES = [
    "ALTER TABLE django_content_type DROP COLUMN name;",
]
def test_server():
    """Point fabric at the test server and set the new (postgres)
    database credentials used by the other tasks."""
    env.hosts = ['']          # new host server
    env.project_path = ''     # django project root
    env.python = ''           # python executable - i.e. from a virtualenv
    # credentials for the new postgres database
    env.db_name = ''
    env.db_user = ''
    env.db_pass = ''
def prod_server():
    """Point fabric at the production server; same settings as
    test_server but for the live deployment."""
    env.hosts = ['']          # new host server
    env.project_path = ''     # django project root
    env.python = ''           # python executable - i.e. from a virtualenv
    # credentials for the new postgres database
    env.db_name = ''
    env.db_user = ''
    env.db_pass = ''
# Every table to export from mysql - i.e. the output of SHOW TABLES;
TABLES = """
auth_group
auth_group_permissions
auth_permission
auth_user
auth_user_groups
auth_user_user_permissions
django_admin_log
django_content_type
django_session
django_site
"""

# Every boolean column in the database, as (table, column) pairs. These
# need manual conversion because mysql dumps booleans as 0/1 integers.
# To generate the list from postgres:
#
#   SELECT table_name, column_name
#   FROM information_schema.columns
#   WHERE table_schema <> 'pg_catalog' AND data_type = 'boolean';
#
# then find and replace: ([\w\_]+)\t([\w\_]+) -> ('$1', '$2'),
BOOLEAN_FIELDS = (
    ('auth_user', 'is_superuser'),
    ('auth_user', 'is_staff'),
    ('auth_user', 'is_active'),
)
# FAB FUNCTIONS
def initial_migrate(overwrite=False, boolean_fields=BOOLEAN_FIELDS,
                    migrate_first=MIGRATE_FIRST, pre_queries=PRE_QUERIES):
    """Run the initial migration on the new database, and prepare it for
    data import. Assumes a blank starting point.

    Args:
        overwrite: unused; kept for interface compatibility.
        boolean_fields: (table, column) pairs of boolean columns to
            temporarily widen to INTEGER, since the mysql dump represents
            booleans as 0/1.
        migrate_first: django apps to migrate (to 0001) before the rest.
        pre_queries: extra SQL to run before the data is inserted.
    """
    with cd(env.project_path), prefix('export PGPASSWORD=%s' % env.db_pass):
        # apps with cross-app dependencies get their first migration early
        for app in migrate_first:
            run('%s ./manage.py migrate %s 0001' % (env.python, app))
        run('%s ./manage.py migrate' % (env.python))
        # make all bools integers since that's what mysql gives us.
        # BUG FIX: the original USING clause was
        # 'CASE WHEN false THEN 0 ELSE 1 END' - a constant-false condition
        # that converts every existing row to 1; convert the column's
        # actual boolean value instead.
        for table, field in boolean_fields:
            run('psql -U %s %s -c \'ALTER TABLE %s ALTER "%s" TYPE '
                'INTEGER USING CASE WHEN "%s" THEN 1 ELSE 0 END;\'' % (
                    env.db_user, env.db_name, table, field, field))
        # pre-import schema tweaks; newlines collapsed so the SQL survives
        # the shell single-quoting
        for query in pre_queries:
            run('psql -U %s %s -c \'%s\'' % (
                env.db_user, env.db_name, query.replace("\n", ' ')))
@hosts(OLD_SERVER)
def dump_mysql(tables=TABLES):
    """Export the old database as ~/data.sql on the old server."""
    # postgres-compatible dump: data only, utf8, full column names
    command = ('mysqldump --compatible=postgresql --no-create-info '
               '--default-character-set=utf8 -c -u %s -p%s %s %s > data.sql')
    table_list = tables.replace("\n", ' ')
    run(command % (OLD_DB_USER, OLD_DB_PASS, OLD_DB_NAME, table_list))
def fetch_mysql():
    """Bring the old database dump into the new server's project dir."""
    with cd(env.project_path):
        on_same_host = env.host_string == OLD_SERVER
        if on_same_host:
            # the dump already lives on this machine - plain copy
            run('cp ~/data.sql ./')
        else:
            # pull it across from the old server
            run('rsync -z %s:~/data.sql ./' % (OLD_SERVER))
def process_mysql():
    """Convert the mysql dump (data.sql) into postgres-compatible SQL
    (data.psql) using lanyrd's db_converter script."""
    with cd(env.project_path):
        run('python ./db_converter.py data.sql data.psql')
def import_data(post_queries=POST_QUERIES, boolean_fields=BOOLEAN_FIELDS):
    """Import the processed mysql dump into the prepared postgres database,
    then revert the modifications made by initial_migrate.

    Args:
        post_queries: extra SQL to run after the data is inserted.
        boolean_fields: (table, column) pairs previously widened to
            INTEGER, to be restored to BOOLEAN.
    """
    with cd(env.project_path), \
            prefix('export PGPASSWORD=%s' % env.db_pass):
        # django's migrate pre-populated these tables; empty them so the
        # imported rows don't collide
        run('psql -U %s %s -c "TRUNCATE django_content_type CASCADE"' % (
            env.db_user, env.db_name))
        run('psql -U %s %s -c "TRUNCATE django_site CASCADE"' % (
            env.db_user, env.db_name))
        run('psql -U %s %s < data.psql' % (
            env.db_user, env.db_name))
        # CONSISTENCY FIX: collapse newlines as initial_migrate does for
        # pre_queries, so multi-line SQL survives the shell single-quoting
        for query in post_queries:
            run('psql -U %s %s -c \'%s\'' % (
                env.db_user, env.db_name, query.replace("\n", ' ')))
        # revert bools to their correct type
        for table, field in boolean_fields:
            run('psql -U %s %s -c \'ALTER TABLE %s ALTER "%s" TYPE BOOLEAN '
                'USING CASE WHEN "%s"=0 THEN false ELSE true END;\'' % (
                    env.db_user, env.db_name, table, field, field))
    print('>>> Reset sequences now: <<<')
# TODO integrate this - currently it must be run against postgres by hand
# see http://stackoverflow.com/questions/244243
RESET_SEQUENCES = """CREATE OR REPLACE FUNCTION "reset_sequence" (tablename text, columnname text)
RETURNS "pg_catalog"."void" AS
$body$
DECLARE
BEGIN
EXECUTE 'SELECT setval( pg_get_serial_sequence(''' || tablename || ''', ''' || columnname || '''),
(SELECT COALESCE(MAX(id)+1,1) FROM ' || tablename || '), false)';
END;
$body$ LANGUAGE 'plpgsql';
select table_name || '_' || column_name || '_seq', reset_sequence(table_name, column_name) from information_schema.columns where column_default like 'nextval%';"""
print(RESET_SEQUENCES)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment