Last active
June 23, 2017 01:50
-
-
Save gregplaysguitar/87464b5d604153eb373675fed34bae79 to your computer and use it in GitHub Desktop.
MySQL to PostgreSQL migrate script for Django. Turns a mysql export into a matching postgres database.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Usage | |
===== | |
1. Install Fabric - http://www.fabfile.org/ | |
2. Obtain db_converter.py from | |
https://github.com/lanyrd/mysql-postgresql-converter - put it in the same | |
directory as this fabfile. | |
3. Fill out config in this file, and make sure your migrations for each | |
application match the mysql schema. | |
4. To do a dry run on a test server: | |
fab test_server initial_migrate dump_mysql fetch_mysql process_mysql \ | |
import_data | |
for deployment, change test_server to prod_server. | |
5. Make sure you do the manual "reset sequences" step at the end. | |
""" | |
from fabric.api import run, env | |
from fabric.decorators import hosts | |
from fabric.context_managers import cd, prefix | |
# CONFIGURATION | |
MIGRATE_FIRST = ( | |
# list any django apps which need to be migrated first due to dependencies | |
) | |
# credentials for the the old server and mysql database | |
OLD_SERVER = '' | |
OLD_DB_USER = '' | |
OLD_DB_PASS = '' | |
OLD_DB_NAME = '' | |
PRE_QUERIES = [ | |
# run on the new postgres database prior to data insertion | |
'ALTER TABLE django_content_type ADD COLUMN name varchar(100)', | |
] | |
POST_QUERIES = [ | |
# run on the new postgres database after data insertion | |
"ALTER TABLE django_content_type DROP COLUMN name;", | |
] | |
def test_server(): | |
env.hosts = [''] # new host server | |
env.project_path = '' # django project root | |
env.python = '' # python executable to use - i.e. from a virtualenv | |
# new (postgres) db credentials | |
env.db_name = '' | |
env.db_user = '' | |
env.db_pass = '' | |
def prod_server(): | |
# as per test_server | |
env.hosts = [''] | |
env.project_path = '' | |
env.python = '' | |
env.db_name = '' | |
env.db_user = '' | |
env.db_pass = '' | |
# list all tables which need to be exported mysql - i.e. the output of | |
# SHOW TABLES; | |
TABLES = """ | |
auth_group | |
auth_group_permissions | |
auth_permission | |
auth_user | |
auth_user_groups | |
auth_user_user_permissions | |
django_admin_log | |
django_content_type | |
django_session | |
django_site | |
""" | |
# List all boolean fields in the database - these need to be manually converted | |
# To generate in postgres: | |
# SELECT table_name, column_name | |
# FROM information_schema.columns | |
# WHERE table_schema <> 'pg_catalog' AND data_type = 'boolean'; | |
# | |
# then find and replace: ([\w\_]+)\t([\w\_]+) -> ('$1', '$2'), | |
BOOLEAN_FIELDS = ( | |
('auth_user', 'is_superuser'), | |
('auth_user', 'is_staff'), | |
('auth_user', 'is_active'), | |
) | |
# FAB FUNCTIONS | |
def initial_migrate(overwrite=False, boolean_fields=BOOLEAN_FIELDS, | |
migrate_first=MIGRATE_FIRST, pre_queries=PRE_QUERIES): | |
"""Run the initial migration on the new database, and prepare it for data | |
import. Assumes a blank starting point. """ | |
with cd(env.project_path), prefix('export PGPASSWORD=%s' % env.db_pass): | |
for app in migrate_first: | |
run('%s ./manage.py migrate %s 0001' % (env.python, app)) | |
run('%s ./manage.py migrate' % (env.python)) | |
# make all bools integers since that's what mysql gives us | |
for table, field in boolean_fields: | |
run('psql -U %s %s -c \'ALTER TABLE %s ALTER "%s" TYPE ' | |
'INTEGER USING CASE WHEN false THEN 0 ELSE 1 END;\'' % ( | |
env.db_user, env.db_name, table, field)) | |
for query in pre_queries: | |
run('psql -U %s %s -c \'%s\'' % ( | |
env.db_user, env.db_name, query.replace("\n", ' '))) | |
@hosts(OLD_SERVER) | |
def dump_mysql(tables=TABLES): | |
"""Export the old database as an .sql file """ | |
run(('mysqldump --compatible=postgresql --no-create-info ' | |
'--default-character-set=utf8 -c -u %s -p%s %s %s > data.sql') % ( | |
OLD_DB_USER, OLD_DB_PASS, OLD_DB_NAME, tables.replace("\n", ' '))) | |
def fetch_mysql(): | |
"""Copy the old database dump to the new server. """ | |
with cd(env.project_path): | |
if env.host_string == OLD_SERVER: | |
# same server, so straight copy | |
run('cp ~/data.sql ./') | |
else: | |
# copy from remote server | |
run('rsync -z %s:~/data.sql ./' % (OLD_SERVER)) | |
def process_mysql(): | |
"""Process the database export ready for import into postgres. """ | |
with cd(env.project_path): | |
run('python ./db_converter.py data.sql data.psql') | |
def import_data(post_queries=POST_QUERIES, boolean_fields=BOOLEAN_FIELDS): | |
"""Import the processed mysql dump into the prepared postgres database, | |
then revert the modifications. """ | |
with cd(env.project_path), \ | |
prefix('export PGPASSWORD=%s' % env.db_pass): | |
run('psql -U %s %s -c "TRUNCATE django_content_type CASCADE"' % ( | |
env.db_user, env.db_name)) | |
run('psql -U %s %s -c "TRUNCATE django_site CASCADE"' % ( | |
env.db_user, env.db_name)) | |
run('psql -U %s %s < data.psql' % ( | |
env.db_user, env.db_name)) | |
for query in post_queries: | |
run('psql -U %s %s -c \'%s\'' % ( | |
env.db_user, env.db_name, query)) | |
# revert bools to their correct type | |
for table, field in boolean_fields: | |
run('psql -U %s %s -c \'ALTER TABLE %s ALTER "%s" TYPE BOOLEAN ' | |
'USING CASE WHEN "%s"=0 THEN false ELSE true END;\'' % ( | |
env.db_user, env.db_name, table, field, field)) | |
print('>>> Reset sequences now: <<<') | |
# TODO integrate this - currently needs to be done manually | |
# see http://stackoverflow.com/questions/244243 | |
RESET_SEQUENCES = """CREATE OR REPLACE FUNCTION "reset_sequence" (tablename text, columnname text) | |
RETURNS "pg_catalog"."void" AS | |
$body$ | |
DECLARE | |
BEGIN | |
EXECUTE 'SELECT setval( pg_get_serial_sequence(''' || tablename || ''', ''' || columnname || '''), | |
(SELECT COALESCE(MAX(id)+1,1) FROM ' || tablename || '), false)'; | |
END; | |
$body$ LANGUAGE 'plpgsql'; | |
select table_name || '_' || column_name || '_seq', reset_sequence(table_name, column_name) from information_schema.columns where column_default like 'nextval%';""" | |
print (RESET_SEQUENCES) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment