Skip to content

Instantly share code, notes, and snippets.

@duyet
Forked from vigneshwaranr/migrator.sh
Last active August 29, 2015 14:19
Show Gist options
  • Save duyet/d0338a91ff3163d07886 to your computer and use it in GitHub Desktop.
Save duyet/d0338a91ff3163d07886 to your computer and use it in GitHub Desktop.
#! /bin/sh
usage_error () {
echo 'Usage: sh migrator.sh <path to sqlite_to_postgres.py> <path to sqlite db file> <an empty dir to output dump files>'
echo
echo 'Example:'
echo '>sh migrator.sh sqlite_to_postgres.py ~/reviewboard.db /tmp/dumps'
echo
echo 'Tested on:'
echo 'Python 2.7.3'
echo 'SQLite 3.7.9'
}
if [ ! $# -eq 3 ]
then
usage_error
exit 1
fi
if [ ! -r $1 ]
then
echo $1' is not readable.'
echo 'Please give the correct path to sqlite_to_postgres.py'
exit 1
fi
if [ ! -r $2 ]
then
echo $2' is not readable'
exit 1
fi
if [ ! -d $3 ]
then
echo $3' is not a valid directory'
exit 1
fi
#Get the list of tables
echo .tables | sqlite3 $2 > $3/lsoftbls
#Get dumps from sqlite
for i in `cat $3/lsoftbls`
do
echo 'Generating sqlite dumps for '$i
echo '.output '$3'/'$i'.dump' > $3/dumper
echo 'pragma table_info('$i');' >> $3/dumper
echo '.dump '$i >> $3/dumper
echo '.quit' >> $3/dumper
cat $3/dumper | sqlite3 $2
done
#Use the python script to convert the sqlite dumps to psql dumps
echo
echo 'Now converting the sqlite dumps into psql format...'
echo
for i in `ls -1 $3/*.dump`
do
python $1 $i
done
#Remove the sqlite3 dumps and the file 'lsoftbls'
echo
echo 'Removing temporary files..'
rm $3/*.dump
rm $3/lsoftbls
rm $3/dumper
echo 'Removing empty dump files..'
wc -l $3/*.psql | grep -w 0 | awk '{ print $NF }' | xargs rm
echo ; echo 'Done.'; echo
echo 'Please find the psql dumps at '$3
0|display_name|varchar(64)|1||0
1|name|varchar(64)|1||0
2|local_site_id|integer|0||0
3|incoming_request_count|integer|0||0
4|invite_only|bool|1||0
5|id|integer|1||1
6|mailing_list|varchar(75)|1||0
7|visible|bool|1||0
PRAGMA foreign_keys=OFF;
BEGIN TRANSACTION;
CREATE TABLE "reviews_group"("display_name" varchar(64) NOT NULL, "name" varchar(64) NOT NULL, "local_site_id" integer NULL, "incoming_request_count" integer NULL, "invite_only" bool NOT NULL, "id" integer NOT NULL UNIQUE PRIMARY KEY, "mailing_list" varchar(75) NOT NULL, "visible" bool NOT NULL);
INSERT INTO "reviews_group" VALUES('Developers','developers',NULL,127,0,1,'',1);
INSERT INTO "reviews_group" VALUES('Testers','testers',NULL,2,0,2,'',1);
INSERT INTO "reviews_group" VALUES('QA','qa',NULL,1,0,3,'',1);
INSERT INTO "reviews_group" VALUES('Release Engineers','releng',NULL,7,0,4,'',1);
INSERT INTO "reviews_group" VALUES('Managers','mgrs',NULL,1,0,5,'',1);
COMMIT;
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Developers','developers',NULL,127,FALSE,1,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Testers','testers',NULL,2,FALSE,2,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('QA','qa',NULL,1,FALSE,3,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Release Engineers','releng',NULL,7,FALSE,4,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Managers','mgrs',NULL,1,FALSE,5,'',TRUE);
#! /usr/bin/python
# SQLite3 uses 1 and 0 whereas PostgreSQL uses TRUE and FALSE for booleans
# This python script serves a single purpose of converting the sqlite dumps
# into postres-compatible dumps by converting the boolean values.
import random
import sys
import os.path
BOUNDARY = '%$#@!~R@ND0M^&*()_B0UND@RY<>?:'+str(int(random.random()*(10**10)))
COLUMNS = []
COLUMN_NAMES = ""
COLUMN_TYPES = ()
def usage():
'''
Print usage and exit
'''
print "Usage: ./bool_changer.py <filename.dump>"
sys.exit()
def fix_column_names(first_line):
'''
The insert statement from sqlite3 dump is as follows:
INSERT INTO "test" VALUES(1,'Hello');
We need to add the column information to the statements like this:
INSERT INTO "test" (id,name) VALUES(1,'Wibble');
This is necessary because the column orders may be different in psql db.
'''
global COLUMN_NAMES
index = first_line.index(' VALUES')
return first_line[:index] + COLUMN_NAMES + first_line[index:]
def fix_bool(stmt):
from_here = 'VALUES('
start_pos = stmt.index(from_here) + len(from_here)
cur_pos = start_pos
newstmt = stmt[:start_pos] # [INSERT ... VALUES(]
stmtlen = len(stmt)
no_of_cols = len(COLUMN_TYPES)
for i in range(0,no_of_cols):
if COLUMN_TYPES[i] == 'bool':
newstmt += stmt[start_pos:cur_pos] #nothing happens if both are same
if stmt[cur_pos] == '1': newstmt += 'TRUE'
elif stmt[cur_pos] == '0': newstmt += 'FALSE'
if i == no_of_cols-1: #i.e. last column
newstmt += ');\n'
break
newstmt += ',' #not last column
cur_pos += 2
start_pos = cur_pos
else:
if i == no_of_cols-1: #if it's the last non-bool column, then
newstmt += stmt[start_pos:] #simply insert everything that's left
break #and leave
if stmt[cur_pos] != "'":
for cur_pos in range(cur_pos+1,stmtlen):
if stmt[cur_pos] == ',':
cur_pos += 1
break #the inner loop and go to next column
else: # the 'problematic' place. cur_pos in "'"
cur_pos += 1 #what's next after "'"?
while cur_pos < stmtlen:
if stmt[cur_pos] == "'":
if stmt[cur_pos+1] == "'": #ignore escaped quote ('')
cur_pos += 2
continue #searching
elif stmt[cur_pos+1] == ",": #end of string
cur_pos += 2
break #to next column
cur_pos += 1
return newstmt
def get_psql_inserts(insert_lines):
'''
This method will get a list of one or more lines that together constitute
a single insert statement from the sqlite dump, manipulates it and
returns the list containing the psql compatible insert statement.
'''
global BOUNDARY
#First fix the column name issue.
insert_lines[0] = fix_column_names(insert_lines[0])
if 'bool' in COLUMN_TYPES:
insert_stmt = BOUNDARY.join(insert_lines)
insert_stmt = fix_bool(insert_stmt)
insert_lines = insert_stmt.split(BOUNDARY)
return insert_lines
def process_dump(input_file,output_file):
'''
Process the file lazily line by line
'''
def process_insert(insert_lines):
'''
Helper method to write psql commands into output_file
'''
psql_inserts = get_psql_inserts(insert_lines)
output_file.writelines(psql_inserts)
global COLUMNS
global COLUMN_NAMES
global COLUMN_TYPES
after_pragma = False #The first few lines will be schema info upto the
#line that starts with "PRAGMA"
insert_started = False
insert_lines = []
insert_stmt_start = 'INSERT'
for line in input_file:
#Get the schema info from the head of the dump file
if not after_pragma:
if line[0].isdigit():
COLUMNS.append(tuple(line.split('|')[1:3]))
elif line.startswith('PRAGMA'):
after_pragma = True
COLUMN_NAMES = str(tuple([name for name,datatype in COLUMNS]))
COLUMN_TYPES = tuple([datatype for name,datatype in COLUMNS])
#Python uses single quotes for enclosing a string.
#But psql uses double quotes on "column names" and
#single quotes on strings inside VALUES(..)
COLUMN_NAMES = ' ' + COLUMN_NAMES.replace("'",'"')
continue
#Ignore the lines from PRAGMA and before INSERT.
if not insert_started:
if line.startswith('CREATE TABLE'):
table_name = line[line.index('"'):]
table_name = table_name[:table_name.index('"',1)+1] # '"table_name"'
insert_stmt_start = 'INSERT INTO ' + table_name
elif line.startswith('INSERT'):
insert_started = True
else: continue
#If the control reaches here, it must mean that the first insert statement
#has appeared. But the insert statements may span multiple lines. So, we
#collect those lines and process them.
if line.startswith(insert_stmt_start):
if insert_lines: #True from 2nd insert statement
process_insert(insert_lines) #Insert the previous insert statement
insert_lines = [line] #and append the current one
elif insert_lines:
insert_lines.append(line)
if not insert_lines: return
while insert_lines[-1].endswith(';\n') and \
(insert_lines[-1].startswith('CREATE INDEX') or \
insert_lines[-1].startswith('COMMIT')):
insert_lines.pop() #remove the create index and commit lines at the end
process_insert(insert_lines) #fix the last insert statement
if __name__ == '__main__':
if len(sys.argv) != 2:
usage()
filename = sys.argv[1]
output_filename = filename + '.psql'
if not os.path.isfile(filename):
print "FATAL: Not a valid filename"
usage()
print sys.argv[0], ': Trying to convert', sys.argv[1]
try:
input_file = open(filename,'r')
output_file = open(output_filename,'w')
process_dump(input_file,output_file)
finally:
input_file.close()
output_file.close()
print sys.argv[0], ': Converted to', output_filename
print
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment