-
-
Save duyet/d0338a91ff3163d07886 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/sh | |
usage_error () { | |
echo 'Usage: sh migrator.sh <path to sqlite_to_postgres.py> <path to sqlite db file> <an empty dir to output dump files>' | |
echo | |
echo 'Example:' | |
echo '>sh migrator.sh sqlite_to_postgres.py ~/reviewboard.db /tmp/dumps' | |
echo | |
echo 'Tested on:' | |
echo 'Python 2.7.3' | |
echo 'SQLite 3.7.9' | |
} | |
if [ ! $# -eq 3 ] | |
then | |
usage_error | |
exit 1 | |
fi | |
if [ ! -r $1 ] | |
then | |
echo $1' is not readable.' | |
echo 'Please give the correct path to sqlite_to_postgres.py' | |
exit 1 | |
fi | |
if [ ! -r $2 ] | |
then | |
echo $2' is not readable' | |
exit 1 | |
fi | |
if [ ! -d $3 ] | |
then | |
echo $3' is not a valid directory' | |
exit 1 | |
fi | |
#Get the list of tables | |
echo .tables | sqlite3 $2 > $3/lsoftbls | |
#Get dumps from sqlite | |
for i in `cat $3/lsoftbls` | |
do | |
echo 'Generating sqlite dumps for '$i | |
echo '.output '$3'/'$i'.dump' > $3/dumper | |
echo 'pragma table_info('$i');' >> $3/dumper | |
echo '.dump '$i >> $3/dumper | |
echo '.quit' >> $3/dumper | |
cat $3/dumper | sqlite3 $2 | |
done | |
#Use the python script to convert the sqlite dumps to psql dumps | |
echo | |
echo 'Now converting the sqlite dumps into psql format...' | |
echo | |
for i in `ls -1 $3/*.dump` | |
do | |
python $1 $i | |
done | |
#Remove the sqlite3 dumps and the file 'lsoftbls' | |
echo | |
echo 'Removing temporary files..' | |
rm $3/*.dump | |
rm $3/lsoftbls | |
rm $3/dumper | |
echo 'Removing empty dump files..' | |
wc -l $3/*.psql | grep -w 0 | awk '{ print $NF }' | xargs rm | |
echo ; echo 'Done.'; echo | |
echo 'Please find the psql dumps at '$3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
0|display_name|varchar(64)|1||0 | |
1|name|varchar(64)|1||0 | |
2|local_site_id|integer|0||0 | |
3|incoming_request_count|integer|0||0 | |
4|invite_only|bool|1||0 | |
5|id|integer|1||1 | |
6|mailing_list|varchar(75)|1||0 | |
7|visible|bool|1||0 | |
PRAGMA foreign_keys=OFF; | |
BEGIN TRANSACTION; | |
CREATE TABLE "reviews_group"("display_name" varchar(64) NOT NULL, "name" varchar(64) NOT NULL, "local_site_id" integer NULL, "incoming_request_count" integer NULL, "invite_only" bool NOT NULL, "id" integer NOT NULL UNIQUE PRIMARY KEY, "mailing_list" varchar(75) NOT NULL, "visible" bool NOT NULL); | |
INSERT INTO "reviews_group" VALUES('Developers','developers',NULL,127,0,1,'',1); | |
INSERT INTO "reviews_group" VALUES('Testers','testers',NULL,2,0,2,'',1); | |
INSERT INTO "reviews_group" VALUES('QA','qa',NULL,1,0,3,'',1); | |
INSERT INTO "reviews_group" VALUES('Release Engineers','releng',NULL,7,0,4,'',1); | |
INSERT INTO "reviews_group" VALUES('Managers','mgrs',NULL,1,0,5,'',1); | |
COMMIT; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Developers','developers',NULL,127,FALSE,1,'',TRUE); | |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Testers','testers',NULL,2,FALSE,2,'',TRUE); | |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('QA','qa',NULL,1,FALSE,3,'',TRUE); | |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Release Engineers','releng',NULL,7,FALSE,4,'',TRUE); | |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Managers','mgrs',NULL,1,FALSE,5,'',TRUE); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
# SQLite3 uses 1 and 0 whereas PostgreSQL uses TRUE and FALSE for booleans | |
# This python script serves a single purpose of converting the sqlite dumps | |
# into postres-compatible dumps by converting the boolean values. | |
import random | |
import sys | |
import os.path | |
BOUNDARY = '%$#@!~R@ND0M^&*()_B0UND@RY<>?:'+str(int(random.random()*(10**10))) | |
COLUMNS = [] | |
COLUMN_NAMES = "" | |
COLUMN_TYPES = () | |
def usage(): | |
''' | |
Print usage and exit | |
''' | |
print "Usage: ./bool_changer.py <filename.dump>" | |
sys.exit() | |
def fix_column_names(first_line): | |
''' | |
The insert statement from sqlite3 dump is as follows: | |
INSERT INTO "test" VALUES(1,'Hello'); | |
We need to add the column information to the statements like this: | |
INSERT INTO "test" (id,name) VALUES(1,'Wibble'); | |
This is necessary because the column orders may be different in psql db. | |
''' | |
global COLUMN_NAMES | |
index = first_line.index(' VALUES') | |
return first_line[:index] + COLUMN_NAMES + first_line[index:] | |
def fix_bool(stmt): | |
from_here = 'VALUES(' | |
start_pos = stmt.index(from_here) + len(from_here) | |
cur_pos = start_pos | |
newstmt = stmt[:start_pos] # [INSERT ... VALUES(] | |
stmtlen = len(stmt) | |
no_of_cols = len(COLUMN_TYPES) | |
for i in range(0,no_of_cols): | |
if COLUMN_TYPES[i] == 'bool': | |
newstmt += stmt[start_pos:cur_pos] #nothing happens if both are same | |
if stmt[cur_pos] == '1': newstmt += 'TRUE' | |
elif stmt[cur_pos] == '0': newstmt += 'FALSE' | |
if i == no_of_cols-1: #i.e. last column | |
newstmt += ');\n' | |
break | |
newstmt += ',' #not last column | |
cur_pos += 2 | |
start_pos = cur_pos | |
else: | |
if i == no_of_cols-1: #if it's the last non-bool column, then | |
newstmt += stmt[start_pos:] #simply insert everything that's left | |
break #and leave | |
if stmt[cur_pos] != "'": | |
for cur_pos in range(cur_pos+1,stmtlen): | |
if stmt[cur_pos] == ',': | |
cur_pos += 1 | |
break #the inner loop and go to next column | |
else: # the 'problematic' place. cur_pos in "'" | |
cur_pos += 1 #what's next after "'"? | |
while cur_pos < stmtlen: | |
if stmt[cur_pos] == "'": | |
if stmt[cur_pos+1] == "'": #ignore escaped quote ('') | |
cur_pos += 2 | |
continue #searching | |
elif stmt[cur_pos+1] == ",": #end of string | |
cur_pos += 2 | |
break #to next column | |
cur_pos += 1 | |
return newstmt | |
def get_psql_inserts(insert_lines): | |
''' | |
This method will get a list of one or more lines that together constitute | |
a single insert statement from the sqlite dump, manipulates it and | |
returns the list containing the psql compatible insert statement. | |
''' | |
global BOUNDARY | |
#First fix the column name issue. | |
insert_lines[0] = fix_column_names(insert_lines[0]) | |
if 'bool' in COLUMN_TYPES: | |
insert_stmt = BOUNDARY.join(insert_lines) | |
insert_stmt = fix_bool(insert_stmt) | |
insert_lines = insert_stmt.split(BOUNDARY) | |
return insert_lines | |
def process_dump(input_file,output_file): | |
''' | |
Process the file lazily line by line | |
''' | |
def process_insert(insert_lines): | |
''' | |
Helper method to write psql commands into output_file | |
''' | |
psql_inserts = get_psql_inserts(insert_lines) | |
output_file.writelines(psql_inserts) | |
global COLUMNS | |
global COLUMN_NAMES | |
global COLUMN_TYPES | |
after_pragma = False #The first few lines will be schema info upto the | |
#line that starts with "PRAGMA" | |
insert_started = False | |
insert_lines = [] | |
insert_stmt_start = 'INSERT' | |
for line in input_file: | |
#Get the schema info from the head of the dump file | |
if not after_pragma: | |
if line[0].isdigit(): | |
COLUMNS.append(tuple(line.split('|')[1:3])) | |
elif line.startswith('PRAGMA'): | |
after_pragma = True | |
COLUMN_NAMES = str(tuple([name for name,datatype in COLUMNS])) | |
COLUMN_TYPES = tuple([datatype for name,datatype in COLUMNS]) | |
#Python uses single quotes for enclosing a string. | |
#But psql uses double quotes on "column names" and | |
#single quotes on strings inside VALUES(..) | |
COLUMN_NAMES = ' ' + COLUMN_NAMES.replace("'",'"') | |
continue | |
#Ignore the lines from PRAGMA and before INSERT. | |
if not insert_started: | |
if line.startswith('CREATE TABLE'): | |
table_name = line[line.index('"'):] | |
table_name = table_name[:table_name.index('"',1)+1] # '"table_name"' | |
insert_stmt_start = 'INSERT INTO ' + table_name | |
elif line.startswith('INSERT'): | |
insert_started = True | |
else: continue | |
#If the control reaches here, it must mean that the first insert statement | |
#has appeared. But the insert statements may span multiple lines. So, we | |
#collect those lines and process them. | |
if line.startswith(insert_stmt_start): | |
if insert_lines: #True from 2nd insert statement | |
process_insert(insert_lines) #Insert the previous insert statement | |
insert_lines = [line] #and append the current one | |
elif insert_lines: | |
insert_lines.append(line) | |
if not insert_lines: return | |
while insert_lines[-1].endswith(';\n') and \ | |
(insert_lines[-1].startswith('CREATE INDEX') or \ | |
insert_lines[-1].startswith('COMMIT')): | |
insert_lines.pop() #remove the create index and commit lines at the end | |
process_insert(insert_lines) #fix the last insert statement | |
if __name__ == '__main__': | |
if len(sys.argv) != 2: | |
usage() | |
filename = sys.argv[1] | |
output_filename = filename + '.psql' | |
if not os.path.isfile(filename): | |
print "FATAL: Not a valid filename" | |
usage() | |
print sys.argv[0], ': Trying to convert', sys.argv[1] | |
try: | |
input_file = open(filename,'r') | |
output_file = open(output_filename,'w') | |
process_dump(input_file,output_file) | |
finally: | |
input_file.close() | |
output_file.close() | |
print sys.argv[0], ': Converted to', output_filename | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment