Created
March 4, 2019 21:20
-
-
Save JayBizzle/7a51c13bb5432eabc92d70bfc1de5581 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f | |
# Authors: @esperlu, @artemyk, @gkuenning, @dumblob | |
# FIXME detect empty input file and issue a warning | |
function printerr( s ){ print s | "cat >&2" } | |
BEGIN { | |
if( ARGC != 2 ){ | |
printerr( \ | |
"USAGE: mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \ | |
" file name - (dash) is not supported, because - means stdin") | |
no_END = 1 | |
exit 1 | |
} | |
# Find INT_MAX supported by both this AWK (usually an ISO C signed int) | |
# and SQlite. | |
# On non-8bit-based architectures, the additional bits are safely ignored. | |
# 8bit (lower precision should not exist) | |
s="127" | |
# "63" + 0 avoids potential parser misbehavior | |
if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 } | |
# 16bit | |
s="32767" | |
if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 } | |
# 32bit | |
s="2147483647" | |
if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 } | |
# 64bit (as INTEGER in SQlite3) | |
s="9223372036854775807" | |
if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 } | |
# # 128bit | |
# s="170141183460469231731687303715884105728" | |
# if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 } | |
# # 256bit | |
# s="57896044618658097711785492504343953926634992332820282019728792003956564819968" | |
# if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 } | |
# # 512bit | |
# s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048" | |
# if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 } | |
# # 1024bit | |
# s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608" | |
# if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 } | |
# # higher precision probably not needed | |
FS=",$" | |
print "PRAGMA synchronous = OFF;" | |
print "PRAGMA journal_mode = MEMORY;" | |
print "BEGIN TRANSACTION;" | |
} | |
# historically 3 spaces separate non-argument local variables | |
function bit_to_int( str_bit, powtwo, i, res, bit, overflow ){ | |
powtwo = 1 | |
overflow = 0 | |
# 011101 = 1*2^0 + 0*2^1 + 1*2^2 ... | |
for( i = length( str_bit ); i > 0; --i ){ | |
bit = substr( str_bit, i, 1 ) | |
if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){ | |
printerr( \ | |
NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." ) | |
break | |
} | |
res = res + bit * powtwo | |
# no warning here as it might be the last iteration | |
if( powtwo > INT_MAX_HALF ){ overflow = 1; continue } | |
powtwo = powtwo * 2 | |
} | |
return res | |
} | |
# CREATE TRIGGER statements have funny commenting. Remember we are in trigger. | |
/^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ { | |
gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" ) | |
inTrigger = 1 | |
next | |
} | |
# The end of CREATE TRIGGER has a stray comment terminator | |
/(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next } | |
# The rest of triggers just get passed through | |
inTrigger != 0 { print; next } | |
# CREATE VIEW looks like a TABLE in comments | |
/^\/\*.*(CREATE.*TABLE|create.*table)/ { | |
inView = 1 | |
next | |
} | |
# end of CREATE VIEW | |
/^(\).*(ENGINE|engine).*\*\/;)/ { | |
inView = 0 | |
next | |
} | |
# content of CREATE VIEW | |
inView != 0 { next } | |
# skip comments | |
/^\/\*/ { next } | |
# skip PARTITION statements | |
/^ *[(]?(PARTITION|partition) +[^ ]+/ { next } | |
# print all INSERT lines | |
( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ { | |
prev = "" | |
# first replace \\ by \_ that mysqldump never generates to deal with | |
# sequnces like \\n that should be translated into \n, not \<LF>. | |
# After we convert all escapes we replace \_ by backslashes. | |
gsub( /\\\\/, "\\_" ) | |
# single quotes are escaped by another single quote | |
gsub( /\\'/, "''" ) | |
gsub( /\\n/, "\n" ) | |
gsub( /\\r/, "\r" ) | |
gsub( /\\"/, "\"" ) | |
gsub( /\\\032/, "\032" ) # substitute char | |
gsub( /\\_/, "\\" ) | |
# sqlite3 is limited to 16 significant digits of precision | |
while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){ | |
hexIssue = 1 | |
sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 ) | |
} | |
if( hexIssue ){ | |
printerr( \ | |
NR ": WARN Hex number trimmed (length longer than 16 chars)." ) | |
hexIssue = 0 | |
} | |
next | |
} | |
# CREATE DATABASE is not supported | |
/^(CREATE.*DATABASE|create.*database)/ { next } | |
# print the CREATE line as is and capture the table name | |
/^(CREATE|create)/ { | |
if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){ | |
caseIssue = 1 | |
printerr( \ | |
NR ": WARN Potential case sensitivity issues with table/column naming\n" \ | |
" (see INFO at the end)." ) | |
} | |
if( match( $0, /`[^`]+/ ) ){ | |
tableName = substr( $0, RSTART+1, RLENGTH-1 ) | |
} | |
aInc = 0 | |
prev = "" | |
firstInTable = 1 | |
next | |
} | |
# Replace `FULLTEXT KEY` (probably other `XXXXX KEY`) | |
/^ (FULLTEXT KEY|fulltext key)/ { gsub( /.+(KEY|key)/, " KEY" ) } | |
# Get rid of field lengths in KEY lines | |
/ (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) } | |
aInc == 1 && /PRIMARY KEY|primary key/ { next } | |
# Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY | |
/ (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) } | |
# Print all fields definition lines except the `KEY` lines. | |
/^ / && !/^( (KEY|key)|\);)/ { | |
if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){ | |
aInc = 1 | |
gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" ) | |
} | |
gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " ) | |
gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" ) | |
# FIXME | |
# CREATE TRIGGER [UpdateLastTime] | |
# AFTER UPDATE | |
# ON Package | |
# FOR EACH ROW | |
# BEGIN | |
# UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId; | |
# END | |
gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" ) | |
gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp") | |
gsub( /(COLLATE|collate) [^ ]+ /, "" ) | |
gsub( /(ENUM|enum)[^)]+\)/, "text " ) | |
gsub( /(SET|set)\([^)]+\)/, "text " ) | |
gsub( /UNSIGNED|unsigned/, "" ) | |
gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" ) | |
gsub( /ZEROFILL|zerofill/, "" )x | |
gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" ) | |
ere_bit_field = "[bB]'[10]+'" | |
if( match($0, ere_bit_field) ){ | |
sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) ) | |
} | |
# field comments are not supported | |
gsub( / (COMMENT|comment).+$/, "" ) | |
# Get commas off end of line | |
gsub( /,.?$/, "" ) | |
if( prev ){ | |
if( firstInTable ){ | |
print prev | |
firstInTable = 0 | |
} | |
else { | |
print "," prev | |
} | |
} | |
else { | |
# FIXME check if this is correct in all cases | |
if( match( $1, | |
/(CONSTRAINT|constraint) \".*\" (FOREIGN KEY|foreign key)/ ) ){ | |
print "," | |
} | |
} | |
prev = $1 | |
} | |
/ ENGINE| engine/ { | |
if( prev ){ | |
if( firstInTable ){ | |
print prev | |
firstInTable = 0 | |
} | |
else { | |
print "," prev | |
} | |
} | |
prev="" | |
print ");" | |
next | |
} | |
# `KEY` lines are extracted from the `CREATE` block and stored in array for later print | |
# in a separate `CREATE KEY` command. The index name is prefixed by the table name to | |
# avoid a sqlite error for duplicate index name. | |
/^( (KEY|key)|\);)/ { | |
if( prev ){ | |
if( firstInTable ){ | |
print prev | |
firstInTable = 0 | |
} | |
else { | |
print "," prev | |
} | |
} | |
prev = "" | |
if( $0 == ");" ){ | |
} | |
else { | |
if( match( $0, /`[^`]+/ ) ){ | |
indexName = substr( $0, RSTART+1, RLENGTH-1 ) | |
} | |
if( match( $0, /\([^()]+/ ) ){ | |
indexKey = substr( $0, RSTART+1, RLENGTH-1 ) | |
} | |
# idx_ prefix to avoid name clashes (they really happen!) | |
key[tableName] = key[tableName] "CREATE INDEX \"idx_" \ | |
tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n" | |
} | |
} | |
END { | |
if( no_END ){ exit 1} | |
# print all KEY creation lines. | |
for( table in key ){ printf key[table] } | |
print "END TRANSACTION;" | |
if( caseIssue ){ | |
printerr( \ | |
"INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \ | |
" or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \ | |
" identifiers. Thus expect errors like \"table T has no column named F\".") | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment