Last active
February 10, 2021 09:21
-
-
Save shantanuo/5115366 to your computer and use it in GitHub Desktop.
script to convert mysql schema to be compatible with data warehouse software
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# script to convert mysql schema to be compatible with data warehouse software | |
# make sure that s3cmd and maatkit utility is installed | |
db_name=${1:-'test'} | |
> /root/$db_name.txt | |
temppath='/mnt/data/pdump1' | |
host='localhost' | |
user='maatkit' | |
password='maatkit123' | |
bucket=`date '+%b%d'` | |
s3path="s3://$bucket$db_name" | |
access='ABC' | |
secret='PQR+XYZ' | |
options="emptyasnull blanksasnull maxerror 5000 ignoreblanklines escape delimiter '\t' gzip " | |
adminmail='[email protected]' | |
# reset dump path | |
rm -rf $temppath | |
mkdir $temppath | |
#mkdir $temppath/$db_name | |
chmod 777 $temppath | |
# use maatkit to dump tab separated data | |
time mk-parallel-dump -h"$host" -u"$user" -p"$password" --base-dir "$temppath" --databases $db_name --tab | |
# if maatkit is not installed, use built-in tab parameter | |
#extra="--tab=$temppath/$db_name -f --no-create-info" | |
#time mysqldump $db_name $extra | |
# generate drop table statements | |
for tbl_name in ` mysql $db_name -Bse"show tables" ` | |
do | |
echo "drop table $tbl_name ;" >> /root/$db_name.txt | |
done | |
# create table statements modified using sed | |
mysqldump $db_name --skip-triggers --no-data --compact --compatible=ansi,no_table_options,no_key_options,no_field_options --force | | |
grep -v ' KEY "' | | |
grep -v ' UNIQUE KEY "' | | |
grep -v ' PRIMARY KEY ' | | |
grep -v ' CONSTRAINT "' | | |
sed '/^SET/d' | | |
sed 's/ unsigned / /g' | | |
sed 's/ zerofill / /g' | | |
sed 's/ auto_increment/ primary key autoincrement/g' | | |
sed 's/ tinyint([0-9]*) / smallint /g' | | |
sed 's/ smallint([0-9]*) / smallint /g' | | |
sed 's/ mediumint([0-9]*) / integer /g' | | |
sed 's/ int([0-9]*) / integer /g' | | |
sed 's/ bigint([0-9]*) / bigint /g' | | |
sed 's/ double / float /g' | | |
sed 's/ double(.*) / float /g' | | |
sed 's/ double, / float, /g' | | |
sed 's/ double(.*), / float, /g' | | |
sed 's/ float(.*)/ float /g' | | |
sed 's/ decimal(.*)/ decimal(29,2) /g' | | |
sed 's/ time / varchar(255) /g' | | |
sed 's/ time,/ varchar(255), /g' | | |
sed 's/ tinytext/ varchar(255) /g' | | |
sed 's/ text / varchar(max) /g' | | |
sed 's/ text,/ varchar(max), /g' | | |
sed 's/ mediumtext/ varchar(max) /g' | | |
sed 's/ longtext/ varchar(max) /g' | | |
sed 's/ tinyblob/ varchar(max) /g' | | |
sed 's/ blob/ varchar(max) /g' | | |
sed 's/ mediumblob/ varchar(max) /g' | | |
sed 's/ longblob/ varchar(max) /g' | | |
sed 's/ set(.*)/ varchar(max) /g' | | |
sed 's/ CHARACTER SET \w*/ /g' | | |
sed 's/ enum([^)]*) / varchar(255) /g' | | |
sed "s/ COMMENT.*'[^']*'/ /" | | |
sed "s/date DEFAULT '0000-00-00'/ date /g" | | |
sed "s/date NOT NULL DEFAULT '0000-00-00'/ date /g" | | |
sed "s/timestamp NOT NULL DEFAULT '0000-00-00 00:00:00'/ timestamp /g" | | |
sed "s/timestamp DEFAULT '0000-00-00 00:00:00'/ timestamp /g" | | |
sed "s/datetime NOT NULL DEFAULT '0000-00-00 00:00:00'/ datetime /g" | | |
sed "s/datetime DEFAULT '0000-00-00 00:00:00'/ datetime /g" | | |
sed 's/ on update [^,]*//g' | | |
sed 's/a/aA/g;s/__/aB/g;s/#/aC/g' | sed 's/aC/#/g;s/aB/__/g;s/aA/a/g' | | |
sed 's/ DEFAULT .*,/,/' | | |
sed 's/ DEFAULT .*//' | | |
sed 's/NOT NULL AUTO_INCREMENT,/,/' | | |
sed 's/NOT NULL//' | | |
sed 's/ char(.*)/ varchar(max)/' | | |
sed '/\/\*/d' | | |
sed -r ':a; s%(.*)/\*.*\*/%\1%; ta; /\/\*/ !b; N; ba' | | |
perl -e 'local $/;$_=<>;s/,\n\)/\n\)/gs;print "\n";print;print "\n"' | | |
perl -pe ' | |
if (/^(INSERT.+?)\(/) { | |
$a=$1; | |
s/\\'\''/'\'\''/g; | |
s/\\n/\n/g; | |
s/\),\(/\);\n$a\(/g; | |
} | |
' >> /root/$db_name.txt 2> /root/$db_name.err | |
# generate copy statements | |
for tbl_name in ` mysql $db_name -Bse"show tables" ` | |
do | |
echo "copy $tbl_name from '$s3path/$tbl_name.' credentials 'aws_access_key_id=$access;aws_secret_access_key=$secret' $options;" >> /root/$db_name.txt | |
done | |
# compress files | |
time gzip $temppath/$db_name/*.txt | |
# copy to amazon s3 | |
s3cmd mb $s3path | |
s3cmd sync /$temppath/$db_name/ $s3path | |
# send the create table statements file by email | |
echo "create table statements compatible with redshift for db $db_name attached. " | mutt -s "redshift commands for $db_name" -a /root/$db_name.txt -- $adminmail | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey!
Thanks for this script, it helped me a lot to get my redshift import working. Just curious, why you added line 93 here, since this change broke my import and am interested what your intent was to add it and what problem it should solve.
Thanks again!