Last active
January 27, 2020 15:38
-
-
Save shantanuo/5011482 to your computer and use it in GitHub Desktop.
Make sure that the package s3cmd is installed and PHP supports pgsql driver. time sh -xv mysql_to_redshift.sh test email_lists This will create a text file createme.sql.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# download this script | |
# make sure s3cmd is installed | |
# yum install s3cmd | |
# if yum mentioned above does not work then | |
# wget http://downloads.sourceforge.net/project/s3tools/s3cmd/1.5.0-alpha1/s3cmd-1.5.0-alpha1.tar.gz | |
# tar xvf s3cmd-1.5.0-alpha1.tar.gz | |
# cd s3cmd-1.5.0-alpha1 | |
# python setup.py install | |
# python 2.6+ is required | |
# make sure postgresql is supported by PHP | |
# php -r"phpinfo();" | grep pgsql | |
mydb=${1:-'test'} | |
mytable=${2:-'email_credit_summary'} | |
s3folder=`date '+%b%d'` | |
mydatadir=`mysqladmin variables | grep datadir | awk '{print $4}'` | |
access_key='XYZ' | |
secret_key='ABC' | |
redshift_host='viva-mar5-deliveryreport.ccf0ocqhkalc.us-east-1.redshift.amazonaws.com' | |
redshift_port='5439' | |
redshift_dbname='mydb' | |
redshift_user='root' | |
redshift_password='PASSWD' | |
redshift_param="delimiter '\t' emptyasnull blanksasnull maxerror 5000 ignoreblanklines gzip" | |
whereclause="1=1 limit 1000" | |
myoptions="--compact --compatible=mssql,no_key_options,no_table_options,no_field_options --where=\"$whereclause\"" | |
rm -f $mydatadir/$mytable.txt | |
rm -f $mydatadir/$mytable.txt.gz | |
echo "time mysqldump $myoptions $mydb $mytable --tab=$mydatadir" > mydump.txt | |
cat mydump.txt | sh | |
gzip $mydatadir/$mytable.txt | |
s3cmd mb s3://$s3folder | |
s3cmd put $mydatadir/$mytable.txt.gz s3://$s3folder | |
s3cmd put $mydatadir/$mytable.sql s3://$s3folder | |
cat /var/lib/mysql/$mytable.sql | sed 's/(11)//' | sed 's/ int/ integer/' > $mytable.redshift.sql | |
cat $mytable.redshift.sql | | |
grep -v ' KEY "' | | |
grep -v ' UNIQUE KEY "' | | |
grep -v ' PRIMARY KEY ' | | |
sed '/^SET/d' | | |
sed 's/ unsigned / /g' | | |
sed 's/ auto_increment/ primary key autoincrement/g' | | |
sed 's/ tinyint([0-9]*) / smallint /g' | | |
sed 's/ smallint([0-9]*) / smallint /g' | | |
sed 's/ mediumint([0-9]*) / integer /g' | | |
sed 's/ int([0-9]*) / integer /g' | | |
sed 's/ integer([0-9]*) / integer /g' | | |
sed 's/ bigint([0-9]*) / bigint /g' | | |
sed 's/ double / float /g' | | |
sed 's/ tinyblob / text /g' | | |
sed 's/ blob / text /g' | | |
sed 's/ mediumblob / text /g' | | |
sed 's/ longblob / text /g' | | |
sed 's/ tinytext / text /g' | | |
sed 's/ mediumtext / text /g' | | |
sed 's/ longtext / text /g' | | |
sed 's/ tinyblob/ text /g' | | |
sed 's/ blob/ text /g' | | |
sed 's/ mediumblob/ text /g' | | |
sed 's/ longblob/ text /g' | | |
sed 's/ tinytext/ text /g' | | |
sed 's/ mediumtext/ text /g' | | |
sed 's/ longtext/ text /g' | | |
sed 's/ CHARACTER SET \w*/ /g' | | |
sed 's/ enum([^)]*) / varchar(255) /g' | | |
sed "s/ COMMENT.*'[^']*'/ /" | | |
sed "s/date DEFAULT '0000-00-00'/ date /g" | | |
sed "s/date NOT NULL DEFAULT '0000-00-00'/ date /g" | | |
sed "s/timestamp NOT NULL DEFAULT '0000-00-00 00:00:00'/ timestamp /g" | | |
sed "s/timestamp DEFAULT '0000-00-00 00:00:00'/ timestamp /g" | | |
sed "s/datetime NOT NULL DEFAULT '0000-00-00 00:00:00'/ datetime /g" | | |
sed "s/datetime DEFAULT '0000-00-00 00:00:00'/ datetime /g" | | |
sed 's/ on update [^,]*//g' | | |
sed 's/a/aA/g;s/__/aB/g;s/#/aC/g' | gcc -P -E $arg - | sed 's/aC/#/g;s/aB/__/g;s/aA/a/g' | | |
perl -e 'local $/;$_=<>;s/,\n\)/\n\)/gs;print "\n";print;print "\n"' | | |
perl -pe ' | |
if (/^(INSERT.+?)\(/) { | |
$a=$1; | |
s/\\'\''/'\'\''/g; | |
s/\\n/\n/g; | |
s/\),\(/\);\n$a\(/g; | |
} | |
' > $mytable.redshift.me.sql.txt 2> $mytable.redshift.me.err | |
echo "copy $mytable from 's3://$s3folder/$mytable.txt' CREDENTIALS 'aws_access_key_id=$access_key;aws_secret_access_key=$secret_key' $redshift_param " >> $mytable.redshift.me.sql.txt | |
cat $mytable.redshift.me.sql.txt | tr '\n' ' ' > $mytable.redshift.me1.sql.txt | |
cat > myredshift.php << heredoc | |
<?php | |
\$conn=pg_connect("host=$redshift_host port=$redshift_port dbname=$redshift_dbname user=$redshift_user password=$redshift_password"); | |
\$str=file_get_contents("$mytable.redshift.me1.sql.txt"); | |
pg_query(\$conn, \$str); | |
?> | |
heredoc | |
php myredshift.php | |
exit | |
1) Download the script | |
wget --no-check-certificate https://gist.github.com/shantanuo/5011482/raw/fb7fce38c2a4d296c67db715e02167caee8c79bf/mysql_to_redshift.sh | |
2) change the passwords | |
sed -i 's_XYZ_accesskey_' mysql_to_redshift.sh | |
sed -i 's_ABC_secretkey_' mysql_to_redshift.sh | |
sed -i 's_PASSWD_Root1234_' mysql_to_redshift.sh | |
// remove the limit of 1000 rows | |
sed -i 's/limit 1000//' mysql_to_redshift.sh | |
3) run the script with DB_name TBL_name | |
time sh -xv mysql_to_redshift.sh test email_lists | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script is awesome, thanks!