#!/bin/bash
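#
# Transfer a single table from a source Postgres/RDS-style instance to Amazon
# Redshift: dump the table to local TSV chunks, gzip them, sync them to S3,
# recreate the table on the target cluster, COPY-load the data, then clean up.
#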
export LC_ALL=en_US.UTF-8
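# Propagate failures from the psql side of pipelines (e.g. the psql | split
# below) so the "[ $? -eq 0 ] || exit 1" checks actually catch them.
set -o pipefail
# Every default below can be overridden by setting the matching DEFAULT_*
# environment variable before running the script.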
SCHEMA=${DEFAULT_SCHEMA:-$1}
TABLE=${DEFAULT_TABLE:-$2}
SOURCEINSTANCE=${DEFAULT_SOURCEINSTANCE:-'source'}
SOURCEUSER=${DEFAULT_SOURCEUSER:-'admin'}
SOURCEPORT=${DEFAULT_SOURCEPORT:-'5439'}
SOURCEDB=${DEFAULT_SOURCEDB:-'db'}
SOURCESCHEMA=${DEFAULT_SOURCESCHEMA:-'public'}
TARGETINSTANCE=${DEFAULT_TARGETINSTANCE:-'target'}
TARGETUSER=${DEFAULT_TARGETUSER:-'admin'}
TARGETPORT=${DEFAULT_TARGETPORT:-'5439'}
TARGETDB=${DEFAULT_TARGETDB:-'db'}
TARGETSCHEMA=${DEFAULT_TARGETSCHEMA:-'public'}
S3BUCKET=${DEFAULT_S3BUCKET:-'s3://mybucket'}
usage() {
echo "Usage is: $0 <schemaname> <tablename>"
echo "Where schemaname is the name of the schema the table is on"
echo "And tablename is the name of the table to be transferred from RDS to RedShift"
}
main() {
echo "==================================================="
echo "$(date +"%F %T") Dumping data from RDS table to local files"
rm -f ${TABLE}.*
psql -h $SOURCEINSTANCE -U ${SOURCEUSER} -p ${SOURCEPORT} -A -t -F$'\t' ${SOURCEDB} -c "SELECT * FROM ${SCHEMA}.${TABLE} $WHERE" | split -l 50000000 - ${TABLE}.tsv.
[ $? -eq 0 ] || exit 1
echo "==================================================="
echo "$(date +"%F %T") Compressing local files"
gzip ${TABLE}.tsv.*
[ $? -eq 0 ] || exit 1
echo "==================================================="
echo "$(date +"%F %T") Transferring local dump files to s3"
s3cmd --config=$(dirname $0)/.s3cfg sync ${TABLE}.tsv.*.gz ${S3BUCKET}/
[ $? -eq 0 ] || exit 1
echo "==================================================="
if [ -n "${DROP}" ]; then
echo "$(date +"%F %T") Drop table from RedShift"
psql -h ${TARGETINSTANCE} -U ${TARGETUSER} -p ${TARGETPORT} -d ${TARGETDB} <<END
DROP TABLE ${TARGETSCHEMA}.${TABLE};
END
[ $? -eq 0 ] || exit 1
fi
echo "==================================================="
echo "$(date +"%F %T") Creating table on RedShift based on table scheme from RDS" | |
echo "" > schema | |
echo "CREATE TABLE ${SCHEMA}.${TABLE} (" >> schema | |
psql -h ${SOURCEINSTANCE} -U ${SOURCEUSER} -p ${SOURCEPORT} ${SOURCEDB} -t -c "select (\"column\" || ' ' || type || ' ENCODE ' || encoding) from pg_table_def where schemaname='$SCHEMA' and tablename = '$TABLE'" | sed 's/ENCODE none/ENCODE RAW/' | tr "\n" "," | sed 's/\([,]*\)$/)/' >> schema | |
SORTKEY=$(psql -h ${SOURCEINSTANCE} -U ${SOURCEUSER} -p ${SOURCEPORT} ${SOURCEDB} -t -c "select \"column\" from pg_table_def where schemaname='$SCHEMA' and tablename = '$TABLE' and sortkey > 0 order by sortkey" | tr "\n" "," | sed 's/\([,]*\)$//') | |
[ -n "$SORTKEY" ] && echo "sortkey ($SORTKEY)" >> schema | |
DESTKEY=$(psql -h ${SOURCEINSTANCE} -U ${SOURCEUSER} -p ${SOURCEPORT} ${SOURCEDB} -t -c "select \"column\" from pg_table_def where schemaname='$SCHEMA' and tablename = '$TABLE' and distkey = true" | tr "\n" "," | sed 's/\([,]*\)$//') | |
[ -n "$DESTKEY" ] && echo "distkey ($DESTKEY)" >> schema | |
echo ";" >> schema | |
cat schema | |
[ $? -eq 0 ] || exit 1 | |
cat schema | |
psql -h ${TARGETINSTANCE} -U ${TARGETUSER} -p ${TARGETPORT} -d ${TARGETDB} -w -f schema | |
[ $? -eq 0 ] || exit 1 | |
echo "===================================================" | |
echo "$(date +"%F %T") Loading data from s3 to RedShift" | |
echo > load_cmd
for i in ${TABLE}.*.gz; do
echo "copy ${TARGETSCHEMA}.${TABLE} from '${S3BUCKET}/$i' CREDENTIALS 'aws_access_key_id=${AWS_ACCESS_KEY_ID};aws_secret_access_key=${AWS_SECRET_ACCESS_KEY}' delimiter '\t' gzip TRUNCATECOLUMNS MAXERROR 100000 NULL 'NULL'; " >> load_cmd
done
psql -h ${TARGETINSTANCE} -U ${TARGETUSER} -p ${TARGETPORT} -d ${TARGETDB} -f load_cmd
[ $? -eq 0 ] || exit 1
echo "==================================================="
echo "$(date +"%F %T") Removing local and s3 files used"
rm ${TABLE}.tsv.*.gz
rm schema
rm load_cmd
s3cmd --config=$(dirname $0)/.s3cfg del ${S3BUCKET}/${TABLE}.tsv.*.gz
echo "==================================================="
echo "$(date +"%F %T") DONE!"
}
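# Argument and prerequisite checks, run before main().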
case $SCHEMA in
"") usage && exit 1;;
(--help|-h|-help|--h) usage && exit 1;;
esac
case $TABLE in
"") usage && exit 1 ;;
(--help|-h|-help|--h) usage && exit 1;;
esac
if ! [ -f ~/.pgpass ]; then
echo "You don't have a .pgpass file in your home directory - please copy it from git"
exit 1
fi
if ! [ -f $(dirname $0)/.s3cfg ]; then
echo "You don't have a .s3cfg file next to this script - please copy it from git"
exit 1
fi
if ! [ -w . ]; then
echo "You don't have write permissions to the current directory - please fix"
exit 1
fi
main