Skip to content

Instantly share code, notes, and snippets.

@smintz
Created July 15, 2013 13:08
Show Gist options
  • Save smintz/5999799 to your computer and use it in GitHub Desktop.
Save smintz/5999799 to your computer and use it in GitHub Desktop.
#!/bin/bash
export LC_ALL=en_US.UTF-8
SCHEMA=${DEFAULT_SCHEMA:-$1}
TABLE=${DEFAULT_TABLE:-$2}
SOURCEINSTANCE=${DEFAULT_SOURCEINSTANCE:-'source'}
SOURCEUSER=${DEFAULT_SOURCEUSER:-'admin'}
SOURCEPORT=${DEFAULT_SOURCEPORT:-'5439'}
SOURCEDB=${DEFAULT_SOURCEDB:-'db'}
SOURCESCHEMA=${DEFAULT_SOURCESCHEMA:-'public'}
TARGETINSTANCE=${DEFAULT_TARGETINSTANCE:-'target'}
TARGETUSER=${DEFAULT_TARGETUSER:-'admin'}
TARGETPORT=${DEFAULT_TARGETPORT:-'5439'}
TARGETDB=${DEFAULT_TARGETDB:-'db'}
TARGETSCHEMA=${DEFAULT_TARGETSCHEMA:-'public'}
S3BUCKET=${DEFAULT_S3BUCKET:-'s3://mybucket'}
usage() {
echo "Usage is: $0 <schemaname> <tablename>"
echo "Where schemaname is the name of the schema the table is on"
echo "And tablename is the name of the table to be transferred from RDS to RedShift"
}
main() {
echo "==================================================="
echo "$(date +"%F %T") Dumping data from RDS table to local files"
rm -f ${TABLE}.*
psql -h $SOURCEINSTANCE -U ${SOURCEUSER} -p ${SOURCEPORT} -A -t -F" " ${SOURCEDB} -c "SELECT * FROM ${SCHEMA}.${TABLE} $WHERE" | split -l 50000000 - ${TABLE}.tsv.
[ $? -eq 0 ] || exit 1
echo "==================================================="
echo "$(date +"%F %T") Compressing local files"
gzip ${TABLE}.tsv.*
[ $? -eq 0 ] || exit 1
echo "==================================================="
echo "$(date +"%F %T") Transferring local dump files to s3"
s3cmd --config=$(dirname $0)/.s3cfg sync ${TABLE}.tsv.*.gz ${S3BUCKET}/
[ $? -eq 0 ] || exit 1
echo "==================================================="
if [ ${DROP} ]; then
echo "$(date +"%F %T") Drop table from RedShift"
psql -h ${TARGETINSTANCE} -U ${TARGETUSER} -p ${TARGETPORT} -d ${TARGETDB} <<END
DROP TABLE $SCHEMA.$TABLE;
END
[ $? -eq 0 ] || exit 1
fi
echo "==================================================="
echo "$(date +"%F %T") Creating table on RedShift based on table scheme from RDS"
echo "" > schema
echo "CREATE TABLE ${SCHEMA}.${TABLE} (" >> schema
psql -h ${SOURCEINSTANCE} -U ${SOURCEUSER} -p ${SOURCEPORT} ${SOURCEDB} -t -c "select (\"column\" || ' ' || type || ' ENCODE ' || encoding) from pg_table_def where schemaname='$SCHEMA' and tablename = '$TABLE'" | sed 's/ENCODE none/ENCODE RAW/' | tr "\n" "," | sed 's/\([,]*\)$/)/' >> schema
SORTKEY=$(psql -h ${SOURCEINSTANCE} -U ${SOURCEUSER} -p ${SOURCEPORT} ${SOURCEDB} -t -c "select \"column\" from pg_table_def where schemaname='$SCHEMA' and tablename = '$TABLE' and sortkey > 0 order by sortkey" | tr "\n" "," | sed 's/\([,]*\)$//')
[ -n "$SORTKEY" ] && echo "sortkey ($SORTKEY)" >> schema
DESTKEY=$(psql -h ${SOURCEINSTANCE} -U ${SOURCEUSER} -p ${SOURCEPORT} ${SOURCEDB} -t -c "select \"column\" from pg_table_def where schemaname='$SCHEMA' and tablename = '$TABLE' and distkey = true" | tr "\n" "," | sed 's/\([,]*\)$//')
[ -n "$DESTKEY" ] && echo "distkey ($DESTKEY)" >> schema
echo ";" >> schema
cat schema
[ $? -eq 0 ] || exit 1
cat schema
psql -h ${TARGETINSTANCE} -U ${TARGETUSER} -p ${TARGETPORT} -d ${TARGETDB} -w -f schema
[ $? -eq 0 ] || exit 1
echo "==================================================="
echo "$(date +"%F %T") Loading data from s3 to RedShift"
echo > load_cmd
for i in ${TABLE}.*.gz; do
echo "copy ${TARGETSCHEMA}.${TABLE} from '${S3BUCKET}/$i' CREDENTIALS 'aws_access_key_id=${AWS_ACCESS_KEY_ID};aws_secret_access_key=${AWS_SECRET_ACCESS_KEY}' delimiter '\t' gzip TRUNCATECOLUMNS MAXERROR 100000 NULL 'NULL'; " >> load_cmd
done
psql -h ${TARGETINSTANCE} -U ${TARGETUSER} -p ${TARGETPORT} -d ${TARGETDB} -f load_cmd
[ $? -eq 0 ] || exit 1
echo "==================================================="
echo "$(date +"%F %T") Removing local and s3 files used"
rm ${TABLE}.tsv.*.gz
rm schema
rm load_cmd
s3cmd --config=$(dirname $0)/.s3cfg del ${S3BUCKET}/${TABLE}.tsv.*.gz
echo "==================================================="
echo "$(date +"%F %T") DONE!"
}
case $SCHEMA in
"") usage && exit 1;;
(--help|-h|-help|--h) usage && exit 1;;
esac
case $TABLE in
"") usage && exit 1 ;;
(--help|-h|-help|--h) usage && exit 1;;
esac
if ! [ -f ~/.pgpass ]; then
echo "You don't have a .pgpass file in your home directory - please copy it from git"
exit 1
fi
if ! [ -f ~/.s3cfg ]; then
echo "You don't have a .s3cfg file in your home directory - please copy it from git"
exit 1
fi
if ! [ -w ]; then
echo "You don't have write permissions to the current directory - please fix"
exit 1
fi
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment