Created
March 15, 2018 18:30
-
-
Save DanielMuller/2361f0a3a1a51e3561825b3b54399cb5 to your computer and use it in GitHub Desktop.
Extract data from MySQL and upload to S3 for access with Athena
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
DBUSER=my_username | |
DBPASS=my_secret_password | |
DBHOST=my_hostname | |
DBNAME=my_database_name | |
TABLE=my_table | |
BUCKET=my_bucket | |
yesterdayFile=$(date -d '1 day ago' +"%Y-%m-%d")'.tsv' | |
if [ -f lastId ]; then | |
id=$(cat lastId) | |
else | |
id=0 | |
fi | |
query="select id, status, created_at from $TABLE where id>$id order by id" | |
mysql -u $DBUSER -p$DBPASS -h $DBHOST -B -N --quick -e "$query" $DBNAME > output.tsv | |
mkdir -p daily | |
rm -rf daily/* | |
awk '{ print $0 > "daily/" $6 ".tsv" }' output.tsv | |
rm -f daily/.tsv | |
cd daily | |
if [ ! -f $yesterdayFile ]; then | |
lastId=0 | |
else | |
lastId=$(tail -n1 $yesterdayFile | awk '{print $1}') | |
fi | |
for file in *.tsv; do | |
filename=`basename $file '.tsv'` | |
year=`echo $filename | cut -b1-4` | |
month=`echo $filename | cut -b6-7` | |
day=`echo $filename | cut -b9-10` | |
path="year=$year/month=$month/day=$day" | |
mkdir -p $path | |
mv $file $path/ | |
prev=$(pwd) | |
cd $path | |
split -a 2 -d --additional-suffix=.tsv -C 1024m $file data_ | |
gzip data_* | |
rm -f $file | |
cd $prev | |
done | |
cd .. | |
aws s3 sync daily/ s3://$BUCKET/mysql-data/$TABLE/ | |
echo $lastId > lastId | |
# Extract data from Athena using aws-cli | |
# sed -i 's/"//g' 2018_02.csv | |
# mkdir delete | |
# cd delete | |
# split -a 3 -l 100000 ../2018_02.csv | |
# for file in *;do sed -i 's/$/,/' $file; tr -d '\n' < $file > output.lst;mv output.lst $file;sed -i 's/^/set autocommit=0;\ndelete from $TABLE where id in (/' $file;sed -i 's/,$/);\ncommit;/' $file;done | |
# n=0;for file in x*;do echo $file;time mysql -u $DBUSER -p$DBPASS -h $DBHOST $DBNAME < $file;mv $file y$file;if [ $(($n % 20)) -eq 0 ];then sleep 900;else sleep 30;fi;let n=$n+1;done; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment