Last active
May 22, 2016 22:49
-
-
Save elipapa/78de6b1f86e336382417 to your computer and use it in GitHub Desktop.
Simple bash task queue to download, decompress, and upload files to an S3 bucket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Decompression worker of the download -> decompress -> upload task queue.
#
# Polls queue/ for "<archive>.downloaded" marker files, unpacks each archive
# (expected in the current directory) into a directory named after the sample
# id, and touches "queue/<srsid>.decompressed" on success. A new archive is
# unpacked only while the work volume is below MAX_USED_PCT percent full.
# Runs forever; stop it with Ctrl-C / kill.

WORK_VOLUME=/home/ubuntu/decomp
MAX_USED_PCT=60

# Print the used-space percentage of WORK_VOLUME as a bare integer.
used_space_pct() {
  df "$WORK_VOLUME" --output=pcent | sed '1d' | awk '{print $1}' | sed 's/\%//'
}

while true; do
  for file_ready in queue/*.downloaded; do
    # An unmatched glob stays literal ("queue/*.downloaded"); skip it so we
    # never run mkdir/tar against "*".
    [[ -e "$file_ready" ]] || continue

    filepath=${file_ready%.downloaded}
    file=${filepath##*/}
    # Sample id = archive name up to the first dot (SRS011061.tar.bz2 -> SRS011061).
    srsid=${file%%.*}

    # Already unpacked on an earlier pass.
    [[ -e "queue/$srsid.decompressed" ]] && continue

    # Re-measure the disk before every extraction; when it is too full, stop
    # this pass and look again after the sleep below.
    usedspace=$(used_space_pct)
    if [[ ${usedspace:-0} -ge $MAX_USED_PCT ]]; then
      break
    fi

    echo "Decompressing $srsid ..."
    # -p: the directory may be left over from a previously failed attempt.
    mkdir -p -- "$srsid"
    if tar xvf "$file" --use-compress-program=lbzip2 -C "$srsid"; then
      echo "$file succesfully decompressed to $srsid. queueing for upload.. "
      touch "queue/$srsid.decompressed"
    else
      echo "error in decompressing $file ... is there enough disk space?" >&2
    fi
  done
  sleep 1
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download worker of the download -> decompress -> upload task queue.
#
# Usage: <this script> LIST_FILE
#   LIST_FILE  text file with one archive name per line (e.g. SRS011061.tar.bz2)
#
# For every listed archive that is not already in the destination bucket, not
# marked finished and not already downloaded, copies it from S3 into the
# current directory and touches "queue/<archive>.downloaded". Downloads pause
# while the work volume is MAX_USED_PCT percent full or more.

WORK_VOLUME=/home/ubuntu/decomp
MAX_USED_PCT=60

# Print the used-space percentage of WORK_VOLUME as a bare integer.
used_space_pct() {
  df "$WORK_VOLUME" --output=pcent | sed '1d' | awk '{print $1}' | sed 's/\%//'
}

if [[ $# -lt 1 || ! -r "$1" ]]; then
  echo "usage: ${0##*/} LIST_FILE  (one archive name per line)" >&2
  exit 1
fi

mkdir -p queue

# "|| [[ -n $line ]]" keeps the last line even without a trailing newline.
while IFS='' read -r line || [[ -n "$line" ]]; do
  # A blank line would yield an empty sample id and a bogus bucket query.
  [[ -n "$line" ]] || continue

  # Sample id = archive name up to the first dot.
  srsid=${line%%.*}
  echo "looking if sample $srsid has already been completely processed..."

  # if file exist on destination bucket, just skip to the next loop iteration
  # NOTE(review): the upload worker in this gist writes to
  # s3://finch-data/HMIWGS/stool/<srsid>/ while this check lists
  # s3://finch-data/HMIWGS/<srsid>/ -- confirm which prefix is intended.
  if aws s3 ls "s3://finch-data/HMIWGS/$srsid/"; then
    echo "something for $srsid is in the destination bucket. skipping."
    continue
  fi

  if [[ -e "queue/$srsid.finished" ]]; then
    echo "this $srsid was already uploaded decompressed. skipping."
    continue
  fi

  if [[ -e "queue/$line.downloaded" ]]; then
    echo "it seems I have already downloaded $line. skipping."
    continue
  fi

  # Block until the other workers have freed enough disk space.
  usedspace=$(used_space_pct)
  while [[ $usedspace -ge $MAX_USED_PCT ]]; do
    echo "$(date) -- disk getting full. waiting ... "
    sleep 60
    usedspace=$(used_space_pct)
  done

  echo "copying $line on to the local drive..."
  if aws s3 cp "s3://finch-data/Healthy_HMP_stool_WGS/$line" .; then
    echo "$line copied succesfully, queueing for decompression"
    touch "queue/$line.downloaded"
  else
    echo "could not copy $line. moving to next sample..."
  fi
done < "$1"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SRS011061.tar.bz2 | |
SRS011084.tar.bz2 | |
SRS011134.tar.bz2 | |
SRS011239.tar.bz2 | |
SRS011271.tar.bz2 | |
SRS011302.tar.bz2 | |
SRS011405.tar.bz2 | |
SRS011452.tar.bz2 | |
SRS011529.tar.bz2 | |
SRS011586.tar.bz2 | |
SRS012273.tar.bz2 | |
SRS012849.tar.bz2 | |
SRS012902.tar.bz2 | |
SRS012969.tar.bz2 | |
SRS013098.tar.bz2 | |
SRS013158.tar.bz2 | |
SRS013215.tar.bz2 | |
SRS013216.tar.bz2 | |
SRS013476.tar.bz2 | |
SRS013521.tar.bz2 | |
SRS013638.tar.bz2 | |
SRS013639.tar.bz2 | |
SRS013687.tar.bz2 | |
SRS013800.tar.bz2 | |
SRS013951.tar.bz2 | |
SRS014235.tar.bz2 | |
SRS014287.tar.bz2 | |
SRS014313.tar.bz2 | |
SRS014459.tar.bz2 | |
SRS014613.tar.bz2 | |
SRS014683.tar.bz2 | |
SRS014923.tar.bz2 | |
SRS014979.tar.bz2 | |
SRS015065.tar.bz2 | |
SRS015133.tar.bz2 | |
SRS015190.tar.bz2 | |
SRS015217.tar.bz2 | |
SRS015264.tar.bz2 | |
SRS015369.tar.bz2 | |
SRS015431.tar.bz2 | |
SRS015578.tar.bz2 | |
SRS015663.tar.bz2 | |
SRS015782.tar.bz2 | |
SRS015794.tar.bz2 | |
SRS015854.tar.bz2 | |
SRS015890.tar.bz2 | |
SRS015960.tar.bz2 | |
SRS016018.tar.bz2 | |
SRS016056.tar.bz2 | |
SRS016095.tar.bz2 | |
SRS016203.tar.bz2 | |
SRS016267.tar.bz2 | |
SRS016335.tar.bz2 | |
SRS016437.tar.bz2 | |
SRS016438.tar.bz2 | |
SRS016495.tar.bz2 | |
SRS016517.tar.bz2 | |
SRS016585.tar.bz2 | |
SRS016753.tar.bz2 | |
SRS016954.tar.bz2 | |
SRS016989.tar.bz2 | |
SRS016990.tar.bz2 | |
SRS017103.tar.bz2 | |
SRS017191.tar.bz2 | |
SRS017247.tar.bz2 | |
SRS017307.tar.bz2 | |
SRS017433.tar.bz2 | |
SRS017521.tar.bz2 | |
SRS017701.tar.bz2 | |
SRS017821.tar.bz2 | |
SRS017916.tar.bz2 | |
SRS018133.tar.bz2 | |
SRS018313.tar.bz2 | |
SRS018351.tar.bz2 | |
SRS018427.tar.bz2 | |
SRS018575.tar.bz2 | |
SRS018656.tar.bz2 | |
SRS018817.tar.bz2 | |
SRS018984.tar.bz2 | |
SRS019030.tar.bz2 | |
SRS019068.tar.bz2 | |
SRS019161.tar.bz2 | |
SRS019267.tar.bz2 | |
SRS019381.tar.bz2 | |
SRS019397.tar.bz2 | |
SRS019582.tar.bz2 | |
SRS019601.tar.bz2 | |
SRS019685.tar.bz2 | |
SRS019787.tar.bz2 | |
SRS019910.tar.bz2 | |
SRS019968.tar.bz2 | |
SRS020233.tar.bz2 | |
SRS020328.tar.bz2 | |
SRS020622.tar.bz2 | |
SRS020869.tar.bz2 | |
SRS021153.tar.bz2 | |
SRS021219.tar.bz2 | |
SRS021484.tar.bz2 | |
SRS021948.tar.bz2 | |
SRS022071.tar.bz2 | |
SRS022093.tar.bz2 | |
SRS022137.tar.bz2 | |
SRS022524.tar.bz2 | |
SRS022609.tar.bz2 | |
SRS022713.tar.bz2 | |
SRS023176.tar.bz2 | |
SRS023346.tar.bz2 | |
SRS023526.tar.bz2 | |
SRS023583.tar.bz2 | |
SRS023829.tar.bz2 | |
SRS023914.tar.bz2 | |
SRS023971.tar.bz2 | |
SRS024009.tar.bz2 | |
SRS024075.tar.bz2 | |
SRS024132.tar.bz2 | |
SRS024265.tar.bz2 | |
SRS024331.tar.bz2 | |
SRS024388.tar.bz2 | |
SRS024435.tar.bz2 | |
SRS024549.tar.bz2 | |
SRS024625.tar.bz2 | |
SRS024663.tar.bz2 | |
SRS042284.tar.bz2 | |
SRS042628.tar.bz2 | |
SRS043001.tar.bz2 | |
SRS043411.tar.bz2 | |
SRS043667.tar.bz2 | |
SRS043701.tar.bz2 | |
SRS045004.tar.bz2 | |
SRS045528.tar.bz2 | |
SRS045645.tar.bz2 | |
SRS045713.tar.bz2 | |
SRS045739.tar.bz2 | |
SRS047014.tar.bz2 | |
SRS047044.tar.bz2 | |
SRS048164.tar.bz2 | |
SRS048870.tar.bz2 | |
SRS049164.tar.bz2 | |
SRS049402.tar.bz2 | |
SRS049712.tar.bz2 | |
SRS049896.tar.bz2 | |
SRS049900.tar.bz2 | |
SRS049959.tar.bz2 | |
SRS049995.tar.bz2 | |
SRS050026.tar.bz2 | |
SRS050299.tar.bz2 | |
SRS050422.tar.bz2 | |
SRS050752.tar.bz2 | |
SRS050925.tar.bz2 | |
SRS051031.tar.bz2 | |
SRS051882.tar.bz2 | |
SRS052027.tar.bz2 | |
SRS052697.tar.bz2 | |
SRS053214.tar.bz2 | |
SRS053335.tar.bz2 | |
SRS053356.tar.bz2 | |
SRS053398.tar.bz2 | |
SRS053573.tar.bz2 | |
SRS053649.tar.bz2 | |
SRS054352.tar.bz2 | |
SRS054590.tar.bz2 | |
SRS054956.tar.bz2 | |
SRS055533.tar.bz2 | |
SRS055982.tar.bz2 | |
SRS056259.tar.bz2 | |
SRS056273.tar.bz2 | |
SRS056519.tar.bz2 | |
SRS057478.tar.bz2 | |
SRS057717.tar.bz2 | |
SRS058070.tar.bz2 | |
SRS058723.tar.bz2 | |
SRS058770.tar.bz2 | |
SRS062427.tar.bz2 | |
SRS063040.tar.bz2 | |
SRS063489.tar.bz2 | |
SRS063518.tar.bz2 | |
SRS063985.tar.bz2 | |
SRS064276.tar.bz2 | |
SRS064557.tar.bz2 | |
SRS064645.tar.bz2 | |
SRS064973.tar.bz2 | |
SRS065504.tar.bz2 | |
SRS074670.tar.bz2 | |
SRS074964.tar.bz2 | |
SRS075078.tar.bz2 | |
SRS075341.tar.bz2 | |
SRS075398.tar.bz2 | |
SRS075963.tar.bz2 | |
SRS076929.tar.bz2 | |
SRS077086.tar.bz2 | |
SRS077194.tar.bz2 | |
SRS077294.tar.bz2 | |
SRS077335.tar.bz2 | |
SRS077502.tar.bz2 | |
SRS077552.tar.bz2 | |
SRS077730.tar.bz2 | |
SRS077753.tar.bz2 | |
SRS077849.tar.bz2 | |
SRS078176.tar.bz2 | |
SRS078242.tar.bz2 | |
SRS078419.tar.bz2 | |
SRS078665.tar.bz2 | |
SRS097889.tar.bz2 | |
SRS098514.tar.bz2 | |
SRS098571.tar.bz2 | |
SRS098644.tar.bz2 | |
SRS098717.tar.bz2 | |
SRS098827.tar.bz2 | |
SRS100021.tar.bz2 | |
SRS101376.tar.bz2 | |
SRS101433.tar.bz2 | |
SRS103987.tar.bz2 | |
SRS104197.tar.bz2 | |
SRS104311.tar.bz2 | |
SRS104400.tar.bz2 | |
SRS104485.tar.bz2 | |
SRS105153.tar.bz2 | |
SRS140492.tar.bz2 | |
SRS140513.tar.bz2 | |
SRS140645.tar.bz2 | |
SRS142503.tar.bz2 | |
SRS142505.tar.bz2 | |
SRS142599.tar.bz2 | |
SRS142712.tar.bz2 | |
SRS142890.tar.bz2 | |
SRS143070.tar.bz2 | |
SRS143181.tar.bz2 | |
SRS143342.tar.bz2 | |
SRS143417.tar.bz2 | |
SRS143598.tar.bz2 | |
SRS143780.tar.bz2 | |
SRS143876.tar.bz2 | |
SRS143991.tar.bz2 | |
SRS144362.tar.bz2 | |
SRS144506.tar.bz2 | |
SRS144537.tar.bz2 | |
SRS145497.tar.bz2 | |
SRS146764.tar.bz2 | |
SRS146812.tar.bz2 | |
SRS146813.tar.bz2 | |
SRS147022.tar.bz2 | |
SRS147088.tar.bz2 | |
SRS147139.tar.bz2 | |
SRS147346.tar.bz2 | |
SRS147445.tar.bz2 | |
SRS147652.tar.bz2 | |
SRS147766.tar.bz2 | |
SRS147919.tar.bz2 | |
SRS148196.tar.bz2 | |
SRS148424.tar.bz2 | |
SRS148721.tar.bz2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Upload worker of the download -> decompress -> upload task queue.
#
# Polls queue/ for "<srsid>.decompressed" markers that have no matching
# "<srsid>.finished" marker, uploads every *.fastq file found under the
# <srsid>/ directory to s3://finch-data/HMIWGS/stool/<srsid>/, then removes the
# unpacked directory and the source archive and touches
# "queue/<srsid>.finished". Runs forever; stop it with Ctrl-C / kill.

while true; do
  for file_ready in queue/*.decompressed; do
    # An unmatched glob stays literal ("queue/*.decompressed"); without this
    # guard srsid would become "*" and the cleanup below would run
    # "rm -rf */" over the whole work area.
    [[ -e "$file_ready" ]] || continue

    filepath=${file_ready%.decompressed}
    [[ -e "$filepath.finished" ]] && continue

    srsid=${filepath##*/}
    echo "queueing $srsid for upload"

    # find is used because some archives unpack into a nested directory and
    # others do not; every .fastq ends up directly under the sample prefix.
    # NUL-delimited so unusual file names survive; xargs exits non-zero if any
    # single copy fails, which keeps the data on disk for a retry.
    if find "$srsid" -name "*.fastq" -print0 \
      | xargs -0 -I {} aws s3 cp {} "s3://finch-data/HMIWGS/stool/$srsid/"
    then
      echo "upload of $srsid succesful. cleaning disk space"
      # ${srsid:?} aborts instead of expanding to "rm -rf /" if srsid is empty.
      rm -rf -- "${srsid:?}/"
      rm -f -- "${srsid:?}.tar.bz2"
      touch "queue/$srsid.finished"
    else
      echo "leaving $srsid decompressed samples on disk."
    fi
  done
  sleep 1
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# One-time provisioning of the worker instance: installs the AWS CLI, formats
# and mounts the scratch volume at /home/ubuntu/decomp, and clones the queue
# scripts into /home/ubuntu/decomp/src.
#r3.4xlarge (0.6$ bid)

# Stop at the first failing step so a failed mkfs/mount does not lead to
# cloning and working on the wrong filesystem.
set -euo pipefail

work_dir=/home/ubuntu/decomp

sudo apt-get install -y python-pip
sudo pip install awscli

# WARNING: this formats /dev/xvdb and destroys any data already on it.
sudo mkfs -t ext4 /dev/xvdb
mkdir -p "$work_dir"
sudo mount /dev/xvdb "$work_dir"
sudo chown -R ubuntu:ubuntu "$work_dir"

cd "$work_dir"
sudo -u ubuntu git clone https://gist.github.com/78de6b1f86e336382417.git src
#better to launch a tmux session and run the three processes in parallel
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment