Last active
July 19, 2024 18:10
-
-
Save tomkinsc/12c48a9fc6b958dcf89f9465358d5db4 to your computer and use it in GitHub Desktop.
Transfer all data from a BaseSpace project to a Google Storage bucket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Transfer all files listed for a BaseSpace project to a Google Storage bucket.
#
# Usage:
#   <this script> BaseSpaceProjectName GCPBucketPrefix
#
# Arguments:
#   $1 - name passed to `bs contents run --name` to list the files.
#   $2 - destination bucket/prefix, with or without a leading "gs://".
#
# Each file is streamed (`curl | gcloud storage cp -`) rather than staged on
# local disk. The `bs`, `curl` and `gcloud` commands must be on PATH and
# already authenticated (`bs auth`, `gcloud auth`).
#
# Exit status:
#   0 - every listed file was transferred.
#   1 - bad usage, a missing tool, the listing failed, or at least one file
#       failed to transfer (remaining files are still attempted).

# pipefail: a failed download must fail the whole `curl | gcloud` pipeline,
# otherwise only the uploader's status would be seen.
set -uo pipefail

# Print the help text to stdout.
usage() {
  cat <<EOF
This script can be used to transfer *all* files from a BaseSpace project to a Google Storage bucket
Usage: $0 BaseSpaceProjectName GCPBucketPrefix

 NOTE: This will download data from BaseSpace to the machine running this script
 before uploading to GS, so ample uninterrupted bandwidth is required
 for upload and download.
 Files are piped, so disk usage required for the transfer is negligible.

 Before running, be sure to log in to BaseSpace, authenticate to GCP:
 bs auth
 gcloud auth

 The CLI toolkits for bs and gcloud can be found here and must be installed first:
 https://developer.basespace.illumina.com/docs/content/documentation/cli/cli-overview
 https://cloud.google.com/sdk/docs/install-sdk
EOF
}

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Both arguments are required; with only one the destination would be the
# malformed "gs:///...".
if [[ $# -lt 2 ]]; then
  usage
  exit 1
fi

bsproject="$1"
# Allow the prefix to be given with or without the gs:// scheme.
gcpbucket="${2#gs://}"

for tool in bs curl gcloud; do
  command -v "$tool" >/dev/null 2>&1 \
    || die "Required tool not found on PATH: $tool"
done

# Capture the listing up front so that a failure of `bs contents` is detected
# instead of silently producing an empty loop.
listing=$(bs contents run --name "$bsproject" --template='{{.Id}},{{.FilePath}}') \
  || die "Failed to list contents for BaseSpace project: $bsproject"

failures=0

# Read one "<id>,<path>" record per line. Line-based reading keeps paths with
# whitespace intact. The listing is fed on fd 3 so that commands inside the
# loop cannot consume it through stdin.
while IFS= read -r -u 3 fileinfo || [[ -n "$fileinfo" ]]; do
  [[ -n "$fileinfo" ]] || continue

  fileid=${fileinfo%%,*}   # text before the first comma
  filename=${fileinfo#*,}  # everything after it (may itself contain commas)

  echo "Starting job to transfer file: $filename"

  if ! bslink=$(bs file link --id "$fileid"); then
    printf 'Failed to obtain a download link for file id %s (%s)\n' \
      "$fileid" "$filename" >&2
    failures=$((failures + 1))
    continue
  fi

  # NOTE(review): the destination is <dirname of path>/<path>, exactly as in
  # the original script. If the listed path already contains directories they
  # appear twice in the object name - confirm this layout is intended.
  containing_dir=$(dirname -- "${filename}")

  # --fail: make curl exit non-zero on an HTTP error instead of streaming the
  # error page into the bucket as if it were the file's content.
  if ! curl --fail "$bslink" \
    | gcloud storage cp - "gs://${gcpbucket}/${containing_dir}/${filename}"; then
    printf 'Transfer failed for file: %s\n' "$filename" >&2
    failures=$((failures + 1))
  fi
done 3<<<"$listing"

if [[ "$failures" -gt 0 ]]; then
  die "$failures file(s) failed to transfer"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment