Created
July 8, 2024 17:38
-
-
Save RyanZurrin/999a3061fd6531b9694a44be44ce859d to your computer and use it in GitHub Desktop.
Copy random subjects from an S3 bucket
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Copy a random sample of subject "directories" (S3 common prefixes) from an
# S3 location into a local directory.
#
# Usage:
#   ./copy_random_subjects.sh s3://nda-enclave-c3371/HCP/HCPD/ 10 /local/destination/path
#
# Arguments:
#   $1  S3 bucket and prefix that contains the subject directories. A trailing
#       slash is optional; the script normalises the path to end in one.
#   $2  Number of subjects to select at random (positive integer).
#   $3  Local destination directory; one sub-directory is created per subject.
#
# Requires: aws (awscli), awk, shuf.
#
# Exit status:
#   0  every selected subject was copied
#   1  missing tool, listing failed / no subjects found, or a copy failed
#   2  bad command-line arguments

set -uo pipefail

# Print a message to stderr and exit with the given status (default 1).
die() {
  local status=1
  if [[ "${1:-}" =~ ^[0-9]+$ ]]; then
    status=$1
    shift
  fi
  printf '%s\n' "$*" >&2
  exit "${status}"
}

# Print the usage line to stderr and exit with status 2.
usage() {
  die 2 "Usage: ${0##*/} <s3://bucket/prefix/> <num_subjects> <destination_path>"
}

main() {
  (( $# == 3 )) || usage

  # `aws s3 ls` only lists the children of a prefix when the path ends with
  # "/", and the copy source below is built by plain concatenation, so force
  # exactly one trailing slash regardless of how the caller spelled it.
  local s3_path="${1%/}/"
  local num_subjects="$2"
  local destination_path="$3"

  [[ "${num_subjects}" =~ ^[0-9]+$ ]] && (( 10#${num_subjects} > 0 )) \
    || die 2 "Number of subjects must be a positive integer, got: ${num_subjects}"
  [[ -n "${destination_path}" ]] || usage

  # Ensure the required tools are installed.
  if ! command -v aws &> /dev/null; then
    die "awscli is not installed. Please install it and retry."
  fi
  if ! command -v shuf &> /dev/null; then
    die "shuf is not installed. Please install it and retry."
  fi

  # List the prefix. The listing is captured first so that a failing
  # `aws s3 ls` is detected instead of being hidden behind the pipeline.
  local listing
  listing=$(aws s3 ls "${s3_path}") \
    || die "No subjects found or aws s3 ls command failed."

  # Keep only "PRE <name>/" rows (sub-prefixes, i.e. subject directories);
  # plain object rows have the form "date time size key" and must not be
  # mistaken for subjects. The whole remainder of the line is kept so names
  # containing spaces survive. Then pick the random sample.
  local -a subjects=()
  local line
  while IFS= read -r line; do
    [[ -n "${line}" ]] && subjects+=("${line}")
  done < <(
    printf '%s\n' "${listing}" \
      | awk '$1 == "PRE" { sub(/^[[:space:]]*PRE[[:space:]]/, ""); print }' \
      | shuf -n "${num_subjects}"
  )

  if (( ${#subjects[@]} == 0 )); then
    die "No subjects found or aws s3 ls command failed."
  fi

  # Copy each selected subject, remembering failures so the final status
  # reflects what actually happened.
  local subject subject_dir
  local failures=0
  for subject in "${subjects[@]}"; do
    # Listing entries end in "/"; drop it for the local directory name.
    subject_dir="${destination_path}/${subject%/}"
    echo "Copying ${subject} to ${subject_dir}"

    if ! mkdir -p -- "${subject_dir}"; then
      printf 'Failed to create %s\n' "${subject_dir}" >&2
      failures=$(( failures + 1 ))
      continue
    fi

    if ! aws s3 cp "${s3_path}${subject}" "${subject_dir}" --recursive; then
      printf 'Failed to copy %s\n' "${s3_path}${subject}" >&2
      failures=$(( failures + 1 ))
    fi
  done

  if (( failures > 0 )); then
    die "Copy finished with ${failures} failed subject(s)."
  fi

  echo "Copy completed."
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment