Skip to content

Instantly share code, notes, and snippets.

@peterk87
Created November 18, 2024 18:21
Show Gist options
  • Save peterk87/fefb2dcb836fc9a7eb3d084f18225b29 to your computer and use it in GitHub Desktop.
Save peterk87/fefb2dcb836fc9a7eb3d084f18225b29 to your computer and use it in GitHub Desktop.
nf-flu wrapper script
#!/bin/bash
set -euo pipefail
#######################################
# Report the line number at which a command failed.
# Arguments: $1 - line number to report
# Outputs:   a blank line followed by a bold red message on stdout
#######################################
error_handler() {
  # %b expands the backslash escapes exactly as `echo -e` would.
  printf '%b\n' "\n\033[1;31mError on line $1\033[0m"
  # Perform any cleanup or logging here
}
#######################################
# Signal handler: announce the interruption, run the optional cleanup hook
# and terminate the script.
# Outputs: a bold red "Script interrupted" message on stdout
# Returns: never returns; exits with status 1
#######################################
handle_interrupt() {
  echo -e "\n\033[1;31mERROR:\033[1m Script interrupted...\033[0m"
  # No `cleanup` function is defined in this script, so calling it
  # unconditionally produced "cleanup: command not found" on Ctrl-C. Only
  # invoke the hook when one actually exists, and never let a failing hook
  # change the exit status of the handler.
  if command -v cleanup >/dev/null 2>&1; then
    cleanup || true
  fi
  exit 1
}
# Register error_handler on the ERR pseudo-signal. The trap string is
# single-quoted, so $LINENO is expanded when the trap fires rather than when
# it is registered.
trap 'error_handler $LINENO' ERR
# Register handle_interrupt for SIGINT (e.g. Ctrl-C) and SIGTERM; the handler
# prints a message and exits with status 1.
trap handle_interrupt SIGINT SIGTERM
#######################################
# Print a timestamped ERROR log line.
# Arguments: $1 - message text (backslash escapes are expanded)
# Outputs:   "<ISO-8601 timestamp> ERROR: <message>" with ANSI colours, stdout
#######################################
error() {
  # %b expands the backslash escapes exactly as `echo -e` would.
  printf '%b\n' "$(date -Is) \033[1;31mERROR: \033[0m\033[1m$1\033[0m"
}
#######################################
# Print a timestamped INFO log line.
# Arguments: $1 - message text (backslash escapes are expanded)
# Outputs:   "<ISO-8601 timestamp> INFO: <message>" with ANSI colours, stdout
#######################################
info() {
  # %b expands the backslash escapes exactly as `echo -e` would.
  printf '%b\n' "$(date -Is) \033[1;32mINFO: \033[0m\033[1m$1\033[0m"
}
# Default values; most can be overridden by the command-line options parsed
# further down.
# Number of processing units reported by nproc.
TOTAL_CPUS=$(nproc)
# Second field of the MemAvailable line in /proc/meminfo (Linux-specific path).
FREE_MEM_KB=$(awk '/MemAvailable/ {print $2}' /proc/meminfo)
# Convert to GB by dividing by 1024 twice; bc keeps two decimal places.
FREE_MEM_GB=$(echo "scale=2; $FREE_MEM_KB / 1024 / 1024" | bc)
# "tag_name" of the latest nf-flu release, scraped from the GitHub API JSON.
# NOTE(review): this runs on every invocation (even for -h) and, with
# `set -euo pipefail` active, a failed request or a response without a
# "tag_name" line makes the pipeline fail and aborts the script here.
NF_FLU_VERSION=$(curl -s https://api.github.com/repos/CFIA-NCFAD/nf-flu/releases/latest | grep '"tag_name"' | sed -E 's/.*"tag_name": *"([^"]+)".*/\1/')
# By default use every CPU and all currently available memory.
NCPUS=$TOTAL_CPUS
MEM_TO_USE_GB=$FREE_MEM_GB
# Optional reference database; --ref_db is only passed when this is non-empty.
REF_DB=""
PLATFORM="nanopore"
PROFILE="docker"
# Required: path to the input sample sheet (-i).
SAMPLESHEET=""
# Output directory name is prefixed with today's date (date -I).
OUTDIR="$(date -I)-results"
# Rsync destination; when left empty the final sync step is skipped.
DEST=""
# FigShare download URLs and local file names for the influenza FASTA and CSV
# (zstd-compressed, judging by the .zst extension).
FASTA_ZST_URL="https://api.figshare.com/v2/file/download/41415330"
CSV_ZST_URL="https://api.figshare.com/v2/file/download/41415333"
FASTA_ZST_FILE="influenza.fna.zst"
CSV_ZST_FILE="influenza.csv.zst"
#######################################
# Download a file with curl unless it is already present locally.
# The transfer goes to "<output>.part" and is renamed only after curl
# succeeds, so an interrupted or failed download can never be mistaken for a
# complete file on the next run.
# Arguments: $1 - source URL
#            $2 - destination path
# Outputs:   progress messages via info/error
# Returns:   0 if the file already existed or was downloaded, 1 on failure
#######################################
download_file() {
  local url=$1
  local output=$2
  local partial="${output}.part"

  if [[ -f "$output" ]]; then
    info "$output already exists. Skipping download."
    return 0
  fi

  info "Downloading $output from $url..."
  # --fail makes curl return non-zero on HTTP errors instead of saving the
  # error page as the data file.
  # NOTE(review): -k disables TLS certificate verification; kept for
  # compatibility with the existing behaviour, but consider removing it.
  if ! curl --silent --fail -SLk "$url" -o "$partial"; then
    rm -f -- "$partial"
    error "Failed to download $output from $url"
    return 1
  fi
  mv -- "$partial" "$output"
  info "Downloaded $output."
}
# Usage instructions
#######################################
# Print the help text and terminate the script.
# Globals:  reads OUTDIR, DEST, PLATFORM, PROFILE, NF_FLU_VERSION, TOTAL_CPUS,
#           FREE_MEM_GB, FASTA_ZST_FILE, FASTA_ZST_URL, CSV_ZST_FILE,
#           CSV_ZST_URL to show the current defaults
# Outputs:  help text on stdout
# Returns:  never returns; always exits with status 1
#######################################
usage() {
  # The -f line previously printed the FASTA file name twice; it now shows
  # the FigShare URL, matching the -c line.
  cat <<EOF
Usage: $0 [options] [-- [options to pass to Nextflow/nf-flu]]

Options:
 -h Show this help message and exit
 -i <samplesheet> Path to the input sample sheet (required)
 -o <outdir> Path to the output directory (default: $OUTDIR)
 -d <dest> Rsync destination for final results (default: $DEST)
 -r <ref_db> Path to reference database (optional)
 -p <platform> Sequencing platform (illumina/nanopore) (default: $PLATFORM)
 -P <profile> nf-flu profile (default: $PROFILE)
 -v <nf_flu_version> nf-flu version (by default, use the latest release: $NF_FLU_VERSION)
 -t <cpus> Number of CPUs to use (default: $TOTAL_CPUS)
 -m <memory> Memory to use in GB (default: ${FREE_MEM_GB} GB)
 -f <influenza_fasta> Influenza.fna.zst path (default: ${FASTA_ZST_FILE}; will download from FigShare if file does not exist ($FASTA_ZST_URL))
 -c <influenza_csv> Influenza.csv.zst path (default: ${CSV_ZST_FILE}; will download from FigShare if file does not exist ($CSV_ZST_URL))

Example usage: run nf-flu with all available memory and CPUs and rsync to a NFS path:
 $0 -i samplesheet.csv -d /nfs/path/to/outputs/

Example usage: run nf-flu with custom arguments to specify latest Rerio Clair3 variant calling model for R10 and Dorado with SUP model:
 $0 -i samplesheet.csv -d /nfs/path/to/outputs/ -- --clair3_user_variant_model /data/rerio/clair3_models/r1041_e82_400bps_sup_v500
EOF
  exit 1
}
# Parse command-line options. The leading ':' selects silent getopts error
# reporting so the '\?' and ':' arms can print their own messages. 'v:' was
# missing from the option string, which made the documented -v option fail
# with "Invalid option".
while getopts ":ht:m:r:p:P:i:o:d:v:f:c:" opt; do
  case "${opt}" in
    h)
      # usage always exits non-zero; run it in a subshell so that an explicit
      # request for help can exit with status 0.
      (usage) || true
      exit 0
      ;;
    t) NCPUS=$OPTARG ;;
    m) MEM_TO_USE_GB=$OPTARG ;;
    r) REF_DB=$OPTARG ;;
    p) PLATFORM=$OPTARG ;;
    P) PROFILE=$OPTARG ;;
    i) SAMPLESHEET=$OPTARG ;;
    o) OUTDIR=$OPTARG ;;
    d) DEST=$OPTARG ;;
    v) NF_FLU_VERSION=$OPTARG ;;
    f) FASTA_ZST_FILE=$OPTARG ;;
    c) CSV_ZST_FILE=$OPTARG ;;
    \?) echo "Invalid option: -$OPTARG" >&2; usage; exit 1 ;;
    :) echo "Option -$OPTARG requires an argument." >&2; usage; exit 1 ;;
  esac
done
shift $((OPTIND - 1))
# Drop a leading "--" separator; everything that remains in "$@" is passed
# through to Nextflow/nf-flu unchanged.
if [[ "${1:-}" == "--" ]]; then
  shift
fi

# Validate required arguments
if [[ -z "$SAMPLESHEET" || -z "$OUTDIR" ]]; then
  echo -e "\033[1;31mERROR:\033[1m Missing required arguments for samplesheet and nf-flu output directory (-i, -o)\033[0m" >&2
  usage
fi

info "Starting nf-flu (v$NF_FLU_VERSION) ${PLATFORM} analysis with ${NCPUS} CPU cores and ${MEM_TO_USE_GB} GB memory..."
info "Download nf-flu FASTA and CSV files if necessary"
download_file "$FASTA_ZST_URL" "$FASTA_ZST_FILE"
download_file "$CSV_ZST_URL" "$CSV_ZST_FILE"

# Commands are built as arrays and executed directly instead of being
# assembled into a string and passed to eval: paths or pass-through arguments
# containing spaces or shell metacharacters stay intact and cannot be
# re-interpreted by the shell.

# Pull the nf-flu pipeline
echo "Pulling the nf-flu pipeline..."
NXF_PULL_CMD=(nextflow pull CFIA-NCFAD/nf-flu -r master)
# %q prints each argument shell-quoted so the logged command is copy-pasteable.
printf 'Running:'; printf ' %q' "${NXF_PULL_CMD[@]}"; printf '\n'
"${NXF_PULL_CMD[@]}" || { echo -e "\033[1;31mERROR:\033[1m Failed to pull nf-flu pipeline\033[0m" >&2; exit 1; }

# Construct the Nextflow command
NF_FLU_CMD=(
  nextflow run CFIA-NCFAD/nf-flu
  -resume
  -r "$NF_FLU_VERSION"
  -profile "$PROFILE"
  --platform "$PLATFORM"
  --max_cpus "$NCPUS"
  --max_memory "${MEM_TO_USE_GB} GB"
  --ncbi_influenza_fasta "$FASTA_ZST_FILE"
  --ncbi_influenza_metadata "$CSV_ZST_FILE"
  --input "$SAMPLESHEET"
  --outdir "$OUTDIR"
  "$@"
)
# The reference database is optional; append it after the pass-through
# arguments, as before.
if [[ -n "$REF_DB" ]]; then
  NF_FLU_CMD+=(--ref_db "$REF_DB")
fi

# Run the nf-flu pipeline
echo "Running the nf-flu pipeline..."
printf 'Running:'; printf ' %q' "${NF_FLU_CMD[@]}"; printf '\n'
"${NF_FLU_CMD[@]}" || { echo -e "\033[1;31mERROR:\033[1m nf-flu pipeline execution failed\033[0m" >&2; exit 1; }

# Sync results to the destination
if [[ -z "$DEST" ]]; then
  echo "No valid rsync destination specified. Destination='$DEST'"
else
  echo "Syncing results to the destination directory..."
  # The whole working directory is synced, minus Nextflow's work/ and
  # .nextflow/ directories.
  RSYNC_CMD=(rsync -rt --progress --stats --exclude="work/" --exclude=".nextflow/" "$PWD" "$DEST")
  printf 'Running:'; printf ' %q' "${RSYNC_CMD[@]}"; printf '\n'
  "${RSYNC_CMD[@]}" || { echo -e "\033[1;31mERROR:\033[1m Failed to sync results to destination\033[0m" >&2; exit 1; }
  echo -e "\033[1;32mSUCCESS:\033[1m nf-flu pipeline completed successfully, and results are synced to $DEST\033[0m"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment