# nf-flu wrapper script
# Source: GitHub gist peterk87/fefb2dcb836fc9a7eb3d084f18225b29 (created November 18, 2024 18:21)
#!/bin/bash
# Strict mode:
#   -E  functions and subshells inherit the ERR trap, so failures inside
#       helper functions are reported with their line number too
#   -e  exit on any unhandled non-zero status
#   -u  treat unset variables as errors
#   -o pipefail  a pipeline fails if any stage fails
set -Eeuo pipefail
#######################################
# Report the line on which a command failed. Intended as the ERR trap body.
# Arguments: $1 - line number of the failing command
# Outputs:   a red "Error on line N" message on stderr
#######################################
error_handler() {
  # Diagnostics go to stderr so they never mix into captured stdout.
  printf '\n\033[1;31mError on line %s\033[0m\n' "${1:-?}" >&2
  # Perform any cleanup or logging here
}
#######################################
# Signal handler for SIGINT/SIGTERM: report the interruption, run the
# optional cleanup hook and terminate the script.
# Globals:   cleanup (optional function; called only when it is defined)
# Outputs:   an error message on stderr
# Returns:   never returns; exits with status 1
#######################################
handle_interrupt() {
  printf '\n\033[1;31mERROR:\033[1m Script interrupted...\033[0m\n' >&2
  # No `cleanup` function is defined in the visible script, so calling it
  # unconditionally would fail with "command not found". Treat it as an
  # optional hook instead.
  if declare -F cleanup >/dev/null; then
    # A failing cleanup must not mask the interruption exit below.
    cleanup || true
  fi
  exit 1
}
# Trap ERR to report the line number of any command that fails.
# Single quotes defer expansion of $LINENO until the trap actually fires.
trap 'error_handler "$LINENO"' ERR
# Trap SIGINT and SIGTERM so an interrupted run is reported and exits non-zero.
trap handle_interrupt SIGINT SIGTERM
#######################################
# Log a timestamped error message.
# Arguments: $1 - message text (printed verbatim; backslashes are not
#                 interpreted)
# Outputs:   "<ISO-8601 timestamp> ERROR: <message>" on stderr, with colour
#######################################
error() {
  # The message is passed as a printf argument, not as part of the format,
  # so paths such as 'C:\new' are printed literally.
  printf '%s \033[1;31mERROR: \033[0m\033[1m%s\033[0m\n' "$(date -Is)" "${1-}" >&2
}
#######################################
# Log a timestamped informational message.
# Arguments: $1 - message text (printed verbatim; backslashes are not
#                 interpreted)
# Outputs:   "<ISO-8601 timestamp> INFO: <message>" on stdout, with colour
#######################################
info() {
  # The message is a printf argument rather than part of the format string,
  # so file names containing backslashes are shown as-is.
  printf '%s \033[1;32mINFO: \033[0m\033[1m%s\033[0m\n' "$(date -Is)" "${1-}"
}
# Default values

# Resources: by default use every CPU and all currently available memory.
TOTAL_CPUS=$(nproc)
# MemAvailable in /proc/meminfo is reported in kB.
FREE_MEM_KB=$(awk '/^MemAvailable:/ {print $2}' /proc/meminfo)
# Convert kB -> GB with two decimals (awk avoids an extra dependency on bc).
FREE_MEM_GB=$(awk -v kb="$FREE_MEM_KB" 'BEGIN {printf "%.2f", kb / 1024 / 1024}')

# Latest nf-flu release tag from the GitHub API. The lookup must never abort
# the script: without network access, or when the API answers without a
# "tag_name" field, fall back to the "master" revision with a warning.
NF_FLU_VERSION=$(
  curl -fsSL --max-time 30 \
    https://api.github.com/repos/CFIA-NCFAD/nf-flu/releases/latest 2>/dev/null \
    | sed -nE 's/.*"tag_name": *"([^"]+)".*/\1/p'
) || NF_FLU_VERSION=""
if [[ -z "$NF_FLU_VERSION" ]]; then
  echo "WARNING: could not determine the latest nf-flu release; defaulting to 'master'" >&2
  NF_FLU_VERSION="master"
fi

NCPUS=$TOTAL_CPUS
MEM_TO_USE_GB=$FREE_MEM_GB
REF_DB=""
PLATFORM="nanopore"
PROFILE="docker"
SAMPLESHEET=""
OUTDIR="$(date -I)-results"
DEST=""

# NCBI influenza reference data (zstd-compressed), hosted on FigShare.
FASTA_ZST_URL="https://api.figshare.com/v2/file/download/41415330"
CSV_ZST_URL="https://api.figshare.com/v2/file/download/41415333"
FASTA_ZST_FILE="influenza.fna.zst"
CSV_ZST_FILE="influenza.csv.zst"
#######################################
# Download a file unless it is already present.
# The data is written to "<output>.part" first and only renamed to <output>
# after curl reports success, so an interrupted or failed transfer is never
# mistaken for a complete file on the next run.
# Arguments: $1 - URL to download
#            $2 - destination path
# Outputs:   progress via info(); failures via error()
# Returns:   0 if the file exists or was downloaded, 1 if the download failed
#######################################
download_file() {
  local url=$1
  local output=$2
  local partial="${output}.part"

  if [[ -f "$output" ]]; then
    info "$output already exists. Skipping download."
    return 0
  fi

  info "Downloading $output from $url..."
  # --fail: treat HTTP errors (4xx/5xx) as failures instead of saving the
  #         error page as if it were the requested file.
  # TLS certificate verification is intentionally left enabled (no -k):
  # these files are used as the reference database for the analysis.
  if ! curl --silent --show-error --location --fail "$url" -o "$partial"; then
    rm -f -- "$partial"
    error "Failed to download $url"
    return 1
  fi
  mv -- "$partial" "$output"
  info "Downloaded $output."
}
#######################################
# Usage instructions: print the help text and terminate.
# Globals:   reads OUTDIR, DEST, PLATFORM, PROFILE, NF_FLU_VERSION,
#            TOTAL_CPUS, FREE_MEM_GB, FASTA_ZST_FILE, FASTA_ZST_URL,
#            CSV_ZST_FILE, CSV_ZST_URL to show the current defaults
# Outputs:   help text on stdout
# Returns:   never returns; always exits with status 1
#######################################
usage() {
  # Unquoted heredoc delimiter: $0 and the default values are expanded.
  cat <<EOF
Usage: $0 [options] [-- [options to pass to Nextflow/nf-flu]]

Options:
 -h Show this help message and exit
 -i <samplesheet> Path to the input sample sheet (required)
 -o <outdir> Path to the output directory (default: $OUTDIR)
 -d <dest> Rsync destination for final results (default: $DEST)
 -r <ref_db> Path to reference database (optional)
 -p <platform> Sequencing platform (illumina/nanopore) (default: $PLATFORM)
 -P <profile> nf-flu profile (default: $PROFILE)
 -v <nf_flu_version> nf-flu version (by default, use the latest release: $NF_FLU_VERSION)
 -t <cpus> Number of CPUs to use (default: $TOTAL_CPUS)
 -m <memory> Memory to use in GB (default: ${FREE_MEM_GB} GB)
 -f <influenza_fasta> Influenza.fna.zst path (default: ${FASTA_ZST_FILE}; will download from FigShare if file does not exist ($FASTA_ZST_URL))
 -c <influenza_csv> Influenza.csv.zst path (default: ${CSV_ZST_FILE}; will download from FigShare if file does not exist ($CSV_ZST_URL))

Example usage: run nf-flu with all available memory and CPUs and rsync to a NFS path:
 $0 -i samplesheet.csv -d /nfs/path/to/outputs/

Example usage: run nf-flu with custom arguments to specify latest Rerio Clair3 variant calling model for R10 and Dorado with SUP model:
 $0 -i samplesheet.csv -d /nfs/path/to/outputs/ -- --clair3_user_variant_model /data/rerio/clair3_models/r1041_e82_400bps_sup_v500
EOF
  exit 1
}
#######################################
# Parse command-line options into the script's global settings.
# Globals:   writes NCPUS, MEM_TO_USE_GB, REF_DB, PLATFORM, PROFILE,
#            SAMPLESHEET, OUTDIR, DEST, NF_FLU_VERSION, FASTA_ZST_FILE,
#            CSV_ZST_FILE; leaves OPTIND at the index of the first
#            non-option argument so the caller can shift
# Arguments: the script's command-line arguments ("$@")
# Returns:   0 on success; exits 0 after -h, exits 1 on an invalid option
#            or a missing option argument
#######################################
parse_options() {
  local opt
  OPTIND=1
  # "v:" must be in the option string, otherwise the documented -v option
  # is rejected as invalid and its case arm below is unreachable.
  while getopts ":ht:m:r:p:P:i:o:d:v:f:c:" opt; do
    case "$opt" in
      # usage always exits 1; run it in a subshell so that an explicit
      # request for help finishes with a success status.
      h) (usage) || true; exit 0 ;;
      t) NCPUS=$OPTARG ;;
      m) MEM_TO_USE_GB=$OPTARG ;;
      r) REF_DB=$OPTARG ;;
      p) PLATFORM=$OPTARG ;;
      P) PROFILE=$OPTARG ;;
      i) SAMPLESHEET=$OPTARG ;;
      o) OUTDIR=$OPTARG ;;
      d) DEST=$OPTARG ;;
      v) NF_FLU_VERSION=$OPTARG ;;
      f) FASTA_ZST_FILE=$OPTARG ;;
      c) CSV_ZST_FILE=$OPTARG ;;
      \?) echo "Invalid option: -$OPTARG" >&2; usage; exit 1 ;;
      :) echo "Option -$OPTARG requires an argument." >&2; usage; exit 1 ;;
    esac
  done
}

# Parse command-line options
parse_options "$@"
# Drop the parsed options; whatever remains is passed through to Nextflow.
shift $((OPTIND - 1))
if [[ "${1:-}" == "--" ]]; then
  shift
fi
# Validate required arguments. usage prints the help text and exits 1.
if [[ -z "$SAMPLESHEET" || -z "$OUTDIR" ]]; then
  echo -e "\033[1;31mERROR:\033[1m Missing required arguments for samplesheet and nf-flu output directory (-i, -o)\033[0m" >&2
  usage
fi

info "Starting nf-flu (v$NF_FLU_VERSION) ${PLATFORM} analysis with ${NCPUS} CPU cores and ${MEM_TO_USE_GB} GB memory..."

# Fetch the NCBI influenza FASTA and metadata CSV unless they already exist
# at the configured paths.
info "Download nf-flu FASTA and CSV files if necessary"
download_file "$FASTA_ZST_URL" "$FASTA_ZST_FILE"
download_file "$CSV_ZST_URL" "$CSV_ZST_FILE"
# Pull the nf-flu pipeline
echo "Pulling the nf-flu pipeline..."
# The command is kept as an array and executed directly; no eval is needed.
NXF_PULL_CMD=(nextflow pull CFIA-NCFAD/nf-flu -r master)
echo "Running: ${NXF_PULL_CMD[*]}"
if ! "${NXF_PULL_CMD[@]}"; then
  echo -e "\033[1;31mERROR:\033[1m Failed to pull nf-flu pipeline\033[0m" >&2
  exit 1
fi
# Construct the Nextflow command as an array so that every value (paths with
# spaces, the "<n> GB" memory string, user-supplied extra arguments) reaches
# nextflow as exactly one argument. Building a string and eval-ing it would
# re-parse those values and allow shell metacharacters in them to execute.
NF_FLU_CMD=(
  nextflow run CFIA-NCFAD/nf-flu
  -resume
  -r "$NF_FLU_VERSION"
  -profile "$PROFILE"
  --platform "$PLATFORM"
  --max_cpus "$NCPUS"
  --max_memory "${MEM_TO_USE_GB} GB"
  --ncbi_influenza_fasta "$FASTA_ZST_FILE"
  --ncbi_influenza_metadata "$CSV_ZST_FILE"
  --input "$SAMPLESHEET"
  --outdir "$OUTDIR"
)
# Extra arguments given after "--" on the command line are passed through.
NF_FLU_CMD+=("$@")
if [[ -n "$REF_DB" ]]; then
  NF_FLU_CMD+=(--ref_db "$REF_DB")
fi

# Run the nf-flu pipeline
echo "Running the nf-flu pipeline..."
# %q prints each argument shell-quoted, so the logged line can be copy-pasted.
printf 'Running:'
printf ' %q' "${NF_FLU_CMD[@]}"
printf '\n'
if ! "${NF_FLU_CMD[@]}"; then
  echo -e "\033[1;31mERROR:\033[1m nf-flu pipeline execution failed\033[0m" >&2
  exit 1
fi
# Sync results to the destination (only when -d was given).
if [[ -z "$DEST" ]]; then
  echo "No valid rsync destination specified. Destination='$DEST'"
else
  echo "Syncing results to the destination directory..."
  # The current working directory is copied, minus Nextflow's work/ and
  # .nextflow/ directories. An array keeps $PWD and $DEST intact even when
  # they contain spaces or shell metacharacters (no eval).
  RSYNC_CMD=(
    rsync -rt --progress --stats
    --exclude=work/
    --exclude=.nextflow/
    "$PWD" "$DEST"
  )
  printf 'Running:'
  printf ' %q' "${RSYNC_CMD[@]}"
  printf '\n'
  if ! "${RSYNC_CMD[@]}"; then
    echo -e "\033[1;31mERROR:\033[1m Failed to sync results to destination\033[0m" >&2
    exit 1
  fi
  echo -e "\033[1;32mSUCCESS:\033[1m nf-flu pipeline completed successfully, and results are synced to $DEST\033[0m"
fi
# End of script. (GitHub gist page footer — "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment" — is not part of the script.)