Last active
April 10, 2025 15:18
-
-
Save peterk87/ce19e9994f9a135db6023458a96d8158 to your computer and use it in GitHub Desktop.
Create ZIP file of selected nf-flu results
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Create a zip (and tar.gz) package of selected nf-flu results for the samples
# listed in a samplesheet CSV.
set -euo pipefail

# Defaults; each one can be overridden by a command line option.
SAMPLESHEET="samplesheet.csv"
OUTPUT_DIR="results"
ZIP_PACKAGE_DIR="$(date -I)-nf-flu-results"
VERBOSE=false
OVERWRITE=false
# Help message
#
# Print the usage synopsis and the option reference, including the current
# default values, to stdout.
# Globals: SAMPLESHEET, OUTPUT_DIR, ZIP_PACKAGE_DIR, OVERWRITE (read)
print_help() {
  # Every option has a default, so all of them are shown as optional.
  cat <<EOF
Usage: $0 [-s <samplesheet>] [-o <output_dir>] [-d <zip_package_dir>] [-O] [-v] [-h]

Create a zip package of selected nf-flu results given a samplesheet CSV

Options:
 -s SAMPLESHEET Samplesheet file in CSV format. Used for getting sample names. First column should contain sample names (default: $SAMPLESHEET)
 -o OUTPUT_DIR Output directory containing the nf-flu results (default: $OUTPUT_DIR)
 -d ZIP_PACKAGE_DIR Directory to store the zip package (default: $ZIP_PACKAGE_DIR)
 -O Overwrite output files (default: $OVERWRITE)
 -v Verbose output
 -h Display this help message
EOF
}
# Parse command line options
#
# Parse the given arguments with getopts and update the option globals.
# Globals:   SAMPLESHEET, OUTPUT_DIR, ZIP_PACKAGE_DIR, OVERWRITE, VERBOSE
#            (written)
# Arguments: the script's command line arguments
# Exits:     0 after printing help for -h; 1 on an unknown option or a missing
#            option argument (message and usage go to stderr)
parse_args() {
  # A local OPTIND makes every call start at the first argument.
  local opt OPTIND=1
  # The leading ':' selects getopts' silent mode. OPTARG is then set to the
  # offending option letter for both '?' and ':', so the messages below are
  # safe under `set -u` (in the default mode OPTARG is unset for '?').
  while getopts ":s:o:d:Ohv" opt; do
    case "$opt" in
      s) SAMPLESHEET=$OPTARG ;;
      o) OUTPUT_DIR=$OPTARG ;;
      d) ZIP_PACKAGE_DIR=$OPTARG ;;
      O) OVERWRITE=true ;;
      v) VERBOSE=true ;;
      h)
        print_help
        exit 0
        ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        print_help >&2
        exit 1
        ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        print_help >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# ERR trap handler: report the line number of the failing command.
# Arguments: $1 - line number to report
# Outputs:   a blank line followed by a red "Error on line N" on stderr
error_handler() {
  # Diagnostics go to stderr so they are never captured or piped as output.
  printf '\n\033[1;31mError on line %s\033[0m\n' "$1" >&2
  # Perform any cleanup or logging here
}
# Remove the staging directory named by ZIP_PACKAGE_DIR, if there is one.
# Globals: ZIP_PACKAGE_DIR (read)
# Outputs: progress messages on stdout
cleanup() {
  # Tolerate an unset variable: this may run very early, and under `set -u`.
  local target=${ZIP_PACKAGE_DIR:-}
  echo "Cleaning up before exiting..."
  # Only announce and perform the removal when something is actually there.
  if [[ -n "$target" ]] && [[ -e "$target" || -L "$target" ]]; then
    echo "Removing temporary directory at $target"
    # `--` keeps a name that starts with '-' from being parsed as rm options.
    rm -rf -- "$target"
  fi
}
# Signal handler: report the interruption, clean up once and exit with 1.
# Outputs: an error message on stderr, plus whatever cleanup prints
handle_interrupt() {
  # Disarm the traps first. Otherwise the `exit` below would fire an EXIT trap
  # and run cleanup a second time, and a repeated signal could re-enter here.
  trap - EXIT SIGINT SIGTERM
  printf '\n\033[1;31mERROR:\033[1m Script interrupted...\033[0m\n' >&2
  cleanup
  exit 1
}
# Handler registration.
# ERR: report the line number of the command that failed.
trap 'error_handler "$LINENO"' ERR
# EXIT: run cleanup whenever the script exits.
trap cleanup EXIT
# SIGINT / SIGTERM: treat both as an interruption.
trap handle_interrupt SIGINT
trap handle_interrupt SIGTERM
# Log an error: ISO-8601 timestamp, red "ERROR:" tag, bold message.
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stderr
error() {
  # The message is passed as a %s argument so backslashes in it (for example
  # in a path) are not interpreted as escape sequences.
  printf '%s \033[1;31mERROR: \033[0m\033[1m%s\033[0m\n' "$(date -Is)" "$1" >&2
}
# Log a warning: ISO-8601 timestamp, yellow "WARNING:" tag, bold message.
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stderr
warning() {
  # The message is passed as a %s argument so backslashes in it (for example
  # in a path) are not interpreted as escape sequences.
  printf '%s \033[1;33mWARNING: \033[0m\033[1m%s\033[0m\n' "$(date -Is)" "$1" >&2
}
# Log an informational line: ISO-8601 timestamp, green "INFO:" tag, bold
# message.
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
info() {
  # The message is passed as a %s argument so backslashes in it (for example
  # in a path) are not interpreted as escape sequences.
  printf '%s \033[1;32mINFO: \033[0m\033[1m%s\033[0m\n' "$(date -Is)" "$1"
}
# Validate the inputs before anything is staged.

# Log an error and abort without running the EXIT cleanup.
# The EXIT trap is already armed at this point and its cleanup removes
# $ZIP_PACKAGE_DIR. Any directory found there now was not created by this run,
# so the trap is disarmed to keep a failed start from deleting it.
# Arguments: $1 - error message
abort_before_staging() {
  error "$1"
  trap - EXIT
  exit 1
}

[[ -f "$SAMPLESHEET" ]] \
  || abort_before_staging "Samplesheet not found at $SAMPLESHEET"
[[ -d "$OUTPUT_DIR" ]] \
  || abort_before_staging "Output directory not found at $OUTPUT_DIR"

if [[ -d "$ZIP_PACKAGE_DIR" ]]; then
  if [[ $OVERWRITE == false ]]; then
    abort_before_staging "Directory already exists at $ZIP_PACKAGE_DIR"
  fi
  # With -O, start from an empty staging directory: leftover symlinks would
  # make the plain `ln -s` calls later in the script fail with "File exists".
  warning "Directory already exists at $ZIP_PACKAGE_DIR; replacing it because -O was given"
  rm -rf -- "$ZIP_PACKAGE_DIR"
fi
# Print the unique, non-empty values of the first CSV column, one per line, in
# first-seen order. The header row is skipped and a trailing carriage return
# (CRLF samplesheets) is removed.
# Arguments: $1 - path to the samplesheet CSV
# Outputs:   sample names on stdout
read_sample_names() {
  awk -F, '
    NR > 1 {
      sub(/\r$/, "")
      if ($1 != "" && !seen[$1]++) print $1
    }
  ' "$1"
}

info "Reading samplesheet from $SAMPLESHEET"
info "Output directory: $OUTPUT_DIR"
info "Creating temporary directory at $ZIP_PACKAGE_DIR"

# from samplesheet.csv read column after header in first row into Bash array
# (line by line, so a name is never split on whitespace or glob-expanded)
samples=()
while IFS= read -r sample_name; do
  samples+=("$sample_name")
done < <(read_sample_names "$SAMPLESHEET")

if ((${#samples[@]} == 0)); then
  warning "No sample names found in $SAMPLESHEET"
else
  # join sample names with ; delimiter
  samples_joined=$(printf "; %s" "${samples[@]}")
  info "Found the following samples: ${samples_joined:2}"
fi
# Refuse to replace a package from an earlier run unless -O was given.
# Arguments: $1 - label used in the message, $2 - path of the archive
refuse_existing_archive() {
  local label=$1 archive=$2
  if [[ -f "$archive" ]]; then
    if [[ $OVERWRITE == false ]]; then
      error "$label package already exists at $archive!"
      exit 1
    fi
    warning "$label package already exists at $archive; continuing because -O was given"
  fi
}

# Both archives are written at the end of the script, so both are checked, and
# before the staging directory is created.
refuse_existing_archive "Zip" "$ZIP_PACKAGE_DIR.zip"
refuse_existing_archive "Tar.gz" "$ZIP_PACKAGE_DIR.tar.gz"

# Create a temporary directory to store the results
mkdir -p "$ZIP_PACKAGE_DIR"
# Stage the run-wide subtyping report at the top level of the package.
# The file is tested with -f first: realpath still prints a path when only the
# last path component is missing, so its output cannot reveal a missing report
# and a dangling symlink would be staged instead.
subtyping_report="$OUTPUT_DIR/nf-flu-subtyping-report.xlsx"
if [[ -f "$subtyping_report" ]]; then
  subtyping_report=$(realpath -- "$subtyping_report")
  if [[ $VERBOSE == true ]]; then
    info "Found subtyping report at $subtyping_report"
  fi
  ln -sf -- "$subtyping_report" "$ZIP_PACKAGE_DIR/nf-flu-subtyping-report.xlsx"
else
  error "Subtyping report not found"
fi
# Stage the run-wide Nextclade table at the top level of the package.
# The file is tested with -f first: realpath fails outright when the nextclade
# directory is absent and prints a path when only the file is missing, so its
# output cannot be used to detect a missing table.
nextclade_tsv="$OUTPUT_DIR/nextclade/nextclade.tsv"
if [[ -f "$nextclade_tsv" ]]; then
  nextclade_tsv=$(realpath -- "$nextclade_tsv")
  if [[ $VERBOSE == true ]]; then
    info "Found Nextclade TSV at $nextclade_tsv"
  fi
  ln -sf -- "$nextclade_tsv" "$ZIP_PACKAGE_DIR/nextclade.tsv"
else
  error "Nextclade TSV not found"
fi
# Collect the absolute paths of everything under the flumut results directory
# into the flumut_outputs array, then stage them under FluMut/.
flumut_outputs=()
for flumut_path in "$OUTPUT_DIR/flumut/"*; do
  # An unmatched glob stays literal (directory missing or empty); skip it
  # rather than linking a path that does not exist.
  [[ -e "$flumut_path" ]] || continue
  flumut_outputs+=("$(realpath -- "$flumut_path")")
done

if ((${#flumut_outputs[@]} == 0)); then
  warning "No FluMut outputs found in $OUTPUT_DIR/flumut"
elif [[ $VERBOSE == true ]]; then
  info "Found ${#flumut_outputs[@]} Flumut outputs"
  info "1st Flumut output: ${flumut_outputs[0]}"
  info "1st Flumut output realpath: $(realpath -- "${flumut_outputs[0]}")"
fi

mkdir -p "$ZIP_PACKAGE_DIR/FluMut"
# The ${arr[@]+...} form expands to nothing for an empty array, which older
# bash versions would otherwise reject under `set -u`.
for flumut_out in ${flumut_outputs[@]+"${flumut_outputs[@]}"}; do
  ln -sf -- "$flumut_out" "$ZIP_PACKAGE_DIR/FluMut/$(basename -- "$flumut_out")"
done
# Symlink selected results to the temporary directory

# Run find with the given arguments and store every match in the array
# `found_paths` (the caller's local, if it declared one). Matches are read
# NUL-delimited so paths with spaces or newlines stay intact. A failing find
# (for example a missing start directory) simply yields an empty array.
# Arguments: find start path(s) and expression
find_paths() {
  found_paths=()
  local match
  while IFS= read -r -d '' match; do
    found_paths+=("$match")
  done < <(find "$@" -print0)
}

# Symlink each given file, by absolute path, into a directory under its own
# base name. Existing links are replaced.
# Arguments: $1 - destination directory; remaining - files to link
link_files_into() {
  local dest_dir=$1
  shift
  local src
  for src in "$@"; do
    ln -sf -- "$(realpath -- "$src")" "$dest_dir/$(basename -- "$src")"
  done
}

# Print an info message only when verbose output was requested.
# Arguments: $1 - message
log_verbose() {
  if [[ $VERBOSE == true ]]; then
    info "$1"
  fi
}

# Stage the per-sample results of one sample under $ZIP_PACKAGE_DIR/<sample>.
# A sample without a consensus sequence is skipped with a warning.
# Globals:   OUTPUT_DIR, ZIP_PACKAGE_DIR, VERBOSE, subtyping_report,
#            flumut_outputs (read)
# Arguments: $1 - sample name
package_sample() {
  local sample=$1
  local sample_dir="$ZIP_PACKAGE_DIR/$sample"
  local found_paths=()
  local consensus flumut_out nextclade_tsv_path

  log_verbose "Symlinking sample '$sample' results to $ZIP_PACKAGE_DIR"

  # Find the consensus sequence for the sample; the first match is used.
  find_paths "$OUTPUT_DIR/consensus/bcftools" -name "${sample}.consensus.fasta"
  if ((${#found_paths[@]} == 0)); then
    warning "Consensus sequence not found for sample '$sample'"
    return 0
  fi
  consensus=${found_paths[0]}
  log_verbose "Found consensus sequence for sample '$sample' at $consensus"

  mkdir -p "$sample_dir/"{Annotations,FluMut,Variants,GenoFLU}
  ln -sf -- "$(realpath -- "$consensus")" "$sample_dir/${sample}.consensus.fasta"

  # The run-wide subtyping report is copied into every sample folder, but only
  # when it really exists, so no dangling link is staged.
  if [[ -n "${subtyping_report:-}" && -f "$subtyping_report" ]]; then
    ln -sf -- "$(realpath -- "$subtyping_report")" \
      "$sample_dir/${sample}-nf-flu-subtyping-report.xlsx"
  fi

  # Run-wide FluMut outputs, prefixed with the sample name.
  for flumut_out in ${flumut_outputs[@]+"${flumut_outputs[@]}"}; do
    # if FluMut output file extension is '.fasta', skip it
    if [[ "$flumut_out" == *.fasta ]]; then
      continue
    fi
    ln -sf -- "$flumut_out" \
      "$sample_dir/FluMut/${sample}-$(basename -- "$flumut_out")"
  done

  # Variant calls (Clair3 or FreeBayes VCFs).
  find_paths "$OUTPUT_DIR/variants/" \
    \( -name "${sample}.*.clair3.vcf.gz" -o -name "${sample}.*.freebayes.vcf" \)
  log_verbose "Found ${#found_paths[@]} VCF files for sample '$sample'"
  if ((${#found_paths[@]} > 0)); then
    if [[ $VERBOSE == true ]]; then
      info "1st VCF: ${found_paths[0]}"
      info "1st VCF realpath: $(realpath -- "${found_paths[0]}")"
    fi
    link_files_into "$sample_dir/Variants" "${found_paths[@]}"
  fi

  # Annotations; the bcftools annotations are only looked at when the sample
  # has an annotation directory of its own.
  if [[ -d "$OUTPUT_DIR/annotation/$sample" ]]; then
    find_paths "$OUTPUT_DIR/annotation/$sample" -name "${sample}.*"
    if ((${#found_paths[@]} > 0)); then
      link_files_into "$sample_dir/Annotations" "${found_paths[@]}"
    fi
    if [[ -d "$OUTPUT_DIR/annotation/bcftools/$sample" ]]; then
      find_paths "$OUTPUT_DIR/annotation/bcftools/$sample" -name "${sample}.*"
      if ((${#found_paths[@]} > 0)); then
        link_files_into "$sample_dir/Annotations" "${found_paths[@]}"
      fi
    fi
  fi

  # BLASTN mismatch report; the first match is used.
  if [[ -d "$OUTPUT_DIR/mismatch_report" ]]; then
    find_paths "$OUTPUT_DIR/mismatch_report" -name "${sample}-blastn-report.xlsx"
    if ((${#found_paths[@]} == 0)); then
      warning "BLASTN mismatch report not found for sample '$sample'"
    else
      log_verbose "Found mismatch report for sample '$sample' at '${found_paths[0]}'"
      link_files_into "$sample_dir" "${found_paths[0]}"
    fi
  fi

  # GenoFLU outputs.
  if [[ -d "$OUTPUT_DIR/genoflu" ]]; then
    find_paths "$OUTPUT_DIR/genoflu/" -name "${sample}.*"
    if ((${#found_paths[@]} == 0)); then
      warning "GenoFLU output not found for sample '$sample'"
    else
      log_verbose "Found GenoFLU output for sample '$sample' at '${found_paths[0]}'"
      link_files_into "$sample_dir/GenoFLU" "${found_paths[@]}"
    fi
  fi

  # Run-wide Nextclade table, named after the sample.
  nextclade_tsv_path="$OUTPUT_DIR/nextclade/nextclade.tsv"
  if [[ -f "$nextclade_tsv_path" ]]; then
    ln -sf -- "$(realpath -- "$nextclade_tsv_path")" \
      "$sample_dir/${sample}.nextclade.tsv"
  fi
}

for sample in ${samples[@]+"${samples[@]}"}; do
  package_sample "$sample"
done
# Build the zip and tar.gz packages from the staged directory.
if [[ $VERBOSE == true ]]; then
  info "Listing files in $ZIP_PACKAGE_DIR"
  # tree is optional; skip the listing quietly when it is not installed.
  if command -v tree > /dev/null 2>&1; then
    tree "$ZIP_PACKAGE_DIR" || true
  fi
fi

info "Creating zip package '$ZIP_PACKAGE_DIR.zip'..."
# zip updates an existing archive in place instead of replacing it, so entries
# from an earlier run would survive. When overwriting, start from scratch.
if [[ $OVERWRITE == true ]]; then
  rm -f -- "$ZIP_PACKAGE_DIR.zip"
fi
# Without -y, zip stores the files the staged symlinks point to.
zip -r "$ZIP_PACKAGE_DIR.zip" "$ZIP_PACKAGE_DIR"
info "Created zip package at $(realpath "$ZIP_PACKAGE_DIR.zip")"

info "Creating tar.gz '${ZIP_PACKAGE_DIR}.tar.gz'"
# --dereference archives the link targets rather than the symlinks themselves.
tar --dereference -czf "${ZIP_PACKAGE_DIR}.tar.gz" "$ZIP_PACKAGE_DIR"
info "Created tar.gz package at $(realpath "$ZIP_PACKAGE_DIR.tar.gz")"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment