My current Pawsey nextflow.config
// have this as nextflow.config in the folder of your run for Pawsey's Setonix
// I settled on this command for nf-core/mag:
// nextflow run nf-core/mag --input '*R{1,2}.fastq.gz' --outdir results
// --skip_spades --cat_db https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20210107.tar.gz
// --gtdb 'https://data.gtdb.ecogenomic.org/releases/release202/202.0/auxillary_files/gtdbtk_r202_data.tar.gz'
// -resume -profile singularity
// --refine_bins_dastool --postbinning_input both
// --busco_download_path /SOMEWHERE/busco-data.ezlab.org/v5/data
// -disable-jobs-cancellation
// -disable-jobs-cancellation is super-useful for the 'main' submitting job on Setonix, as there's a 24h walltime limit.
// If that limit is hit, the main job is cancelled - and by default every still-running sub-job is cancelled with it.
// This setting keeps those running sub-jobs alive.
// I download the BUSCO data manually from busco-data.ezlab.org/v5/data, then extract all downloaded tar.gz files in their folders
// - otherwise each BUSCO job downloads its lineage data separately, and with thousands of jobs that gets weird fast
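// A rough sketch of that mirror-and-extract step (wget flags and the resulting paths are
// assumptions - check what lands on disk before pointing --busco_download_path at it):
// wget --recursive --no-parent https://busco-data.ezlab.org/v5/data/
// find busco-data.ezlab.org/v5/data -name '*.tar.gz' -execdir tar -xzf '{}' \;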
//
// I use DAS Tool, and I keep both the unrefined and the DAS Tool-refined outputs - refinement removes some noise,
// but it can also drop some of the weirder outlier bins. Good to look at both.
//
// I usually skip SPAdes as it adds a lot of time and has (for me, so far) produced worse assemblies
//
// BUSCO and KRONA make tons and tons of files, which is why I set their scratch folder to /tmp. If either job crashes,
// remove that line, as /tmp sits on the compute nodes and you might not see important logs from the login node.
// Same for GTDBTK - /tmp on these nodes is RAM-backed, so jobs can run out of 'space' when in reality they ran out of memory.
// In those cases, replace /tmp with something else or just delete the line.
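// If you need a disk-backed scratch instead, something like this works (hypothetical path -
// pick a directory that actually exists under your /scratch allocation):
// process {
//     withName: GTDBTK_CLASSIFY {
//         scratch = '/scratch/your_project/your_user/tmp'
//     }
// }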
// clusterOptions below pulls your account from the PAWSEY_PROJECT environment variable;
// swap in your Pawsey account group directly if that variable isn't set.
// IMPORTANT: include this line in the SLURM script submitting the master nextflow job:
// unset SBATCH_EXPORT
// So the child jobs will have the Singularity module loaded. See https://support.pawsey.org.au/documentation/display/US/Nextflow
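// A minimal sketch of such a master-job script (partition, module versions, and the
// account placeholder are assumptions - adapt them to your project):
// #!/bin/bash -l
// #SBATCH --account=your_project    (SBATCH lines are not shell-expanded, so spell the account out)
// #SBATCH --partition=work
// #SBATCH --time=24:00:00
// unset SBATCH_EXPORT
// module load singularity/4.1.0-nompi
// module load nextflow
// nextflow run nf-core/mag ... -profile singularity -resume -disable-jobs-cancellation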
cleanup = true
resume = true

// Singularity settings go in their own top-level scope; inside process {} Nextflow ignores them
singularity {
    enabled = true
    autoMounts = true
    envWhitelist = 'SINGULARITY_BINDPATH, SINGULARITYENV_LD_LIBRARY_PATH, SINGULARITYENV_LD_PRELOAD'
}

process {
    cache = 'lenient'
    stageInMode = 'symlink'
    module = 'singularity/4.1.0-nompi'
    executor = 'slurm'
    queue = { task.memory < 110.GB ? 'work' : 'highmem' }
    clusterOptions = "--account=${System.getenv('PAWSEY_PROJECT')}"
    withName: 'BOWTIE2_ASSEMBLY_ALIGN|BOWTIE2_PHIX_REMOVAL_ALIGN' {
        cpus = { 36 }
        memory = { 50.GB }
        time = { 16.h }
        errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' }
    }
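    // If a retry keeps failing at the same size, a common Nextflow pattern is to grow
    // the resources with the attempt number instead (a sketch, not part of my actual config):
    // memory = { 50.GB * task.attempt }
    // time = { 16.h * task.attempt }
    // maxRetries = 3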
    withName: PROKKA {
        cpus = { 36 }
        time = { 16.h }
        errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' }
    }
    withName: MEGAHIT {
        cpus = { 36 }
        time = { 24.h }
        memory = { 200.GB }
        queue = { 'highmem' }
    }
    // Some stuff for epi2me
    withName: fastcat {
        cpus = { 36 }
        time = { 16.h }
    }
    withName: 'kraken_pipeline:run_kraken2|run_kraken2' {
        cpus = { 36 }
        time = { 24.h }
        memory = { 700.GB }
        queue = { 'highmem' }
    }
    withName: 'BUSCO|KRONA' {
        scratch = '/tmp'
    }
    withName: BOWTIE2_ASSEMBLY_BUILD {
        memory = 50.GB
    }
    withName: GTDBTK_CLASSIFY {
        queue = 'highmem'
        time = { 24.h }
        memory = 300.GB
        scratch = '/tmp'
    }
    withName: BUSCO {
        memory = 50.GB
    }
}
executor {
    queueSize = 100
    $slurm {
        pollInterval = '1 min'
        queueStatInterval = '5 min'
    }
    $local {
        pollInterval = '2 sec'
    }
}
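// Optional, and not in my original config: the scheduler can be protected further by
// throttling how fast Nextflow submits jobs, e.g.
// executor { submitRateLimit = '20/1min' }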
params {
    max_cpus = 36
    max_time = 24.h
}
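// nf-core pipelines also accept a max_memory cap; the value below is an assumption -
// match it to what a single work-queue node actually offers:
// params { max_memory = 230.GB }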
env {
    PYTHONNOUSERSITE = 1
}