edawson · November 26, 2020 20:19
diff --git a/nextflow cheat sheet b/nextflow cheat sheet
 #Example 1:

 #!/usr/bin/env nextflow

 params.str = 'Hello world!'

 process AFcalc {

    // Capture the standard output of the script in the 'result' channel;
    // without this declaration the 'result' channel used below is undefined.
    output:
    stdout result

    """
    echo  '${params.str}'
    """
 }

 //this is necessary to print the output
 result.subscribe {
    println it.trim()
 }
 #If we run this script by doing:
 nextflow run decorate.nf
 //
 # Propagating parameters
 #!/usr/bin/env nextflow

 params.query = "test.py"

 query = file(params.query)

 process do_cat {
    input:
    file query

    output:
    file out_cat

    """
    cat $query > out_cat
    """
 }


 process do_another_thing {
    input:
    file out_cat

    """
    cat $out_cat
    """
 }

 result.subscribe {
    println it.trim()
 }

 #This script will print out:
 Hello world!

 #If now we run it by doing:
 ./bin/nextflow run decorate.nf --str 'Hola mundo'

 We are assigning a value for the --str param from the command line
 //
 #In the second example, we execute a python script that just prints out hello:
 #!/usr/bin/env nextflow

 params.script = "test.py"

 script = file(params.script)

 process AFcalc {
    input:
    file script

    """
    python $script
    """
 }

 result.subscribe {
    println it.trim()
 }
 //
 ### Configuration files:
 Write a file named 'nextflow.config' and put it in the dir where you will run nextflow

 #comments in the config file are written as '//' (single line) or '/* .. */' (to comment out a block spanning multiple lines)
 //
 #Ex1:
 #Passing a param to the script directive,
 #write the following to 'nextflow.config'
 params.script='test.py'

 #Then in the file with the analyses we write the following:
 #!/usr/bin/env nextflow


 script = file(params.script)

 process AFcalc {
    input:
    file script


    """
    python $script
    """
 }

 result.subscribe {
    println it.trim()
 }
 //
 / We name the channel 'num'
 num = Channel.from( 'test1', 'test2', 'test3' )

 process basicExample {
  input:
  val x from num

  // Send the standard output of each job to the 'result' channel so that
  // the subscribe call below has something to print.
  output:
  stdout result

  "echo process job $x"

 }

 // Print every line emitted by the process
 result.subscribe {
    println it.trim()
 }
 /This will print:
 [e5/20785b] Submitted process > basicExample (1)
 [15/cd4308] Submitted process > basicExample (3)
 [71/b0abbe] Submitted process > basicExample (2)
 process job test1
 process job test3
 process job test2
 /
 #if we want to print the contents of a channel we just need to do:
 num = Channel.from( 'test1', 'test2', 'test3' )
 num.println()
 /
 When the val has the same name as the channel from where the data is received, the from part can be omitted. 

 num = Channel.from( 1, 2, 3 )

 process basicExample {
  input:
  val num

  "echo process job $num"

 }
 /
 #example of channel factory
 zeroToNine = Channel.from( 0..9 )

 process test {

    input:
    val x from zeroToNine

    """
    echo $x
    """
 }
 /
 #the same as above but creating the channel factory from a list:
 myList = [1,2,3,4]

 zeroToNine = Channel.from( myList )

 process test {

        input:
         val x from zeroToNine

        """
        echo $x
        """
 }
 /
 #Now, let's create a factory channel from a string composed of several
 #comma-separated elements:
 chr_str="chr1,chr2,chr3"

 chrList = Channel.from( chr_str.split(',') )

 process test {

        input:
         val x from chrList

        """
        echo $x
        """
 }
 //
 / Using a channel to read all files and operate on each particular file
 / In this case, we count the lines for each particular file

 files = Channel.fromPath( './*.py' )

 process countLines {
  input:
  file query_file from files

  // Send the 'wc -l' output of each file to the 'result' channel so that
  // the subscribe call below has something to print.
  output:
  stdout result

  "wc -l ${query_file}"

 }

 // Print the line count reported for each file
 result.subscribe {
    println it.trim()
 }
 / This will print:
 [a1/2d4a9a] Submitted process > countLines (2)
 [99/124b71] Submitted process > countLines (1)
 1 test.py
 1 test1.py
 //
 # Pass the parameters from one process to the other

 #!/usr/bin/env nextflow

 params.query = "test.py"

 query = file(params.query)

 process do_cat {
    input:
    file query

    output:
    file out_cat

    """
    cat $query > out_cat
    """
 }


 process do_another_thing {
    input:
    file out_cat

    """
    cat $out_cat
    """
 }

 result.subscribe {
    println it.trim()
 }
 //
 #Create an output channel and write the output to a file named
 #'result.txt' that will be placed in a folder inside the 'work' folder
 #created by nextflow
 process randomNum {

   output:
   file 'result.txt' into numbers

   '''
   echo $RANDOM > result.txt
   '''

 }

 numbers.subscribe { println "Received: " + it.text }
 //
 # creating an output file from parameters used with the script

 params.outdir='17_09_2018/out'

 out_annot = file(params.outdir + "/annotation.txt")

 process randomNum {

        """
        echo "hello" > ${out_annot}
        """

 }

 result.subscribe {
    println it.trim()
 }
 //
 #Create an output filename from params and pass the output file to a different process

 params.outdir='17_09_2018/out'

 out_annot = file(params.outdir + "/annotation.txt")

 process randomNum {

        """
        echo "hello" > ${out_annot}
        """
 }

 process printFile {
        input:
        file out_annot

        """
        cat ${out_annot}
        """
 }

 result.subscribe {
    println it.trim()
 }
 //
 # Write some string and variable to the log

 out_Annotate=file(params.outdir+"/annot_tab2."+params.region+".txt")

 log.info "Hello: ${out_Annotate}"

 process Annotate {


        """
        python ${params.scripts_dir}/annotate.py --AFcalc ${params.scripts_dir} --phased_vcf ${params.phased_vcf} --sample_panel ${params.sample_panel} --tabix ${params.tabix} --region ${params.region} --pops ${params.pops} --exome ${params.exome} --outdir ${params.outdir} --ann_vcf ${params.ann_vcf}
        """
 }
 //
 # printing a string (Groovy syntax)
 println "Hello"
 //
 # replacing characters in a string
 println "aaaaa".replace("a", "b")
 //
 # replace in action:
 cutoff_values=[0.95,0.96]

 process test1 {

        input:
        each cutoff from cutoff_values

        output:
        file(output_cutoff)

        script:
        output_cutoff="${cutoff}".replace('.', '_')

        """
        touch "${output_cutoff}"
        """
 }
 # It will create 2 files named 0_95 and 0_96

 #2nd example:
 cutoff_values=[0.95,0.96]


 process test1 {

        input:
        each cutoff from cutoff_values

        output:
        file(output_cutoff)

        script:
        output_cutoff="${cutoff}".replace('.', '_')+".vcf"

        """
        touch "${output_cutoff}"
        """
 }
 # It will create 2 files named 0_95.vcf and 0_96.vcf
 //
 # running nextflow
 #
 #cleaning a project
 ./bin/nextflow clean -f
 //
 # Executor

 #In order to run all processes with lsf, send them to a specific queue and request a certain number of cpus and amount of memory, add the following
 #to nextflow.config

 process {
  executor='lsf'
  queue='production-rh7'
  cpus=1
  memory=1.GB
 }
 /
 # Running specific processes using a certain executor:
 process runFastqSimpleQA {
    /*
    An example process
    */

    memory '2 GB'
    executor 'lsf'
    cpus 2
    queue 'standard1'

    """
    echo "Hello"
    """
 }
 /
 # Factory from file
 # 'runs.txt' is split into 2-line chunks and each of the
 # chunks is processed by foo. In this factory, each of the lines in the file 
 # is considered a file and this is why the code in the 'script' part is considered
 # a string

 Channel
    .fromPath('runs.txt')
    .splitText(by: 2)
    .set{ chunks_ch }

 process foo {
  echo true
  input:
  file x from chunks_ch

  script:
  """
  rev $x | rev
  """
 }
 /
 #Factory from file ,in this case a list of run ids. Each line
 #is considered a string
 params.index = 'runs.txt'

 Channel
    .fromPath(params.index)
    .splitCsv(header:true)
    .map{ row-> row.runId }
    .set { runs_ch }

 process foo {
    input:
    val x from runs_ch

    script:
    """
    echo $x
    """
 }

 result.subscribe {
    println it.trim()
 }
 //
 #propagate a file and a value and pass them to another process
 #that will modify the file name
 process createFile {

  output:
  file 'test.txt' into output_f
  val 'runid' into output_v

  script:
  """
  touch test.txt
  """

 }

 process mvFile {

  input:
  file output_f
  val output_v

  """
  mv ${output_f} ${output_v}
  """
 }
 //
 # this factory reads a .csv file having different columns and will create
 # a variable for each column that can be used by the process
 params.index = 'runs.txt'

 Channel
    .fromPath(params.index)
    .splitCsv(header:true)
    .map{ row-> tuple(row.sampleId, file(row.read1), file(row.read2)) }
    .set { samples_ch }

 process foo {
    input:
    set sampleId, file(read1), file(read2) from samples_ch

    script:
    """
    echo your_command --sample $sampleId --reads $read1 $read2
    """
 }

 result.subscribe {
    println it.trim()
 }


 //
 #saving the output files of a process to a directory / folder

 process foo {
 publishDir 'results', saveAs:{ filename -> "foo_$filename" }

 output: 
 file '*.txt'

 '''
 touch this.txt
 touch that.txt
 '''
 }
 # It will generate:
 results/foo_that.txt
 results/foo_this.txt
 //
 # This workflow is interesting because it propagates 
 # a file and a value and the value is used by mvFile process
 # to name a new file and put this new file into 'results' folder

 process createFile {

  output:
  file 'test.txt' into output_f
  val 'runid' into output_v

  script:
  """
  touch test.txt
  """

 }

 process mvFile {
  publishDir 'results', saveAs:{ filename -> "$filename" }

  input:
  file output_f
  val output_v

  output:
  file "${output_v}.test1.txt"

  """
  mv ${output_f} ${output_v}.test1.txt
  """
 }
 //
 #Saving all files produced in a process to a certain dir:
 # This is the difference with respect to publishDir in combination with saveAs,
 # which allows saving only some of the files
 process foo {

    publishDir 'out/'

    output:
    file 'chunk_*' into letters

    '''
    printf 'Hola' | split -b 1 - chunk_
    '''
 }
 //
 #Saving all files produced in a process to a certain folder without creating the symbolic link
 publishDir "result", mode: 'copy', overwrite: true
 //
 # Use nextflow with Docker

 #First, put the following in your nextflow.config:

 process.container = 'variant_filtering:latest' // this is the name of the image
 docker.enabled = true
 docker.runOptions = '--volume $HOME:$HOME --workdir $PWD' // Mount the files within the container

 #Then, you run your nextflow workflow as usual:
 nextflow -C test.config run test.nf --vcf input10.reheaded.vcf.gz
 #Where input10.reheaded.vcf.gz will be in your local system
 //
 # Conditional process (downloaded from https://github.com/nextflow-io/patterns/blob/master/conditional-process.nf)

 #!/usr/bin/env nextflow

 /*
 * Copyright (c) 2018, Centre for Genomic Regulation (CRG).
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

 /*
  * author Paolo Di Tommaso <[email protected]>
  */

 params.flag = false

 process foo {
  output:
  file 'x.txt' into foo_ch
  when:
  !params.flag

  script:
  '''
  echo foo > x.txt
  '''
 }

 process bar {
  output:
  file 'x.txt' into bar_ch
  when:
  params.flag

  script:
  '''
  echo bar > x.txt
  '''
 }

 process omega {
  echo true
  input:
  file x from foo_ch.mix(bar_ch)

  script:
  """
  cat $x
  """
 }
 # This workflow will execute bar if params.flag=true and foo if params.flag=false
 So if you run:
 nextflow run conditional_process.nf
 You get:
 Launching `conditional_process.nf` [shrivelled_lalande] - revision: fab3a727cf
 [warm up] executor > local
 [6a/87f036] Submitted process > foo
 [fb/647063] Submitted process > omega (1)
 But if you run:
 nextflow run conditional_process.nf --flag
 You get:
 N E X T F L O W  ~  version 18.10.1
 Launching `conditional_process.nf` [awesome_sax] - revision: fab3a727cf
 [warm up] executor > local
 [6c/edf567] Submitted process > bar
 [0e/4b6f7d] Submitted process > omega (1)
 bar
 //
 # Nextflow environment variables:
 NXF_WORK # to set the work dir
 NXF_HOME # to set Nextflow home
 NXF_TEMP # to set Nextflow tmp dir
 //
 #Maximum number of processes to parallelize
 process doNotParallelizeIt {

   maxForks 1

   '''
   <your script here>
   '''

 }
 By default this value is equal to the number of CPU cores available minus 1.
 //
 #Use the errorStrategy directive if you want to instruct nf what to do in case of an error
 memory '2 GB'
 executor 'lsf'
 errorStrategy 'ignore'
 //
 #setting the working directory from the command line
 nextflow -C test.config run test.nf -w /path/to/workingdir
 //
 #mix operator to mix two channels:
 chr_str="chr1,chr2,chr3"
 letter_str="a,b,c"

 letterList = Channel.from( letter_str.split(','))
 chrList = Channel.from( chr_str.split(',') )

 process test {
        echo true

        input:
        val x from chrList.mix(letterList)

        """
        echo $x
        """
 }

 # This command will print out:
 chr3
 chr1
 a
 b
 c
 chr2
 //
 # Combine 2 channels (cartesian product)
 chr_str="chr1,chr2,chr3"
 letter_str="a,b,c"

 letterList = Channel.from( letter_str.split(','))
 chrList = Channel.from( chr_str.split(',') )

 process test {
        echo true

        input:
        set x,y from chrList.combine(letterList)

        """
        echo $x $y
        """
 }
 # Will print out:
 chr1 b
 chr2 c
 chr2 b
 chr3 c
 chr3 a
 chr1 a
 chr3 b
 chr1 c
 chr2 a
 //
 # How do I use the same channel multiple times:

 vegetable_datasets = Channel.fromPath(params.input)

 vegetable_datasets.into { datasets_clustalw; datasets_tcoffee }

 process clustalw2_align {
    input:
    file vegetable_fasta from datasets_clustalw

    output:
    file "${vegetable_fasta.baseName}.aln" into clustalw_alns

    script:
    """
    clustalw2 -INFILE=${vegetable_fasta}
    """
 }

 process tcoffee_align {
    input:
    file vegetable_fasta from datasets_tcoffee

    output:
    file "${vegetable_fasta.baseName}.aln" into tcoffee_alns

    script:
    """
    t_coffee ${vegetable_fasta}
    """
 }
 //
 #tag
 #The tag directive allows you to associate each process executions with a custom label, so that it will be easier to
 identify them in the log file or in the trace execution report. For example:
 process foo {
    tag { code }
    input:
    val code from 'alpha', 'gamma', 'omega'
    """
    echo $code
    """
 }
 And the log will have:
 [6e/28919b] Submitted process > foo (alpha)
 [d2/1c6175] Submitted process > foo (gamma)
 [1c/3ef220] Submitted process > foo (omega)
 /
 #tag mixing variable and text:
 tag "FASTQC on $sample_id"
 //
 #duplicating output to two channels within a process:
 process get_variant_annotations {
        /*
        Process to get the variant annotations for training files
        and for VCF file to annotate (for a single chromosome in this case)
        */

        memory '2 GB'
        executor 'local'
        queue "${params.queue}"
        cpus 1

        input:
        file tp_vcf
        file fp_vcf

        output:
        file 'TP_annotations.tsv.gz' into tp_annotations_train, tp_annotations_rfe
        file 'FP_annotations.tsv.gz' into fp_annotations_train, fp_annotations_rfe

        """
        bcftools query -H -f '${params.annotations}' ${tp_vcf} | bgzip -c > TP_annotations.tsv.gz
        bcftools query -H -f '${params.annotations}' ${fp_vcf} | bgzip -c > FP_annotations.tsv.gz
        """
 }
 //
 # Initializing an empty channel:
 chrs_splitmultiallelic_withchr=Channel.empty()

 if (params.region) {
    chrList = Channel.from( params.region.split(',') )

    chrList.into { chrs_splitmultiallelic_withchr ; chrs_intersecionCallSets; chrs_trainModel; chrs_rfe}
 }
 # The chrs_splitmultiallelic_withchr channel needs to be initialized out of the if scope
 //
 ##groovy syntax:
 /
 #print out 'hello' to STDOUT
 process test {

        script:

        println("hello")
        """
        echo "hello"
        """
 }
 //
 # defining and initializing a variable and printing to STDOUT the contents
 process test {

        def replace="hello"

        println(replace)

        script:
        """
        echo "hello"
        """
 }
 /
 #conditional if statement:
 # exec is used to execute Nextflow code
 process test {

        exec:
        def replace="hello"

        if (replace=="hello") {
           println(replace)
        }
 }
 /
 #scope of variables:
 process test {

        exec:
        def replace="hello"

        if (replace=="hello") {
           def bye="bye"
        }
        println(bye)
 }
 #Trying to print bye out of scope will throw an error:ERROR ~ Error executing process > 'test'

 Caused by:
  Unknown variable 'bye' -- Make sure you didn't misspell it or define somewhere in the script before use it
 #In order to fix this one needs to do:
 process test {

        exec:
        def replace="hello"
        // Declare 'bye' in the outer scope so it is still visible after the
        // if block; 'def bye=null' is also valid.
        def bye

        if (replace=="hello") {
           bye="bye"
        }
        println(bye)
 }
 //
 #conditional execution based on a param:
 process test {

        script:

        if (params.flag) {
           println("hello")
        } else {
           println("bye")
        }

        """
        echo "hello"
        """
 }
 # if it is run by nextflow run test.nf --flag true then it will print out "hello"
 //
 #creating a val inside a process that will be output to a channel
 process test1 {

        output:
        val outest into outtest

        exec:
                outest="hello"

 }

 outtest.println()
 # This will print:
 hello
 //
 process test1 {

        output:
        val outest into outtest

        outest="hello"

        script:

                """
                echo "hello"
                """

 }

 outtest.println()
 # This will crash with:
 ERROR ~ Error executing process > 'test1'

 Caused by:
  Missing value declared as output parameter: outest
 #because outest is not initialized within the script/exec block.
 #This will be fixed by doing:
 process test1 {

        output:
        val outest into outtest

        script:
               outest="hello"

               """
               echo "hello"
               """
 }

 outtest.println()
 //
 ////// Check input parameters //////
 if (!params.genome) {
  exit 1, "Please specify a genome file"
 }
 //
 #getting the basenames from files:
 // Channel emitting every .gz file in the current directory
 files = Channel.fromPath( './*.gz' )

 process printFiles {
  input:
  file query_file from files

  // Send the echoed base name to the 'result' channel so that the
  // subscribe call below has something to print.
  output:
  stdout result

  "echo ${query_file.baseName}"

 }

 // Print the base name reported for each file
 result.subscribe {
    println it.trim()
 }
 //
 #with -q we cancel all the messages printed by nextflow at the beginning
 nextflow -q run.nf
 //
 #concat operator 
 c1 = Channel.from( 1,2,3 )
 c2 = Channel.from( 'a','b' )
 c3 = Channel.from( 'z' )

 c1.concat(c2).concat(c3).println()
 # print out in order, so it will print:
 1
 2
 3
 a
 b
 z
 //
 # I've got the following error after running Nextflow:
 Failed to write core dump. Core dumps have been disabled

 This seems to be fixed by entering the following command:
 ulimit -c unlimited
 //
 Restarting an older run that has failed:
 > nextflow log
 TIMESTAMP          	DURATION	RUN NAME        	STATUS	REVISION ID	SESSION ID                          	COMMAND                   
 2017-11-24 18:41:34	672ms   	ecstatic_noether	OK    	bab98280bf 	7a8fefda-c812-4842-9248-2fd1b8d1d1e1	nextflow run <your pipeline>
 2017-11-29 10:55:15	2.5s    	grave_lavoisier 	OK    	6a1acf3211 	56c9a1a1-ad16-4671-b98a-96adbd5051f2	nextflow run <your pipeline>          
 2017-11-29 10:55:29	-       	golden_roentgen 	-     	6a1acf3211 	6b12ae11-74d8-4395-9685-4bb91e05e324	nextflow run <your pipeline>          
 2017-11-29 09:57:37	6.2s    	silly_austin    	OK    	6a1acf3211 	a896b4da-4530-48e5-a519-39016adff6fb	nextflow run <your pipeline>  

 Check the ones that have STATUS=ERR and then get the RUN NAME, then you can resume that particular run by doing:
 nextflow run <your pipeline> -resume grave_lavoisier
 /
 #Checking working folders for processes in a session:
 > nextflow log
 TIMESTAMP          	DURATION	RUN NAME        	STATUS	REVISION ID	SESSION ID                          	COMMAND                   
 2017-11-24 18:41:34	672ms   	ecstatic_noether	OK    	bab98280bf 	7a8fefda-c812-4842-9248-2fd1b8d1d1e1	nextflow run <your pipeline>
 2017-11-29 10:55:15	2.5s    	grave_lavoisier 	OK    	6a1acf3211 	56c9a1a1-ad16-4671-b98a-96adbd5051f2	nextflow run <your pipeline>          
 2017-11-29 10:55:29	-       	golden_roentgen 	-     	6a1acf3211 	6b12ae11-74d8-4395-9685-4bb91e05e324	nextflow run <your pipeline>          
 2017-11-29 09:57:37	6.2s    	silly_austin    	OK    	6a1acf3211 	a896b4da-4530-48e5-a519-39016adff6fb	nextflow run <your pipeline>
 #'nextflow log <run name>' lists the work directories of the tasks executed in that run
 > nextflow log silly_austin
 //
 / #Combining 2 channels:

 #!/usr/bin/env nextflow

 // NOTE: 'echo true' is a process directive and is not valid at the top level
 // of a script, so it is omitted here.
 samples = Channel.from '10000', '20000', '30000'
 chrs = Channel.from 'chr1', 'chr2'
 // combine emits the cartesian product of the two channels (it replaces the
 // deprecated spread operator); the order of the emitted pairs may vary.
 all = samples.combine(chrs)

 all.println()

 # This will produce:
 [10000, chr1]
 [10000, chr2]
 [20000, chr1]
 [20000, chr2]
 [30000, chr1]
 [30000, chr2]

 //
 # Executor in a config file is used to set optional config settings for a given executor. For example, for 'lsf' put this in a config file
 that will be run in the command line using -C:
 executor {
    name = 'lsf'
    queueSize= 500
 }

 'queueSize' is used to set the limit of jobs submitted to cluster
 //
 # checking the progress of nextflow processes:
 1) Enter bjobs -w # examine the EXEC_HOST of the process you want to check
 2) ssh exec_host
 3) Enter 'ps ax |grep ernesto'
 4) Locate the workdir where nextflow is running the process
	#Example 1:

	#!/usr/bin/env nextflow

	params.str = 'Hello world!'

	process AFcalc {

	// Capture the standard output of the script in the 'result' channel;
	// without this declaration the 'result' channel used below is undefined.
	output:
	stdout result

	"""
	echo '${params.str}'
	"""
	}

	//this is necessary to print the output
	result.subscribe {
	println it.trim()
	}
	#If we run this script by doing:
	nextflow run decorate.nf
	//
	# Propagating parameters
	#!/usr/bin/env nextflow

	params.query = "test.py"

	query = file(params.query)

	process do_cat {
	input:
	file query

	output:
	file out_cat

	"""
	cat $query > out_cat
	"""
	}


	process do_another_thing {
	input:
	file out_cat

	"""
	cat $out_cat
	"""
	}

	result.subscribe {
	println it.trim()
	}

	#This script will print out:
	Hello world!

	#If now we run it by doing:
	./bin/nextflow run decorate.nf --str 'Hola mundo'

	We are assigning a value for the --str param from the command line
	//
	#In the second example, we execute a python script that just prints out hello:
	#!/usr/bin/env nextflow

	params.script = "test.py"

	script = file(params.script)

	process AFcalc {
	input:
	file script

	"""
	python $script
	"""
	}

	result.subscribe {
	println it.trim()
	}
	//
	### Configuration files:
	Write a file named 'nextflow.config' and put it in the dir where you will run nextflow

	#comments in the config file are written as '//' (single line) or '/* .. */' (to comment out a block spanning multiple lines)
	//
	#Ex1:
	#Passing a param to the script directive,
	#write the following to 'nextflow.config'
	params.script='test.py'

	#Then in the file with the analyses we write the following:
	#!/usr/bin/env nextflow


	script = file(params.script)

	process AFcalc {
	input:
	file script


	"""
	python $script
	"""
	}

	result.subscribe {
	println it.trim()
	}
	//
	/ We named the channel to 'num'
	num = Channel.from( 'test1', 'test2', 'test3' )

	process basicExample {
	input:
	val x from num

	// Send the standard output of each job to the 'result' channel so that
	// the subscribe call below has something to print.
	output:
	stdout result

	"echo process job $x"

	}

	// Print every line emitted by the process
	result.subscribe {
	println it.trim()
	}
	/This will print:
	[e5/20785b] Submitted process > basicExample (1)
	[15/cd4308] Submitted process > basicExample (3)
	[71/b0abbe] Submitted process > basicExample (2)
	process job test1
	process job test3
	process job test2
	/
	#if we want to print the contents of a channel we just need to do:
	num = Channel.from( 'test1', 'test2', 'test3' )
	num.println()
	/
	When the val has the same name as the channel from where the data is received, the from part can be omitted.

	num = Channel.from( 1, 2, 3 )

	process basicExample {
	input:
	val num

	"echo process job $num"

	}
	/
	#example of channel factory
	zeroToNine = Channel.from( 0..9 )

	process test {

	input:
	val x from zeroToNine

	"""
	echo $x
	"""
	}
	/
	#the same as above but creating the channel factory from a list:
	myList = [1,2,3,4]

	zeroToNine = Channel.from( myList )

	process test {

	input:
	val x from zeroToNine

	"""
	echo $x
	"""
	}
	/
	#Now, let's create a factory channel from a string composed of several
	#comma-separated elements:
	chr_str="chr1,chr2,chr3"

	chrList = Channel.from( chr_str.split(',') )

	process test {

	input:
	val x from chrList

	"""
	echo $x
	"""
	}
	//
	/ Using a channel to read all files and operate on each particular file
	/ In this case, we count the lines for each particular file

	files = Channel.fromPath( './*.py' )

	process countLines {
	input:
	file query_file from files

	// Send the 'wc -l' output of each file to the 'result' channel so that
	// the subscribe call below has something to print.
	output:
	stdout result

	"wc -l ${query_file}"

	}

	// Print the line count reported for each file
	result.subscribe {
	println it.trim()
	}
	/ This will print:
	[a1/2d4a9a] Submitted process > countLines (2)
	[99/124b71] Submitted process > countLines (1)
	1 test.py
	1 test1.py
	//
	# Pass the parameters from one process to the other

	#!/usr/bin/env nextflow

	params.query = "test.py"

	query = file(params.query)

	process do_cat {
	input:
	file query

	output:
	file out_cat

	"""
	cat $query > out_cat
	"""
	}


	process do_another_thing {
	input:
	file out_cat

	"""
	cat $out_cat
	"""
	}

	result.subscribe {
	println it.trim()
	}
	//
	#Create an output channel and write the output to a file named
	#'result.txt' that will be placed in a folder inside the 'work' folder
	#created by nextflow
	process randomNum {

	output:
	file 'result.txt' into numbers

	'''
	echo $RANDOM > result.txt
	'''

	}

	numbers.subscribe { println "Received: " + it.text }
	//
	# creating an output file from parameters used with the script

	params.outdir='17_09_2018/out'

	out_annot = file(params.outdir + "/annotation.txt")

	process randomNum {

	"""
	echo "hello" > ${out_annot}
	"""

	}

	result.subscribe {
	println it.trim()
	}
	//
	#Create an output filename from params and pass the output file to a different process

	params.outdir='17_09_2018/out'

	out_annot = file(params.outdir + "/annotation.txt")

	process randomNum {

	"""
	echo "hello" > ${out_annot}
	"""
	}

	process printFile {
	input:
	file out_annot

	"""
	cat ${out_annot}
	"""
	}

	result.subscribe {
	println it.trim()
	}
	//
	# Write some string and variable to the log

	out_Annotate=file(params.outdir+"/annot_tab2."+params.region+".txt")

	log.info "Hello: ${out_Annotate}"

	process Annotate {


	"""
	python ${params.scripts_dir}/annotate.py --AFcalc ${params.scripts_dir} --phased_vcf ${params.phased_vcf} --sample_panel ${params.sample_panel} --tabix ${params.tabix} --region ${params.region} --pops ${params.pops} --exome ${params.exome} --outdir ${params.outdir} --ann_vcf ${params.ann_vcf}
	"""
	}
	//
	# printing a string (Groovy syntax)
	println "Hello"
	//
	# replacing characters in a string
	println "aaaaa".replace("a", "b")
	//
	# replace in action:
	cutoff_values=[0.95,0.96]

	process test1 {

	input:
	each cutoff from cutoff_values

	output:
	file(output_cutoff)

	script:
	output_cutoff="${cutoff}".replace('.', '_')

	"""
	touch "${output_cutoff}"
	"""
	}
	# It will create 2 files named 0_95 and 0_96

	#2nd example:
	cutoff_values=[0.95,0.96]


	process test1 {

	input:
	each cutoff from cutoff_values

	output:
	file(output_cutoff)

	script:
	output_cutoff="${cutoff}".replace('.', '_')+".vcf"

	"""
	touch "${output_cutoff}"
	"""
	}
	# It will create 2 files named 0_95.vcf and 0_96.vcf
	//
	# running nextflow
	#
	#cleaning a project
	./bin/nextflow clean -f
	//
	# Executor

	#In order to run all processes with lsf, send them to a specific queue and request a certain number of cpus and amount of memory, add the following
	#to nextflow.config

	process {
	executor='lsf'
	queue='production-rh7'
	cpus=1
	memory=1.GB
	}
	/
	# Running specific processes using a certain executor:
	process runFastqSimpleQA {
	/*
	An example process
	*/

	memory '2 GB'
	executor 'lsf'
	cpus 2
	queue 'standard1'

	"""
	echo "Hello"
	"""
	}
	/
	# Factory from file
	# 'runs.txt' is split into 2-line chunks and each of the
	# chunks is processed by foo. In this factory, each of the lines in the file
	# is considered a file and this is why the code in the 'script' part is considered
	# a string

	Channel
	.fromPath('runs.txt')
	.splitText(by: 2)
	.set{ chunks_ch }

	process foo {
	echo true
	input:
	file x from chunks_ch

	// The two 'rev' commands are joined by a plain shell pipe; a
	// backslash-escaped pipe would not be interpreted as a pipe.
	script:
	"""
	rev $x | rev
	"""
	}
	/
	#Factory from file ,in this case a list of run ids. Each line
	#is considered a string
	params.index = 'runs.txt'

	Channel
	.fromPath(params.index)
	.splitCsv(header:true)
	.map{ row-> row.runId }
	.set { runs_ch }

	process foo {
	input:
	val x from runs_ch

	script:
	"""
	echo $x
	"""
	}

	result.subscribe {
	println it.trim()
	}
	//
	#propagate a file and a value and pass them to another process
	#that will modify the file name
	process createFile {

	output:
	file 'test.txt' into output_f
	val 'runid' into output_v

	script:
	"""
	touch test.txt
	"""

	}

	process mvFile {

	input:
	file output_f
	val output_v

	"""
	mv ${output_f} ${output_v}
	"""
	}
	//
	# this factory reads a .csv file having different columns and will create
	# a variable for each column that can be used by the process
	params.index = 'runs.txt'

	Channel
	.fromPath(params.index)
	.splitCsv(header:true)
	.map{ row-> tuple(row.sampleId, file(row.read1), file(row.read2)) }
	.set { samples_ch }

	process foo {
	input:
	set sampleId, file(read1), file(read2) from samples_ch

	script:
	"""
	echo your_command --sample $sampleId --reads $read1 $read2
	"""
	}

	result.subscribe {
	println it.trim()
	}


	//
	#saving the output files of a process to a directory / folder

	process foo {
	publishDir 'results', saveAs:{ filename -> "foo_$filename" }

	output:
	file '*.txt'

	'''
	touch this.txt
	touch that.txt
	'''
	}
	# It will generate:
	results/foo_that.txt
	results/foo_this.txt
	//
	# This workflow is interesting because it propagates
	# a file and a value and the value is used by mvFile process
	# to name a new file and put this new file into 'results' folder

	process createFile {

	output:
	file 'test.txt' into output_f
	val 'runid' into output_v

	script:
	"""
	touch test.txt
	"""

	}

	process mvFile {
	publishDir 'results', saveAs:{ filename -> "$filename" }

	input:
	file output_f
	val output_v

	output:
	file "${output_v}.test1.txt"

	"""
	mv ${output_f} ${output_v}.test1.txt
	"""
	}
	//
	#Saving all files produced in a process to a certain dir:
	# This is the difference with respect to publishDir in combination with saveAs,
	# which allows saving only some of the files
	process foo {

	publishDir 'out/'

	output:
	file 'chunk_*' into letters

	// 'printf' is piped into 'split' with a plain shell pipe; a
	// backslash-escaped pipe would not be interpreted as a pipe.
	'''
	printf 'Hola' | split -b 1 - chunk_
	'''
	}
	//
	#Saving all files produced in a process to a certain folder without creating the symbolic link
	publishDir "result", mode: 'copy', overwrite: true
	//
	# Use nextflow with Docker

	#First, put the following in your nextflow.config:

	process.container = 'variant_filtering:latest' // this is the name of the image
	docker.enabled = true
	docker.runOptions = '--volume $HOME:$HOME --workdir $PWD' // Mount the files within the container

	#Then, you run your nextflow workflow as usual:
	nextflow -C test.config run test.nf --vcf input10.reheaded.vcf.gz
	#Where input10.reheaded.vcf.gz will be in your local system
	//
	# Conditional process (downloaded from https://github.com/nextflow-io/patterns/blob/master/conditional-process.nf)

	#!/usr/bin/env nextflow

	/*
	* Copyright (c) 2018, Centre for Genomic Regulation (CRG).
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to deal
	* in the Software without restriction, including without limitation the rights
	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	* copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in all
	* copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/

	/*
	* author Paolo Di Tommaso <[email protected]>
	*/

	// Selects which producer process runs; false by default.
	params.flag = false

	// Runs only when params.flag is false; writes "foo" to x.txt.
	process foo {
	output:
	file 'x.txt' into foo_ch
	when:
	!params.flag

	script:
	'''
	echo foo > x.txt
	'''
	}

	// Runs only when params.flag is true; writes "bar" to x.txt.
	process bar {
	output:
	file 'x.txt' into bar_ch
	when:
	params.flag

	script:
	'''
	echo bar > x.txt
	'''
	}

	// Consumes the file from whichever producer ran and prints its content.
	process omega {
	echo true
	// foo_ch and bar_ch are merged, so omega receives x.txt from either process.
	input:
	file x from foo_ch.mix(bar_ch)

	script:
	"""
	cat $x
	"""
	}
	# This workflow will execute bar if params.flag=true and foo if params.flag=false
	So if you run:
	nextflow run conditional_process.nf
	You get:
	Launching `conditional_process.nf` [shrivelled_lalande] - revision: fab3a727cf
	[warm up] executor > local
	[6a/87f036] Submitted process > foo
	[fb/647063] Submitted process > omega (1)
	But if you run:
	nextflow run conditional_process.nf --flag
	You get:
	N E X T F L O W ~ version 18.10.1
	Launching `conditional_process.nf` [awesome_sax] - revision: fab3a727cf
	[warm up] executor > local
	[6c/edf567] Submitted process > bar
	[0e/4b6f7d] Submitted process > omega (1)
	bar
	//
	# Nextflow environment variables:
	NXF_WORK # to set the work dir
	NXF_HOME # to set Nextflow home
	NXF_TEMP # to set Nextflow tmp dir
	//
	#Maximum number of processes to parallelize
	process doNotParallelizeIt {

	// Upper bound on how many instances of this process run in parallel;
	// 1 means the tasks are executed one at a time.
	maxForks 1

	'''
	<your script here>
	'''

	}
	By default this value is equal to the number of CPU cores available minus 1.
	//
	#Use the errorStrategy directive if you want to instruct nf what to do in case of an error
	memory '2 GB'
	executor 'lsf'
	errorStrategy 'ignore'
	//
	#setting the working directory from the command line
	nextflow -C test.config run test.nf -w /path/to/workingdir
	//
	#mix operator to mix two channels:
	chr_str="chr1,chr2,chr3"
	letter_str="a,b,c"

	// One channel item per comma-separated token.
	letterList = Channel.from( letter_str.split(','))
	chrList = Channel.from( chr_str.split(',') )

	process test {
	echo true

	// mix merges the items of both channels into a single channel; as the
	// sample output shows, the items arrive in no particular order.
	input:
	val x from chrList.mix(letterList)

	"""
	echo $x
	"""
	}

	# This command will print out:
	chr3
	chr1
	a
	b
	c
	chr2
	//
	# Combine 2 channels (cartesian product)
	chr_str="chr1,chr2,chr3"
	letter_str="a,b,c"

	// One channel item per comma-separated token.
	letterList = Channel.from( letter_str.split(','))
	chrList = Channel.from( chr_str.split(',') )

	process test {
	echo true

	// combine pairs every chromosome with every letter (3 x 3 = 9 tasks);
	// each pair is unpacked into x and y.
	input:
	set x,y from chrList.combine(letterList)

	"""
	echo $x $y
	"""
	}
	# Will print out:
	chr1 b
	chr2 c
	chr2 b
	chr3 c
	chr3 a
	chr1 a
	chr3 b
	chr1 c
	chr2 a
	//
	# How do I use the same channel multiple times:

	vegetable_datasets = Channel.fromPath(params.input)

	// Duplicate the channel so that each process below reads its own copy.
	vegetable_datasets.into { datasets_clustalw; datasets_tcoffee }

	// Aligns each input file with clustalw2; reads from the first copy.
	process clustalw2_align {
	input:
	file vegetable_fasta from datasets_clustalw

	// The expected output is named after the input file's baseName plus ".aln".
	output:
	file "${vegetable_fasta.baseName}.aln" into clustalw_alns

	script:
	"""
	clustalw2 -INFILE=${vegetable_fasta}
	"""
	}

	// Aligns each input file with t_coffee; reads from the second copy.
	process tcoffee_align {
	input:
	file vegetable_fasta from datasets_tcoffee

	output:
	file "${vegetable_fasta.baseName}.aln" into tcoffee_alns

	script:
	"""
	t_coffee ${vegetable_fasta}
	"""
	}
	//
	#tag
	#The tag directive allows you to associate each process execution with a custom label, so that it will be easier to
	identify them in the log file or in the trace execution report. For example:
	process foo {
	// Label each task with the value of its 'code' input; the label is
	// shown in parentheses after the process name in the log.
	tag { code }
	// One task is run for each of the three listed values.
	input:
	val code from 'alpha', 'gamma', 'omega'
	"""
	echo $code
	"""
	}
	And the log will have:
	[6e/28919b] Submitted process > foo (alpha)
	[d2/1c6175] Submitted process > foo (gamma)
	[1c/3ef220] Submitted process > foo (omega)
	/
	#tag mixing variable and text:
	tag "FASTQC on $sample_id"
	//
	#duplicating output to two channels within a process:
	process get_variant_annotations {
	/*
	Process to get the variant annotations for training files
	and for VCF file to annotate (for a single chromosome in this case)
	*/

	memory '2 GB'
	executor 'local'
	queue "${params.queue}"
	cpus 1

	input:
	file tp_vcf
	file fp_vcf

	// Each output file is sent to two channels, so that two downstream
	// consumers can each read it.
	output:
	file 'TP_annotations.tsv.gz' into tp_annotations_train, tp_annotations_rfe
	file 'FP_annotations.tsv.gz' into fp_annotations_train, fp_annotations_rfe

	// Extract the fields listed in params.annotations from each VCF and
	// pipe the table through bgzip to write it compressed.
	"""
	bcftools query -H -f '${params.annotations}' ${tp_vcf} | bgzip -c > TP_annotations.tsv.gz
	bcftools query -H -f '${params.annotations}' ${fp_vcf} | bgzip -c > FP_annotations.tsv.gz
	"""
	}
	//
	# Initializing an empty channel:
	// Declared before the 'if' so the name exists even when params.region is not set.
	chrs_splitmultiallelic_withchr=Channel.empty()

	if (params.region) {
	// One channel item per comma-separated region.
	chrList = Channel.from( params.region.split(',') )

	// Fan chrList out into four channels, one per consumer.
	chrList.into { chrs_splitmultiallelic_withchr ; chrs_intersecionCallSets; chrs_trainModel; chrs_rfe}
	}
	# The chrs_splitmultiallelic_withchr channel needs to be initialized out of the if scope
	//
	##groovy syntax:
	/
	#print out 'hello' to STDOUT
	process test {

	script:

	// Groovy statement placed before the script string; prints "hello" to STDOUT.
	println("hello")
	// The string below is the command executed by the task.
	"""
	echo "hello"
	"""
	}
	//
	# defining and initializing a variable and printing to STDOUT the contents
	process test {

	// Define a local variable and initialise it.
	def replace="hello"

	// Print the content of the variable to STDOUT.
	println(replace)

	script:
	"""
	echo "hello"
	"""
	}
	/
	#conditional if statement:
	# exec is used to execute Nextflow code
	process test {

	// 'exec:' marks the body as Nextflow/Groovy code instead of a shell script.
	exec:
	def replace="hello"

	// Prints the value only when it equals "hello".
	if (replace=="hello") {
	println(replace)
	}
	}
	/
	#scope of variables:
	process test {

	exec:
	def replace="hello"

	if (replace=="hello") {
	// 'bye' is declared inside the if block, so it only exists within these braces.
	def bye="bye"
	}
	// Intentional error for this example: 'bye' is not visible here.
	println(bye)
	}
	#Trying to print bye out of scope will throw an error:ERROR ~ Error executing process > 'test'

	Caused by:
	Unknown variable 'bye' -- Make sure you didn't misspell it or define somewhere in the script before use it
	#In order to fix this one needs to do:
	process test {

	exec:
	def replace="hello"
	// Declare 'bye' outside the if block so it is still visible after it.
	def bye // or def bye=null is also valid

	if (replace=="hello") {
	// Assignment only (no 'def'): this sets the variable declared above.
	bye="bye"
	}
	println(bye)
	}
	//
	#conditional execution based on a param:
	process test {

	script:

	// Choose what to print depending on whether params.flag is set.
	if (params.flag) {
	println("hello")
	} else {
	println("bye")
	}

	// Command executed by the task, regardless of the flag.
	"""
	echo "hello"
	"""
	}
	# if it is run by nextflow run test.nf --flag true then it will print out "hello"
	//
	#creating a val inside a process that will be outputted to a channel
	process test1 {

	// Emit the value of the variable 'outest' into the 'outtest' channel.
	output:
	val outest into outtest

	// The variable is assigned inside the exec block.
	exec:
	outest="hello"

	}

	// Print every value emitted by the channel.
	outtest.println()
	# This will print:
	hello
	//
	process test1 {

	output:
	val outest into outtest

	// Intentional error for this example: the assignment is placed before
	// the 'script:' block, so the output value is reported as missing.
	outest="hello"

	script:

	"""
	echo "hello"
	"""

	}

	outtest.println()
	# This will crash with:
	ERROR ~ Error executing process > 'test1'

	Caused by:
	Missing value declared as output parameter: outest
	#because outest is not initialized within the script/exec block.
	#This will be fixed by doing:
	process test1 {

	output:
	val outest into outtest

	script:
	// Assigned inside the script block, so the output value is defined.
	outest="hello"

	"""
	echo "hello"
	"""
	}

	// Print every value emitted by the channel.
	outtest.println()
	//
	////// Check input parameters //////
	// Stop with exit status 1 and an explanatory message when --genome is not provided.
	if (!params.genome) {
	exit 1, "Please specify a genome file"
	}
	//
	#getting the basenames from files:
	// Channel emitting every .gz file found in the current directory.
	files = Channel.fromPath( './*.gz' )

	process printFiles {
	input:
	file query_file from files

	// Capture the echoed name in the 'result' channel so that the
	// subscribe call below has a channel to read from.
	output:
	stdout result

	// baseName is the file name with its last extension removed.
	"echo ${query_file.baseName}"

	}

	// Print each captured basename.
	result.subscribe {
	println it.trim()
	}
	//
	#with -q we cancel all the messages printed by nextflow at the beginning
	nextflow -q run.nf
	//
	#concat operator
	c1 = Channel.from( 1,2,3 )
	c2 = Channel.from( 'a','b' )
	c3 = Channel.from( 'z' )

	// Chain the three channels one after another and print the items.
	c1.concat(c2).concat(c3).println()
	# print out in order, so it will print:
	1
	2
	3
	a
	b
	z
	//
	# I've got the following error after running Nextflow:
	Failed to write core dump. Core dumps have been disabled

	This seems to be fixed by entering the following command:
	ulimit -c unlimited
	//
	Restarting an older run that has failed:
	> nextflow log
	TIMESTAMP DURATION RUN NAME STATUS REVISION ID SESSION ID COMMAND
	2017-11-24 18:41:34 672ms ecstatic_noether OK bab98280bf 7a8fefda-c812-4842-9248-2fd1b8d1d1e1 nextflow run <your pipeline>
	2017-11-29 10:55:15 2.5s grave_lavoisier OK 6a1acf3211 56c9a1a1-ad16-4671-b98a-96adbd5051f2 nextflow run <your pipeline>
	2017-11-29 10:55:29 - golden_roentgen - 6a1acf3211 6b12ae11-74d8-4395-9685-4bb91e05e324 nextflow run <your pipeline>
	2017-11-29 09:57:37 6.2s silly_austin OK 6a1acf3211 a896b4da-4530-48e5-a519-39016adff6fb nextflow run <your pipeline>

	Check the ones that have STATUS=ERR and then get the RUN NAME, then you can resume that particular run by doing:
	nextflow run <your pipeline> -resume grave_lavoisier
	/
	#Checking working folders for processes in a session:
	> nextflow log
	TIMESTAMP DURATION RUN NAME STATUS REVISION ID SESSION ID COMMAND
	2017-11-24 18:41:34 672ms ecstatic_noether OK bab98280bf 7a8fefda-c812-4842-9248-2fd1b8d1d1e1 nextflow run <your pipeline>
	2017-11-29 10:55:15 2.5s grave_lavoisier OK 6a1acf3211 56c9a1a1-ad16-4671-b98a-96adbd5051f2 nextflow run <your pipeline>
	2017-11-29 10:55:29 - golden_roentgen - 6a1acf3211 6b12ae11-74d8-4395-9685-4bb91e05e324 nextflow run <your pipeline>
	2017-11-29 09:57:37 6.2s silly_austin OK 6a1acf3211 a896b4da-4530-48e5-a519-39016adff6fb nextflow run <your pipeline>
	> nextflow log silly_austin
	//
	/ #Combining 2 channels:

	#!/usr/bin/env nextflow

	echo true
	samples = Channel.from '10000', '20000', '30000'
	chrs = Channel.from 'chr1', 'chr2'
	// Pair every sample with every chromosome (3 x 2 = 6 pairs).
	// NOTE(review): the 'combine' operator shown elsewhere in this sheet also
	// builds a cartesian product -- confirm whether 'spread' is still supported
	// by the Nextflow version in use.
	all = samples.spread(chrs)

	// Print each [sample, chr] pair.
	all.println()

	# This will produce:
	[10000, chr1]
	[10000, chr2]
	[20000, chr1]
	[20000, chr2]
	[30000, chr1]
	[30000, chr2]

	//
	# Executor in a config file is used to set optional config settings for a given executor. For example, for 'lsf' put this in a config file
	that will be run in the command line using -C:
	// Optional settings for the executor named below.
	executor {
	name = 'lsf'
	// Limit on the number of jobs submitted to the cluster.
	queueSize= 500
	}

	'queueSize' is used to set the limit of jobs submitted to cluster
	//
	# checking the progress of nextflow processes:
	1) Enter bjobs -w # examine the EXEC_HOST of the process you want to check
	2) ssh exec_host
	3) Enter 'ps ax | grep ernesto'
	4) Locate the workdir where nextflow is running the process