Notes: 1.) When running jobs, always keep an eye on the scheduler. Some jobs may warrant requests for walltime extensions. 2.) Always check your output files for error messages to ensure your job completely correctly. 3.) Always double check your scripts. Files WILL overwrite.
download raw data and back up immediately
- Trimomatic -run separately for each replicate pair
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=72:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load jdk/1.8.0_45
module load trimmomatic/0.33
java -jar /opt/trimmomatic/0.33/trimmomatic-0.33.jar PE -phred33 -trimlog trimlog_mC1.txt \
/scratch/asecas86/Buster_denovo/merged/DH169_CT1_R1.fastq.gz /scratch/willyerd/Buster_denovo/merged/DH169_CT1_R2.fastq.gz \
m169C1_paired_a.fq.gz m169C1_unpaired_a.fq.gz m169C1_paired_b.fq.gz m169C1_unpaired_b.fq.gz \
ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25
-
fastqc - download desktop app https://www.bioinformatics.babraham.ac.uk/projects/download.html --only work with paired files
-
interleave paired reads (from trmmomatic output, a is left reads, b is right reads) -run for each pair of sequences for paired reads only
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load khmer/1.4.1
interleave-reads.py ../m169C1_paired_a.fq.gz ../m169C1_paired_b.fq.gz -o DH169_C1_paired.fq
- normalized pairs (normalize areas with excess coverage using paired read file) -run for each paired read set
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load khmer/1.4.1
normalize-by-median.py -C 30 -k 25 -N 4 -x 6e9 -p --savegraph 169c1_normC30k25.ct -o - DH169_C1_paired.fq >> m169C1_norm30x.fq
- split normaliaed pairs for down stream analysis
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=48:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load khmer/1.4.1
split-paired-reads.py -0 169C1_orphan.fq -1 169C1_left.fq -2 169C1_right.fq ../m169C1_norm30x.fq.gz
- hisat2 alignment of split pairs and unpaired reads to csw reference (in preparation for genome guided trinity)
A.) index reference
#!/bin/bash
#PBS -q bigmem
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
#PBS -m abe -M [email protected]
cd $PBS_O_WORKDIR
module load hisat2/2.1.0
hisat2-build -f -p 12 161010_Chinese_Spring_v1.0_pseudomolecules.fasta csw
B.) align reads to indexed reference
#!/bin/bash
#PBS -q bigmem
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
#PBS -m abe -M [email protected]
cd $PBS_O_WORKDIR
module load hisat2/2.1.0
hisat2 -q -p 12 --phred33 -x csw -1 169C1_left.fq,169C2_left.fq,169D1_left.fq,169D2_left.fq,173C1_left.fq,173C2_left.fq,173D3_left.fq,173D4_left.fq -2 169C1_right.fq,169C2_right.fq,169D1_right.fq,169D2_right.fq,173C1_right.fq,173C2_right.fq,173D3_right.fq,173D4_right.fq -U m169C1_unpaired_a.fq.gz,m169C1_unpaired_b.fq.gz,m169C2_unpaired_a.fq.gz,m169C2_unpaired_b.fq.gz,m169D1_unpaired_a.fq.gz,m169D1_unpaired_b.fq.gz,m169D2_unpaired_a.fq.gz,m169D2_unpaired_b.fq.gz,m173C1_unpaired_a.fq.gz,m173C1_unpaired_b.fq.gz,m173C2_unpaired_a.fq.gz,m173C2_unpaired_b.fq.gz,m173D3_unpaired_a.fq.gz,m173D3_unpaired_b.fq.gz,m173D4_unpaired_a.fq.gz,m173D4_unpaired_b.fq.gz -S CSWnorm_all.sam
- samtools: A.) convert output .sam to .bam
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load samtools/1.3.1
samtools view -bS -@ 12 GGall_CSW.sam > GGall_CSW.bam
B.) cordinate sort .bam file
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load samtools/1.3.1
samtools sort --threads 12 -m 2G GGall_CSW.bam > GGall_CSW.sorted.bam
C.) index sorted bam (for Jbrowse)
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load samtools/1.3.1
samtools index GGall_CSW.sorted.bam
- GGtrinity with cord sort bam
#!/bin/bash
#PBS -q bigmem
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load trinity/2.3.2
Trinity --genome_guided_bam CSW_all.sorted.bam --max_memory 250G --no_bowtie --genome_guided_max_intron 50000 --CPU 12
- run stats and quality assesment of assembly -read representation
A.) basic stats
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=24:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load trinity
/opt/trinity/2.3.2/util/TrinityStats.pl Trinity.fasta
B.) ExN50- need matrices before this can be done
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load trinity
/opt/trinity/2.3.2/util/misc/contig_ExN50_statistic.pl m169CD1matrix.TMM.EXPR.matrix m169CD1_trinity.fasta > CD1ExN50.stats
C.) plot ExN50
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load trinity
/opt/trinity/2.3.2/util/misc/plot_ExN50_statistic.Rscript ExN50.stats
- hisat align reads back to assembly - create index of assembly
#!/bin/bash
#PBS -q bigmem
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
#PBS -m abe -M [email protected]
cd $PBS_O_WORKDIR
module load hisat2/2.1.0
hisat2 -q -p 12 --phred33 -x AllTrinIdx \
-1 m173C1_paired_a.fq.gz,m173C2_paired_a.fq.gz,m173D3_paired_a.fq.gz,m173D4_paired_a.fq.gz,\
m169C1_paired_a.fq.gz,m169C2_paired_a.fq.gz,m169D1_paired_a.fq.gz,m169D2_paired_a.fq.gz \
-2 m173C1_paired_b.fq.gz,m173C2_paired_b.fq.gz,m173D3_paired_b.fq.gz,m173D4_paired_b.fq.gz,\
m169C1_paired_b.fq.gz,m169C2_paired_b.fq.gz,m169D1_paired_b.fq.gz,m169D2_paired_b.fq.gz \
-U m173C1_unpaired_a.fq.gz,m173C1_unpaired_b.fq.gz,m173C2_unpaired_a.fq.gz,m173C2_unpaired_b.fq.gz,\
m173D3_unpaired_a.fq.gz,m173D3_unpaired_b.fq.gz,m173D4_unpaired_a.fq.gz,m173D4_unpaired_b.fq.gz,\
m169C1_unpaired_a.fq.gz,m169C1_unpaired_b.fq.gz,m169C2_unpaired_a.fq.gz,m169C2_unpaired_b.fq.gz,\
m169D1_unpaired_a.fq.gz,m169D1_unpaired_b.fq.gz,m169D2_unpaired_a.fq.gz,m169D2_unpaired_b.fq.gz -S GGtrin_AlnAll.sam
- hisat align assembly to csw (for Jbrowse)
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
cd $PBS_O_WORKDIR
module load hisat2
hisat2 -f -x /scratch/willyerd/Buster_denovo/m173trinity/hisat/csw/CSW -p 12 -U ../Trinity-allGG.fasta -S GGall_CSW.sam