Skip to content

Instantly share code, notes, and snippets.

@andrese52
Last active March 22, 2018 03:52
Show Gist options
  • Save andrese52/55a20954defe2f9602ed350d6a2b7cae to your computer and use it in GitHub Desktop.
Save andrese52/55a20954defe2f9602ed350d6a2b7cae to your computer and use it in GitHub Desktop.
Wheat RNA sequencing microbiome analysis

RNAseq data analysis pipeline

Notes: 1.) When running jobs, always keep an eye on the scheduler. Some jobs may warrant requests for walltime extensions. 2.) Always check your output files for error messages to ensure your job completely correctly. 3.) Always double check your scripts. Files WILL overwrite.

download raw data and back up immediately

  1. Trimomatic -run separately for each replicate pair
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=72:00:00
#PBS -j oe

cd $PBS_O_WORKDIR

module load jdk/1.8.0_45
module load trimmomatic/0.33

java -jar /opt/trimmomatic/0.33/trimmomatic-0.33.jar PE -phred33 -trimlog trimlog_mC1.txt \
/scratch/asecas86/Buster_denovo/merged/DH169_CT1_R1.fastq.gz /scratch/willyerd/Buster_denovo/merged/DH169_CT1_R2.fastq.gz \
m169C1_paired_a.fq.gz m169C1_unpaired_a.fq.gz m169C1_paired_b.fq.gz m169C1_unpaired_b.fq.gz \
ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25
  1. fastqc - download desktop app https://www.bioinformatics.babraham.ac.uk/projects/download.html --only work with paired files

  2. interleave paired reads (from trmmomatic output, a is left reads, b is right reads) -run for each pair of sequences for paired reads only

#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
 
cd $PBS_O_WORKDIR

module load khmer/1.4.1

interleave-reads.py ../m169C1_paired_a.fq.gz ../m169C1_paired_b.fq.gz -o DH169_C1_paired.fq
  1. normalized pairs (normalize areas with excess coverage using paired read file) -run for each paired read set
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
 
cd $PBS_O_WORKDIR

module load khmer/1.4.1

normalize-by-median.py -C 30 -k 25 -N 4 -x 6e9 -p --savegraph 169c1_normC30k25.ct -o - DH169_C1_paired.fq >> m169C1_norm30x.fq
  1. split normaliaed pairs for down stream analysis
#!/bin/bash
#PBS -q batch 
#PBS -l nodes=1:ppn=12
#PBS -l walltime=48:00:00
#PBS -j oe
 
cd $PBS_O_WORKDIR

module load khmer/1.4.1

split-paired-reads.py -0 169C1_orphan.fq -1 169C1_left.fq -2 169C1_right.fq ../m169C1_norm30x.fq.gz
  1. hisat2 alignment of split pairs and unpaired reads to csw reference (in preparation for genome guided trinity)

A.) index reference

#!/bin/bash
#PBS -q bigmem
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
#PBS -m abe -M [email protected]

cd $PBS_O_WORKDIR

module load hisat2/2.1.0

hisat2-build -f -p 12 161010_Chinese_Spring_v1.0_pseudomolecules.fasta csw

B.) align reads to indexed reference

#!/bin/bash
#PBS -q bigmem
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
#PBS -m abe -M [email protected]

cd $PBS_O_WORKDIR

module load hisat2/2.1.0

hisat2 -q -p 12 --phred33 -x csw -1 169C1_left.fq,169C2_left.fq,169D1_left.fq,169D2_left.fq,173C1_left.fq,173C2_left.fq,173D3_left.fq,173D4_left.fq -2 169C1_right.fq,169C2_right.fq,169D1_right.fq,169D2_right.fq,173C1_right.fq,173C2_right.fq,173D3_right.fq,173D4_right.fq -U m169C1_unpaired_a.fq.gz,m169C1_unpaired_b.fq.gz,m169C2_unpaired_a.fq.gz,m169C2_unpaired_b.fq.gz,m169D1_unpaired_a.fq.gz,m169D1_unpaired_b.fq.gz,m169D2_unpaired_a.fq.gz,m169D2_unpaired_b.fq.gz,m173C1_unpaired_a.fq.gz,m173C1_unpaired_b.fq.gz,m173C2_unpaired_a.fq.gz,m173C2_unpaired_b.fq.gz,m173D3_unpaired_a.fq.gz,m173D3_unpaired_b.fq.gz,m173D4_unpaired_a.fq.gz,m173D4_unpaired_b.fq.gz -S CSWnorm_all.sam
  1. samtools: A.) convert output .sam to .bam
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe

cd $PBS_O_WORKDIR

module load samtools/1.3.1

samtools view -bS -@ 12 GGall_CSW.sam > GGall_CSW.bam

B.) cordinate sort .bam file

#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe

cd $PBS_O_WORKDIR

module load samtools/1.3.1

samtools sort --threads 12 -m 2G GGall_CSW.bam > GGall_CSW.sorted.bam

C.) index sorted bam (for Jbrowse)

#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe

cd $PBS_O_WORKDIR

module load samtools/1.3.1

samtools index GGall_CSW.sorted.bam
  1. GGtrinity with cord sort bam
#!/bin/bash
#PBS -q bigmem
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe

cd $PBS_O_WORKDIR

module load trinity/2.3.2

Trinity --genome_guided_bam CSW_all.sorted.bam --max_memory 250G --no_bowtie --genome_guided_max_intron 50000 --CPU 12
  1. run stats and quality assesment of assembly -read representation

A.) basic stats

#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=24:00:00
#PBS -j oe
 
cd $PBS_O_WORKDIR
module load trinity

/opt/trinity/2.3.2/util/TrinityStats.pl Trinity.fasta

B.) ExN50- need matrices before this can be done

#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
 
cd $PBS_O_WORKDIR

module load trinity

/opt/trinity/2.3.2/util/misc/contig_ExN50_statistic.pl m169CD1matrix.TMM.EXPR.matrix m169CD1_trinity.fasta > CD1ExN50.stats

C.) plot ExN50

#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
 
cd $PBS_O_WORKDIR

module load trinity

/opt/trinity/2.3.2/util/misc/plot_ExN50_statistic.Rscript ExN50.stats
  1. hisat align reads back to assembly - create index of assembly
#!/bin/bash
#PBS -q bigmem
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe
#PBS -m abe -M [email protected]

cd $PBS_O_WORKDIR

module load hisat2/2.1.0

hisat2 -q -p 12 --phred33 -x AllTrinIdx \
-1 m173C1_paired_a.fq.gz,m173C2_paired_a.fq.gz,m173D3_paired_a.fq.gz,m173D4_paired_a.fq.gz,\
m169C1_paired_a.fq.gz,m169C2_paired_a.fq.gz,m169D1_paired_a.fq.gz,m169D2_paired_a.fq.gz \
-2 m173C1_paired_b.fq.gz,m173C2_paired_b.fq.gz,m173D3_paired_b.fq.gz,m173D4_paired_b.fq.gz,\
m169C1_paired_b.fq.gz,m169C2_paired_b.fq.gz,m169D1_paired_b.fq.gz,m169D2_paired_b.fq.gz \
-U m173C1_unpaired_a.fq.gz,m173C1_unpaired_b.fq.gz,m173C2_unpaired_a.fq.gz,m173C2_unpaired_b.fq.gz,\
m173D3_unpaired_a.fq.gz,m173D3_unpaired_b.fq.gz,m173D4_unpaired_a.fq.gz,m173D4_unpaired_b.fq.gz,\
m169C1_unpaired_a.fq.gz,m169C1_unpaired_b.fq.gz,m169C2_unpaired_a.fq.gz,m169C2_unpaired_b.fq.gz,\
m169D1_unpaired_a.fq.gz,m169D1_unpaired_b.fq.gz,m169D2_unpaired_a.fq.gz,m169D2_unpaired_b.fq.gz -S GGtrin_AlnAll.sam
  1. hisat align assembly to csw (for Jbrowse)
#!/bin/bash
#PBS -q batch
#PBS -l nodes=1:ppn=12
#PBS -l walltime=120:00:00
#PBS -j oe

cd $PBS_O_WORKDIR

module load hisat2

hisat2 -f -x /scratch/willyerd/Buster_denovo/m173trinity/hisat/csw/CSW -p 12 -U ../Trinity-allGG.fasta -S GGall_CSW.sam
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment