Last active
January 19, 2018 01:19
-
-
Save mbk0asis/3c756132517436cc70a0 to your computer and use it in GitHub Desktop.
cufflinks pipeline
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# gather "accepted_hits.bam" to one directory | |
$ find . -name "accepted_hits.bam" > bam_list | |
$ while read line; do cp $line $(dirname $line).bam ; done < bam_list | |
# rename bam files to shorten the names (e.g. SAMPLE.trim.cut.fq.gz.bam --> SAMPLE.bam)
# (Perl 'rename': the dots are escaped so they only match a literal '.',
#  and only the *.bam files are considered)
$ rename 's/trim\.cut\.fq\.gz\.//g' *.bam
# sort/rmdup BAM files (Optional but strongly recommended)
# sort with 8 threads; uses the 'samtools sort -o OUT IN' form because the old
# 'samtools sort IN OUT_PREFIX' form was removed in samtools 1.3
$ for b in ./*.bam; do samtools sort -@ 8 -o "$b.sorted.bam" "$b" ; done
# will produce 'AAA.bam.sorted.bam'
# to remove duplicated reads ('-s' = treat the reads as single-end)
$ for b in ./*.sorted.bam; do samtools rmdup -s "$b" "$b.rmdup.bam" ; done
# will produce 'AAA.bam.sorted.bam.rmdup.bam'
# cufflinks | |
# '-G /path/to/genes.gtf' let cufflinks count reads mapped only | |
# on known (reference) transcripts. | |
$ mkdir cufflinks | |
$ for b in BAM/*.rmdup.bam; do i=$(basename $b); echo ' >>> ' $i ; echo;\ | |
cufflinks -p 8 --library-type fr-firststrand -G ~/00-NGS/Annotation/Bos_taurus_UCSC_bosTau8/GTF/genes.gtf \ | |
-o cufflinks/$i $b; echo;\ | |
done | |
# generate 'assembly.txt': the path of every 'transcripts.gtf' found below
# the current directory, one per line
$ find . -name 'transcripts.gtf' -print > assembly.txt
$ cat assembly.txt | |
/path/to/Sample1/transcripts.gtf | |
/path/to/Sample2/transcripts.gtf | |
/path/to/Sample3/transcripts.gtf | |
/path/to/Sample4/transcripts.gtf | |
# cuffmerge: merge the GTF files listed in assembly.txt, with genes.gtf as the
# reference annotation, using 8 threads
$ cuffmerge --num-threads 8 --ref-gtf genes.gtf assembly.txt
# you can go directly to 'cuffdiff' or do 'cuffquant --> cuffdiff or cuffnorm' | |
# cuffquant
# quantify every BAM file against the merged annotation; each run writes an
# 'abundances.cxb' file into cuffquant/<bam file name>
# usage: cuffquant [options] <annotation.gtf> <aligned_reads.bam>
$ mkdir -p cuffquant
$ for b in ../BAM/*.rmdup.bam; do i=$(basename "$b"); echo ' >>> ' "$i" ; echo;\
cuffquant -p 8 --library-type fr-firststrand -o "cuffquant/$i" merged_asm/merged.gtf "$b"; echo; done
# generate sample_sheet: the path of every 'abundances.cxb' found below the
# current directory, one per line
$ find . -name 'abundances.cxb' -print > sample_sheet
# or | |
$ for b in /PATH/TO/*.bam; | |
do | |
echo $b >> sample_seet | |
done | |
# sample_sheet should look like below (tab-delimited, with a header line)
# the code above generates only the 'sample_id' column
# add the header line and the group names manually
sample_id	group_label
/path/to/C1_R1.sam	C1
/path/to/C1_R2.sam	C1
/path/to/C2_R1.sam	C2
/path/to/C2_R2.sam	C2
or | |
sample_id	group_label
/path/to/C1_R1.cxb	C1
/path/to/C1_R2.cxb	C1
/path/to/C2_R1.cxb	C2
/path/to/C2_R2.cxb	C2
# cuffnorm: reads the samples and their groups from sample_sheet and writes
# its results to norm_out
$ cuffnorm --num-threads 8 --library-type fr-firststrand --output-dir norm_out \
--use-sample-sheet ../merged_asm/merged.gtf sample_sheet
# cuffdiff: reads the samples and their groups from sample_sheet and writes
# its results to diff_out/
$ cuffdiff --num-threads 8 --library-type fr-firststrand --output-dir diff_out/ \
--use-sample-sheet ../merged_asm/merged.gtf sample_sheet
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment