Last active
August 29, 2015 14:22
-
-
Save arraytools/a3d2fdcd0e2ff37110a0 to your computer and use it in GitHub Desktop.
tophat2 with -G and --transcriptome-index options
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| If no fasta file is found alongside the other index files, tophat will use the bowtie index you give | |
| to build this file and save it to the output directory. This step can take up to an hour for a | |
| human-sized genome. | |
| ############################################################################################################ | |
| #!/bin/bash | |
| export PATH=$PATH:/opt/RNA-Seq/bin/bowtie2-2.2.1:/opt/RNA-Seq/bin/tophat-2.0.11.Linux_x86_64 | |
| export PATH=$PATH:/opt/RNA-Seq/bin/tophat-2.0.11.Linux_x86_64 | |
| export PATH=$PATH:/opt/RNA-Seq/bin/samtools-0.1.19/:/opt/RNA-Seq/bin/samtools-0.1.19/bcftools:/opt/RNA-Seq/bin/samtools-0.1.19/misc | |
| cd /home/brb/Anders2013 | |
| ############################################################################################################ | |
| brb@brb-T3500:~/Anders2013$ ls -lh ~/igenome/fly/EnsemblBDGP5/ | |
| total 373M | |
| -rw------- 1 brb brb 85M Sep 17 2014 genes.gtf | |
| -rw------- 1 brb brb 43M Sep 17 2014 genome.1.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.2.bt2 | |
| -rw------- 1 brb brb 143 Sep 17 2014 genome.3.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.4.bt2 | |
| -rw------- 1 brb brb 117M Sep 17 2014 genome.fa | |
| -rw-rw-r-- 1 brb brb 175 May 7 08:26 genome.fa.fai | |
| -rw------- 1 brb brb 43M Sep 17 2014 genome.rev.1.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.rev.2.bt2 | |
| brb@brb-T3500:~$ ls -l /home/brb/Drosophila_melanogaster/Ensembl/BDGP5/Sequence/Bowtie2Index | |
| total 174652 | |
| -rwxrwxr-x 1 brb brb 44298163 Apr 10 2012 genome.1.bt2 | |
| -rwxrwxr-x 1 brb brb 30077620 Apr 10 2012 genome.2.bt2 | |
| -rwxrwxr-x 1 brb brb 143 Apr 10 2012 genome.3.bt2 | |
| -rwxrwxr-x 1 brb brb 30077616 Apr 10 2012 genome.4.bt2 | |
| lrwxrwxrwx 1 brb brb 29 May 27 11:24 genome.fa -> ../WholeGenomeFasta/genome.fa | |
| -rwxrwxr-x 1 brb brb 44298163 Apr 10 2012 genome.rev.1.bt2 | |
| -rwxrwxr-x 1 brb brb 30077620 Apr 10 2012 genome.rev.2.bt2 | |
| brb@brb-T3500:~$ ls -lh ~/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/ | |
| total 882M | |
| -rwxrwxr-x 1 brb brb 874M May 23 2014 genes.gtf | |
| -rwxrwxr-x 1 brb brb 7.9M May 23 2014 refFlat.txt.gz | |
| ############################################################################################################ | |
| # Example 1. Wrong specification of '--transcriptome-index' option. | |
| ############################################################################################################ | |
| brb@brb-T3500:~/Anders2013$ tophat2 --transcriptome-only --max-multihits 1 \ | |
| --GTF ~/igenome/fly/EnsemblBDGP5/genes.gtf \ | |
| --transcriptome-index=~/igenome/fly/EnsemblBDGP5/genes \ # 'genes' means prefix | |
| -p 5 -o ./tophat_out \ | |
| ~/igenome/fly/EnsemblBDGP5/genome SRR031728.fastq,SRR031729.fastq | |
| [2015-05-26 16:42:23] Beginning TopHat run (v2.0.11) | |
| ----------------------------------------------- | |
| [2015-05-26 16:42:23] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-26 16:42:23] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-26 16:42:24] Checking for Bowtie index files (genome).. | |
| [2015-05-26 16:42:24] Checking for reference FASTA file | |
| [2015-05-26 16:42:24] Generating SAM header for /home/brb/igenome/fly/EnsemblBDGP5/genome | |
| [2015-05-26 16:42:24] Reading known junctions from GTF file | |
| [2015-05-26 16:42:26] Preparing reads | |
| left reads: min. length=75, max. length=75, 17791147 kept reads (21719 discarded) | |
| [2015-05-26 16:45:45] Building transcriptome data files ~/igenome/fly/EnsemblBDGP5/genes | |
| [2015-05-26 16:45:49] Building Bowtie index from genes.fa | |
| [2015-05-26 16:53:43] Mapping left_kept_reads to transcriptome genes with Bowtie2 | |
| [FAILED] | |
| Error running bowtie: | |
| Could not locate a Bowtie index corresponding to basename "/home/brb/igenome/fly/EnsemblBDGP5/genes" | |
| Error: Encountered internal Bowtie 2 exception (#1) | |
| Command: /opt/RNA-Seq/bin/bowtie2-2.2.1/bowtie2-align-s --wrapper basic-0 -k 60 -D 15 | |
| -R 2 -N 0 -L 20 -i S,1,1.25 --gbar 4 --mp 6,2 --np 1 --rdg 5,3 --rfg 5,3 | |
| --score-min C,-14,0 -p 5 --sam-no-hd -x /home/brb/igenome/fly/EnsemblBDGP5/genes - | |
| (ERR): bowtie2-align exited with value 1 | |
| ############################################################################################################ | |
| # Example 2, | |
| # w/ -G, w/ .fa file | |
| # | |
| # In '--transcriptome-index=~/igenome/fly/EnsemblBDGP5/genome' option, 'genome' is a prefix | |
| # (I have almost all files) although files under ~/igenome/fly/EnsemblBDGP5/ are downloaded from iGenomes | |
| # instead of generated by tophat2. | |
| ############################################################################################################ | |
| tophat2 --transcriptome-only --max-multihits 1 \ | |
| --GTF ~/igenome/fly/EnsemblBDGP5/genes.gtf \ | |
| --transcriptome-index=~/igenome/fly/EnsemblBDGP5/genome \ | |
| -p 5 -o ./tophat_out \ | |
| ~/igenome/fly/EnsemblBDGP5/genome SRR031728.fastq,SRR031729.fastq | |
| [2015-05-27 08:19:30] Beginning TopHat run (v2.0.11) | |
| ----------------------------------------------- | |
| [2015-05-27 08:19:30] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-27 08:19:30] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-27 08:19:30] Checking for Bowtie index files (genome).. | |
| [2015-05-27 08:19:30] Checking for reference FASTA file | |
| [2015-05-27 08:19:30] Generating SAM header for /home/brb/igenome/fly/EnsemblBDGP5/genome | |
| [2015-05-27 08:19:30] Reading known junctions from GTF file | |
| [2015-05-27 08:19:33] Preparing reads | |
| left reads: min. length=75, max. length=75, 17791147 kept reads (21719 discarded) | |
| [2015-05-27 08:22:53] Building transcriptome data files ~/igenome/fly/EnsemblBDGP5/genome <===== WASTE TIME | |
| [2015-05-27 08:22:57] Building Bowtie index from genome.fa <===== WASTE TIME | |
| [2015-05-27 08:30:44] Mapping left_kept_reads to transcriptome genome with Bowtie2 | |
| [2015-05-27 08:41:18] Reporting output tracks | |
| [FAILED] | |
| Error running /opt/RNA-Seq/bin/tophat-2.0.11.Linux_x86_64/tophat_reports --min-anchor 8 --splice-mismatches 0 --min-report-intron 50 --max-report-intron 500000 --min-isoform-fraction 0.15 --output-dir ./tophat_out/ --max-multihits 1 --max-seg-multihits 10 --segment-length 25 --segment-mismatches 2 --min-closure-exon 100 --min-closure-intron 50 --max-closure-intron 5000 --min-coverage-intron 50 --max-coverage-intron 20000 --min-segment-intron 50 --max-segment-intron 500000 --read-mismatches 2 --read-gap-length 2 --read-edit-dist 2 --read-realign-edit-dist 3 --max-insertion-length 3 --max-deletion-length 3 -z gzip -p5 --gtf-annotations ~/igenome/fly/EnsemblBDGP5/genome.gff --gtf-juncs ./tophat_out/tmp/genome.juncs --no-closure-search --no-coverage-search --no-microexon-search --sam-header ./tophat_out/tmp/genome_genome.bwt.samheader.sam --report-discordant-pair-alignments --report-mixed-alignments --samtools=/opt/RNA-Seq/bin/samtools-0.1.19/samtools --bowtie2-max-penalty 6 --bowtie2-min-penalty 2 --bowtie2-penalty-for-N 1 --bowtie2-read-gap-open 5 --bowtie2-read-gap-cont 3 --bowtie2-ref-gap-open 5 --bowtie2-ref-gap-cont 3 /home/brb/igenome/fly/EnsemblBDGP5/genome.fa ./tophat_out/junctions.bed ./tophat_out/insertions.bed ./tophat_out/deletions.bed ./tophat_out/fusions.out ./tophat_out/tmp/accepted_hits ./tophat_out/tmp/left_kept_reads.m2g.bam ./tophat_out/tmp/left_kept_reads.bam | |
| Warning: mapped sequence without CIGAR (SRR031728.1214) | |
| ############################################################################################################ | |
| # Example 3, | |
| # No -G option, no fa file can be found. | |
| # Since no -G option, tophat won't build transcriptome file nor bowtie index. | |
| ############################################################################################################ | |
| tophat2 -p 5 -o ./tophat_out Dme1_BDGP5_70 SRR031728.fastq,SRR031729.fastq | |
| [2015-05-27 08:50:02] Beginning TopHat run (v2.0.11) | |
| ----------------------------------------------- | |
| [2015-05-27 08:50:02] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-27 08:50:02] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-27 08:50:02] Checking for Bowtie index files (genome).. | |
| [2015-05-27 08:50:02] Checking for reference FASTA file | |
| Warning: Could not find FASTA file Dme1_BDGP5_70.fa | |
| [2015-05-27 08:50:02] Reconstituting reference FASTA file from Bowtie index | |
| Executing: /opt/RNA-Seq/bin/bowtie2-2.2.1/bowtie2-inspect Dme1_BDGP5_70 > ./tophat_out/tmp/Dme1_BDGP5_70.fa | |
| [2015-05-27 08:50:09] Generating SAM header for Dme1_BDGP5_70 | |
| [2015-05-27 08:50:10] Preparing reads | |
| left reads: min. length=75, max. length=75, 17791147 kept reads (21719 discarded) | |
| [2015-05-27 08:53:35] Mapping left_kept_reads to genome Dme1_BDGP5_70 with Bowtie2 | |
| [2015-05-27 09:06:08] Mapping left_kept_reads_seg1 to genome Dme1_BDGP5_70 with Bowtie2 (1/3) | |
| [2015-05-27 09:07:14] Mapping left_kept_reads_seg2 to genome Dme1_BDGP5_70 with Bowtie2 (2/3) | |
| [2015-05-27 09:08:17] Mapping left_kept_reads_seg3 to genome Dme1_BDGP5_70 with Bowtie2 (3/3) | |
| [2015-05-27 09:09:15] Searching for junctions via segment mapping | |
| [2015-05-27 09:10:12] Retrieving sequences for splices | |
| [2015-05-27 09:10:18] Indexing splices | |
| Building a SMALL index | |
| [2015-05-27 09:10:22] Mapping left_kept_reads_seg1 to genome segment_juncs with Bowtie2 (1/3) | |
| [2015-05-27 09:10:46] Mapping left_kept_reads_seg2 to genome segment_juncs with Bowtie2 (2/3) | |
| [2015-05-27 09:11:07] Mapping left_kept_reads_seg3 to genome segment_juncs with Bowtie2 (3/3) | |
| [2015-05-27 09:11:25] Joining segment hits | |
| [2015-05-27 09:12:12] Reporting output tracks | |
| ----------------------------------------------- | |
| [2015-05-27 09:23:41] A summary of the alignment counts can be found in ./tophat_out/align_summary.txt | |
| [2015-05-27 09:23:41] Run complete: 00:33:39 elapsed | |
| ############################################################################################################ | |
| # Example 4, Proper way of using -G and --transcriptome-index options (2 steps) | |
| ############################################################################################################ | |
| # Step 1: Generate transcriptome seq file and bowtie index | |
| tophat2 -G ~/igenome/fly/EnsemblBDGP5/genes.gtf \ | |
| --transcriptome-index=transcriptome_data/known \ | |
| ~/igenome/fly/EnsemblBDGP5/genome | |
| [2015-05-27 09:41:18] Building transcriptome files with TopHat v2.0.11 | |
| ----------------------------------------------- | |
| [2015-05-27 09:41:18] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-27 09:41:18] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-27 09:41:18] Checking for Bowtie index files (genome).. | |
| [2015-05-27 09:41:18] Checking for reference FASTA file | |
| [2015-05-27 09:41:18] Building transcriptome data files transcriptome_data/known <===== For step 2 | |
| [2015-05-27 09:41:23] Building Bowtie index from known.fa <===== For step 2 | |
| ----------------------------------------------- | |
| [2015-05-27 09:49:01] Transcriptome files prepared. This was the only task requested. | |
| # Compare the files tophat2 generated and downloaded from Illumina iGenomes | |
| # It seems better to rename genes.gtf to genome.gtf in the ~/igenome/fly/EnsemblBDGP5/. | |
| brb@brb-T3500:~/Anders2013$ ls -lh ~/igenome/fly/EnsemblBDGP5/ | |
| total 373M | |
| -rw------- 1 brb brb 85M Sep 17 2014 genes.gtf | |
| -rw------- 1 brb brb 43M Sep 17 2014 genome.1.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.2.bt2 | |
| -rw------- 1 brb brb 143 Sep 17 2014 genome.3.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.4.bt2 | |
| -rw------- 1 brb brb 117M Sep 17 2014 genome.fa | |
| -rw-rw-r-- 1 brb brb 175 May 7 08:26 genome.fa.fai | |
| -rw------- 1 brb brb 43M Sep 17 2014 genome.rev.1.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.rev.2.bt2 | |
| brb@brb-T3500:~/Anders2013$ ls -lh transcriptome_data/ | |
| total 284M | |
| -rw-rw-r-- 1 brb brb 32M May 27 09:45 known.1.bt2 | |
| -rw-rw-r-- 1 brb brb 19M May 27 09:45 known.2.bt2 | |
| -rw-rw-r-- 1 brb brb 253K May 27 09:41 known.3.bt2 | |
| -rw-rw-r-- 1 brb brb 19M May 27 09:41 known.4.bt2 | |
| -rw-rw-r-- 1 brb brb 78M May 27 09:41 known.fa | |
| -rw-rw-r-- 1 brb brb 3.2M May 27 09:41 known.fa.tlst | |
| -rw------- 1 brb brb 85M May 27 09:41 known.gff | |
| -rw-rw-r-- 1 brb brb 32M May 27 09:49 known.rev.1.bt2 | |
| -rw-rw-r-- 1 brb brb 19M May 27 09:49 known.rev.2.bt2 | |
| -rw-rw-r-- 1 brb brb 22 May 27 09:41 known.ver | |
| brb@brb-T3500:~/Anders2013$ head -n 3 transcriptome_data/known.fa | |
| >0 FBtr0300689 2L+ 7529-8116,8193-9484 | |
| CTACTCGCATGTAGAGATTTCCACTTATGTTTTCTCTACTTTCAGCAACCGAGAAGAGAA | |
| CCCACGTTTGAACAAGTATCGGCGTGTGGACAACAGCTATCCCCGCTTCATAACGAATGA | |
| brb@brb-T3500:~/Anders2013$ head -n 3 ~/igenome/fly/EnsemblBDGP5/genome.fa | |
| >2L | |
| CGACAATGCACGACAGAGGAAGCAGAACAGATATTTAGATTGCCTCTCATTTTCTCTCCC | |
| ATATTATAGGGAGAAATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTTT | |
| # Step 2: Running alignment for each sample | |
| tophat2 -G ~/igenome/fly/EnsemblBDGP5/genes.gtf \ | |
| --transcriptome-index=transcriptome_data/known \ | |
| -p 5 -o ./tophat_out \ | |
| ~/igenome/fly/EnsemblBDGP5/genome SRR031728.fastq,SRR031729.fastq | |
| [2015-05-27 09:53:00] Beginning TopHat run (v2.0.11) | |
| ----------------------------------------------- | |
| [2015-05-27 09:53:00] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-27 09:53:00] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-27 09:53:00] Checking for Bowtie index files (transcriptome).. | |
| [2015-05-27 09:53:00] Checking for Bowtie index files (genome).. | |
| [2015-05-27 09:53:00] Checking for reference FASTA file | |
| [2015-05-27 09:53:00] Generating SAM header for /home/brb/igenome/fly/EnsemblBDGP5/genome | |
| [2015-05-27 09:53:01] Reading known junctions from GTF file | |
| [2015-05-27 09:53:03] Preparing reads | |
| left reads: min. length=75, max. length=75, 17791147 kept reads (21719 discarded) | |
| [2015-05-27 09:56:32] Using pre-built transcriptome data.. <========= SAVE TIME | |
| [2015-05-27 09:56:32] Mapping left_kept_reads to transcriptome known with Bowtie2 | |
| [2015-05-27 10:07:30] Resuming TopHat pipeline with unmapped reads | |
| [2015-05-27 10:07:30] Mapping left_kept_reads.m2g_um to genome genome with Bowtie2 | |
| [2015-05-27 10:11:13] Mapping left_kept_reads.m2g_um_seg1 to genome genome with Bowtie2 (1/3) | |
| [2015-05-27 10:11:46] Mapping left_kept_reads.m2g_um_seg2 to genome genome with Bowtie2 (2/3) | |
| [2015-05-27 10:12:17] Mapping left_kept_reads.m2g_um_seg3 to genome genome with Bowtie2 (3/3) | |
| [2015-05-27 10:12:45] Searching for junctions via segment mapping | |
| [2015-05-27 10:13:29] Retrieving sequences for splices | |
| [2015-05-27 10:13:33] Indexing splices | |
| Building a SMALL index | |
| [2015-05-27 10:13:39] Mapping left_kept_reads.m2g_um_seg1 to genome segment_juncs with Bowtie2 (1/3) | |
| [2015-05-27 10:13:49] Mapping left_kept_reads.m2g_um_seg2 to genome segment_juncs with Bowtie2 (2/3) | |
| [2015-05-27 10:13:58] Mapping left_kept_reads.m2g_um_seg3 to genome segment_juncs with Bowtie2 (3/3) | |
| [2015-05-27 10:14:07] Joining segment hits | |
| [2015-05-27 10:14:23] Reporting output tracks | |
| ----------------------------------------------- | |
| [2015-05-27 10:24:03] A summary of the alignment counts can be found in ./tophat_out/align_summary.txt | |
| [2015-05-27 10:24:03] Run complete: 00:31:02 elapsed | |
| tophat2 -G ~/igenome/fly/EnsemblBDGP5/genes.gtf \ | |
| --transcriptome-index=transcriptome_data/known \ | |
| -p 5 -o ./tophat_out2 \ | |
| ~/igenome/fly/EnsemblBDGP5/genome SRR031718.fastq,SRR031719.fastq | |
| [2015-05-27 10:25:54] Beginning TopHat run (v2.0.11) | |
| ----------------------------------------------- | |
| [2015-05-27 10:25:54] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-27 10:25:54] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-27 10:25:54] Checking for Bowtie index files (transcriptome).. | |
| [2015-05-27 10:25:54] Checking for Bowtie index files (genome).. | |
| [2015-05-27 10:25:55] Checking for reference FASTA file | |
| [2015-05-27 10:25:55] Generating SAM header for /home/brb/igenome/fly/EnsemblBDGP5/genome | |
| [2015-05-27 10:25:56] Reading known junctions from GTF file | |
| [2015-05-27 10:25:58] Preparing reads | |
| left reads: min. length=44, max. length=45, 10807773 kept reads (227376 discarded) | |
| [2015-05-27 10:27:12] Using pre-built transcriptome data.. | |
| [2015-05-27 10:27:13] Mapping left_kept_reads to transcriptome known with Bowtie2 | |
| [2015-05-27 10:30:27] Resuming TopHat pipeline with unmapped reads | |
| [2015-05-27 10:30:27] Mapping left_kept_reads.m2g_um to genome genome with Bowtie2 | |
| [2015-05-27 10:33:04] Mapping left_kept_reads.m2g_um_seg1 to genome genome with Bowtie2 (1/2) | |
| [2015-05-27 10:34:47] Mapping left_kept_reads.m2g_um_seg2 to genome genome with Bowtie2 (2/2) | |
| [2015-05-27 10:35:11] Searching for junctions via segment mapping | |
| Coverage-search algorithm is turned on, making this step very slow | |
| Please try running TopHat again with the option (--no-coverage-search) if this step takes too much time or memory. | |
| [2015-05-27 10:43:31] Retrieving sequences for splices | |
| [2015-05-27 10:43:38] Indexing splices | |
| Building a SMALL index | |
| [2015-05-27 10:44:31] Mapping left_kept_reads.m2g_um_seg1 to genome segment_juncs with Bowtie2 (1/2) | |
| [2015-05-27 10:45:49] Mapping left_kept_reads.m2g_um_seg2 to genome segment_juncs with Bowtie2 (2/2) | |
| [2015-05-27 10:46:14] Joining segment hits | |
| [2015-05-27 10:46:35] Reporting output tracks | |
| ----------------------------------------------- | |
| [2015-05-27 10:50:49] A summary of the alignment counts can be found in ./tophat_out2/align_summary.txt | |
| [2015-05-27 10:50:49] Run complete: 00:24:55 elapsed | |
| ############################################################################################################ | |
| # Example 5, Using -G and --transcriptome-index but using downloaded files from Illumina iGenomes | |
| ############################################################################################################ | |
| tophat2 --GTF ~/igenome/fly/EnsemblBDGP5/genes.gtf \ | |
| --transcriptome-index=/home/brb/Drosophila_melanogaster/Ensembl/BDGP5/Sequence/Bowtie2Index/genome \ | |
| -p 5 -o ./tophat_out \ | |
| ~/igenome/fly/EnsemblBDGP5/genome SRR031728.fastq,SRR031729.fastq | |
| [2015-05-27 11:40:49] Beginning TopHat run (v2.0.11) | |
| ----------------------------------------------- | |
| [2015-05-27 11:40:49] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-27 11:40:50] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-27 11:40:50] Checking for Bowtie index files (genome).. | |
| [2015-05-27 11:40:50] Checking for reference FASTA file | |
| [2015-05-27 11:40:50] Generating SAM header for /home/brb/igenome/fly/EnsemblBDGP5/genome | |
| [2015-05-27 11:40:51] Reading known junctions from GTF file | |
| [2015-05-27 11:40:53] Preparing reads | |
| left reads: min. length=75, max. length=75, 17791147 kept reads (21719 discarded) | |
| [2015-05-27 11:44:21] Building transcriptome data files /home/brb/Drosophila_melanogaster/Ensembl/BDGP5/Sequence/Bowtie2Index/genome | |
| [2015-05-27 11:44:27] Building Bowtie index from genome.fa | |
| ... | |
| ############################################################################################################ | |
| # Example 6, Similar to Example 5 but I also copy genes.gtf to genome.gtf | |
| # So 'genome' is used as a prefix for .gtf, .fa and .bt2 files now. | |
| # | |
| # Still missing a few files when compared to transcriptome_data/ directory that tophat2 generated | |
| # (see Example 4) so there are still a few differences. | |
| ############################################################################################################ | |
| brb@brb-T3500:~/Anders2013$ cp ~/igenome/fly/EnsemblBDGP5/genes.gtf ~/igenome/fly/EnsemblBDGP5/genome.gtf | |
| brb@brb-T3500:~/Anders2013$ ls -lh ~/igenome/fly/EnsemblBDGP5/ | |
| total 458M | |
| -rw------- 1 brb brb 85M Sep 17 2014 genes.gtf | |
| -rw------- 1 brb brb 43M Sep 17 2014 genome.1.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.2.bt2 | |
| -rw------- 1 brb brb 143 Sep 17 2014 genome.3.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.4.bt2 | |
| -rw------- 1 brb brb 117M Sep 17 2014 genome.fa | |
| -rw-rw-r-- 1 brb brb 175 May 7 08:26 genome.fa.fai | |
| -rw------- 1 brb brb 85M May 27 11:35 genome.gtf | |
| -rw------- 1 brb brb 43M Sep 17 2014 genome.rev.1.bt2 | |
| -rw------- 1 brb brb 29M Sep 17 2014 genome.rev.2.bt2 | |
| tophat2 --transcriptome-only --max-multihits 1 \ | |
| --GTF ~/igenome/fly/EnsemblBDGP5/genes.gtf \ | |
| --transcriptome-index=~/igenome/fly/EnsemblBDGP5/genome \ | |
| -p 5 -o ./tophat_out \ | |
| ~/igenome/fly/EnsemblBDGP5/genome SRR031728.fastq,SRR031729.fastq | |
| [2015-05-27 11:51:26] Beginning TopHat run (v2.0.11) | |
| ----------------------------------------------- | |
| [2015-05-27 11:51:26] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-27 11:51:26] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-27 11:51:26] Checking for Bowtie index files (transcriptome).. | |
| [2015-05-27 11:51:26] Checking for Bowtie index files (genome).. | |
| [2015-05-27 11:51:26] Checking for reference FASTA file | |
| [2015-05-27 11:51:26] Generating SAM header for /home/brb/igenome/fly/EnsemblBDGP5/genome | |
| [2015-05-27 11:51:26] Reading known junctions from GTF file | |
| [2015-05-27 11:51:28] Preparing reads | |
| left reads: min. length=75, max. length=75, 17791147 kept reads (21719 discarded) | |
| [2015-05-27 11:54:47] Using pre-built transcriptome data.. | |
| [2015-05-27 11:54:47] Mapping left_kept_reads to transcriptome genome with Bowtie2 | |
| [2015-05-27 12:04:55] Reporting output tracks | |
| [FAILED] | |
| Error running /opt/RNA-Seq/bin/tophat-2.0.11.Linux_x86_64/tophat_reports --min-anchor 8 --splice-mismatches 0 --min-report-intron 50 --max-report-intron 500000 --min-isoform-fraction 0.15 --output-dir ./tophat_out/ --max-multihits 1 --max-seg-multihits 10 --segment-length 25 --segment-mismatches 2 --min-closure-exon 100 --min-closure-intron 50 --max-closure-intron 5000 --min-coverage-intron 50 --max-coverage-intron 20000 --min-segment-intron 50 --max-segment-intron 500000 --read-mismatches 2 --read-gap-length 2 --read-edit-dist 2 --read-realign-edit-dist 3 --max-insertion-length 3 --max-deletion-length 3 -z gzip -p5 --gtf-annotations ~/igenome/fly/EnsemblBDGP5/genome.gff --gtf-juncs ./tophat_out/tmp/genome.juncs --no-closure-search --no-coverage-search --no-microexon-search --sam-header ./tophat_out/tmp/genome_genome.bwt.samheader.sam --report-discordant-pair-alignments --report-mixed-alignments --samtools=/opt/RNA-Seq/bin/samtools-0.1.19/samtools --bowtie2-max-penalty 6 --bowtie2-min-penalty 2 --bowtie2-penalty-for-N 1 --bowtie2-read-gap-open 5 --bowtie2-read-gap-cont 3 --bowtie2-ref-gap-open 5 --bowtie2-ref-gap-cont 3 /home/brb/igenome/fly/EnsemblBDGP5/genome.fa ./tophat_out/junctions.bed ./tophat_out/insertions.bed ./tophat_out/deletions.bed ./tophat_out/fusions.out ./tophat_out/tmp/accepted_hits ./tophat_out/tmp/left_kept_reads.m2g.bam ./tophat_out/tmp/left_kept_reads.bam | |
| Error: CIGAR and sequence length are inconsistent!(CTCTCTGAGAGCGCGCCGGAAATGTGTAGGTGTGTGTTTGTGTGTGTGAGTTCTGGTTGCGAATGTGTGTGTGTG) | |
| ############################################################################################################ | |
| # Example 7, Using -G but no --transcriptome-index option. So tophat needs to build index files FOR EACH | |
| # SAMPELS (7 more minutes for each sample in the fly genome). | |
| ############################################################################################################ | |
| tophat2 -G ~/igenome/fly/EnsemblBDGP5/genes.gtf \ | |
| -p 5 -o ./tophat_out \ | |
| ~/igenome/fly/EnsemblBDGP5/genome SRR031728.fastq,SRR031729.fastq | |
| [2015-05-27 11:00:30] Beginning TopHat run (v2.0.11) | |
| ----------------------------------------------- | |
| [2015-05-27 11:00:30] Checking for Bowtie | |
| Bowtie version: 2.2.1.0 | |
| [2015-05-27 11:00:30] Checking for Samtools | |
| Samtools version: 0.1.19.0 | |
| [2015-05-27 11:00:30] Checking for Bowtie index files (genome).. | |
| [2015-05-27 11:00:30] Checking for reference FASTA file | |
| [2015-05-27 11:00:30] Generating SAM header for /home/brb/igenome/fly/EnsemblBDGP5/genome | |
| [2015-05-27 11:00:30] Reading known junctions from GTF file | |
| [2015-05-27 11:00:32] Preparing reads | |
| left reads: min. length=75, max. length=75, 17791147 kept reads (21719 discarded) | |
| [2015-05-27 11:04:04] Building transcriptome data files ./tophat_out/tmp/genes | |
| [2015-05-27 11:04:08] Building Bowtie index from genes.fa | |
| [2015-05-27 11:11:51] Mapping left_kept_reads to transcriptome genes with Bowtie2 | |
| [2015-05-27 11:22:55] Resuming TopHat pipeline with unmapped reads | |
| [2015-05-27 11:22:55] Mapping left_kept_reads.m2g_um to genome genome with Bowtie2 | |
| [2015-05-27 11:26:36] Mapping left_kept_reads.m2g_um_seg1 to genome genome with Bowtie2 (1/3) | |
| [2015-05-27 11:27:10] Mapping left_kept_reads.m2g_um_seg2 to genome genome with Bowtie2 (2/3) | |
| [2015-05-27 11:27:42] Mapping left_kept_reads.m2g_um_seg3 to genome genome with Bowtie2 (3/3) | |
| [2015-05-27 11:28:10] Searching for junctions via segment mapping | |
| [2015-05-27 11:28:54] Retrieving sequences for splices | |
| [2015-05-27 11:28:58] Indexing splices | |
| Building a SMALL index | |
| [2015-05-27 11:29:04] Mapping left_kept_reads.m2g_um_seg1 to genome segment_juncs with Bowtie2 (1/3) | |
| [2015-05-27 11:29:14] Mapping left_kept_reads.m2g_um_seg2 to genome segment_juncs with Bowtie2 (2/3) | |
| [2015-05-27 11:29:24] Mapping left_kept_reads.m2g_um_seg3 to genome segment_juncs with Bowtie2 (3/3) | |
| [2015-05-27 11:29:32] Joining segment hits | |
| [2015-05-27 11:29:48] Reporting output tracks | |
| ----------------------------------------------- | |
| [2015-05-27 11:39:22] A summary of the alignment counts can be found in ./tophat_out/align_summary.txt | |
| [2015-05-27 11:39:22] Run complete: 00:38:52 elapsed |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| brb@brb-T3500:~/Anders2013/chr2L/tophat_out_chr2L_denovo$ grep transcript ~/igenome/human/NCBI/build37.2/genes.gtf | wc -l | |
| 819119 | |
| brb@brb-T3500:~/Anders2013/chr2L/tophat_out_chr2L_denovo$ grep transcript ~/igenome/human/UCSC/hg19/genes.gtf | wc -l | |
| 869204 | |
| brb@brb-T3500:~/Anders2013/chr2L/tophat_out_chr2L_denovo$ grep transcript ~/igenome/human/Ensembl/GRCh37/genes.gtf | wc -l | |
| 2280612 | |
| brb@brb-T3500:~/Anders2013/chr2L/tophat_out_chr2L_denovo$ wc -l ~/igenome/human/NCBI/build37.2/genes.gtf | |
| 819119 /home/brb/igenome/human/NCBI/build37.2/genes.gtf | |
| brb@brb-T3500:~/Anders2013/chr2L/tophat_out_chr2L_denovo$ wc -l ~/igenome/human/UCSC/hg19/genes.gtf | |
| 869204 /home/brb/igenome/human/UCSC/hg19/genes.gtf | |
| brb@brb-T3500:~/Anders2013/chr2L/tophat_out_chr2L_denovo$ wc -l ~/igenome/human/Ensembl/GRCh37/genes.gtf | |
| 2280612 /home/brb/igenome/human/Ensembl/GRCh37/genes.gtf | |
| brb@brb-T3500:~/Anders2013/chr2L/tophat_out_chr2L_denovo$ grep transcript ~/igenome/fly/EnsemblBDGP5/genes.gtf | wc -l | |
| 358027 | |
| brb@brb-T3500:~/Anders2013/chr2L/tophat_out_chr2L_denovo$ wc -l ~/igenome/fly/EnsemblBDGP5/genes.gtf | |
| 358027 /home/brb/igenome/fly/EnsemblBDGP5/genes.gtf | |
| $ grep transcript ~/igenome/fly/EnsemblBDGP5/genes.gtf | head -n 3 | |
| 2L protein_coding exon 7529 8116 . + . exon_id "FBgn0031208:1"; exon_number "1"; gene_biotype "protein_coding"; gene_id "FBgn0031208"; gene_name "CG11023"; p_id "P10409"; transcript_id "FBtr0300689"; transcript_name "CG11023-RB"; tss_id "TSS9411"; | |
| 2L protein_coding exon 7529 8116 . + . exon_id "FBgn0031208:1"; exon_number "1"; gene_biotype "protein_coding"; gene_id "FBgn0031208"; gene_name "CG11023"; p_id "P11767"; transcript_id "FBtr0330654"; transcript_name "CG11023-RD"; tss_id "TSS9411"; | |
| 2L protein_coding exon 7529 8116 . + . exon_id "FBgn0031208:1"; exon_number "1"; gene_biotype "protein_coding"; gene_id "FBgn0031208"; gene_name "CG11023"; p_id "P10194"; transcript_id "FBtr0300690"; transcript_name "CG11023-RC"; tss_id "TSS9411"; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment