Created
October 3, 2012 02:23
-
-
Save gregcaporaso/3824571 to your computer and use it in GitHub Desktop.
Comparison of several features of tools for mapping DNA reads to a DNA reference database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "metadata": { | |
| "name": "mapper-comparison-notes" | |
| }, | |
| "nbformat": 3, | |
| "nbformat_minor": 0, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "def summarize_observation_tables(biom_fp_glob):\n", | |
| " fps = glob(biom_fp_glob)\n", | |
| " for fp in fps:\n", | |
| " print fp, \" \", \n", | |
| " !per_library_stats.py -i $fp | grep observation\n", | |
| " !per_library_stats.py -i $fp | grep otu" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 2 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "from os.path import join\n", | |
| "from glob import glob\n", | |
| "\n", | |
| "working_dir = \"/Users/caporaso/outbox/mapper_comparisons/ipynb/\"\n", | |
| "input_seqs_fp = join(working_dir,\"seqs.fna\")\n", | |
| "reference_seqs_fp = \"/Users/caporaso/data/gg_otus_4feb2011/rep_set/gg_97_otus_4feb2011.fasta\"" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 3 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!count_seqs.py -i $input_seqs_fp" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "91000 : /Users/caporaso/outbox/mapper_comparisons/ipynb/seqs.fna (Sequence lengths (mean +/- std): 1389.3360 +/- 52.7229)\r\n", | |
| "91000 : Total\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 11 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 1, | |
| "metadata": {}, | |
| "source": [ | |
| "uclust-fast (closed-reference, uclust defaults)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "97% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time pick_otus.py -i $input_seqs_fp -r $reference_seqs_fp --max_accepts 1 --max_rejects 8 --stepwords 8 --word_length 8 -m uclust_ref -C -o uclust-fast_mapped-0.97_v97/ -s 0.97\n", | |
| "!time make_otu_table.py -i uclust-fast_mapped-0.97_v97/seqs_otus.txt -o uclust-fast_mapped-0.97_v97/observation_table.biom" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "real\t2m0.192s\r\n", | |
| "user\t1m59.011s\r\n", | |
| "sys\t0m1.140s\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "real\t0m0.246s\r\n", | |
| "user\t0m0.189s\r\n", | |
| "sys\t0m0.056s\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 12 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "94% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time pick_otus.py -i $input_seqs_fp -r $reference_seqs_fp --max_accepts 1 --max_rejects 8 --stepwords 8 --word_length 8 -m uclust_ref -C -o uclust-fast_mapped-0.94_v97/ -s 0.94\n", | |
| "!time make_otu_table.py -i uclust-fast_mapped-0.94_v97/seqs_otus.txt -o uclust-fast_mapped-0.94_v97/observation_table.biom" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "real\t3m11.893s\r\n", | |
| "user\t3m10.740s\r\n", | |
| "sys\t0m1.119s\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "real\t0m0.280s\r\n", | |
| "user\t0m0.228s\r\n", | |
| "sys\t0m0.050s\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 13 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "91% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time pick_otus.py -i $input_seqs_fp -r $reference_seqs_fp --max_accepts 1 --max_rejects 8 --stepwords 8 --word_length 8 -m uclust_ref -C -o uclust-fast_mapped-0.91_v97/ -s 0.91\n", | |
| "!time make_otu_table.py -i uclust-fast_mapped-0.91_v97/seqs_otus.txt -o uclust-fast_mapped-0.91_v97/observation_table.biom" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "real\t3m27.139s\r\n", | |
| "user\t3m25.859s\r\n", | |
| "sys\t0m1.165s\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "real\t0m0.338s\r\n", | |
| "user\t0m0.283s\r\n", | |
| "sys\t0m0.052s\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 14 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "88% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time pick_otus.py -i $input_seqs_fp -r $reference_seqs_fp --max_accepts 1 --max_rejects 8 --stepwords 8 --word_length 8 -m uclust_ref -C -o uclust-fast_mapped-0.88_v97/ -s 0.88\n", | |
| "!time make_otu_table.py -i uclust-fast_mapped-0.88_v97/seqs_otus.txt -o uclust-fast_mapped-0.88_v97/observation_table.biom" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "real\t5m6.026s\r\n", | |
| "user\t5m4.795s\r\n", | |
| "sys\t0m1.217s\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\r\n", | |
| "real\t0m0.350s\r\n", | |
| "user\t0m0.295s\r\n", | |
| "sys\t0m0.054s\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 15 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "summarize_observation_tables('uclust-fast_mapped-0.*_v97/observation_table.biom')\n" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "uclust-fast_mapped-0.88_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 77007\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 1419\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "uclust-fast_mapped-0.91_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 62639\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 1667\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "uclust-fast_mapped-0.94_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 48121\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 852\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "uclust-fast_mapped-0.97_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 33315\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 298\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 4 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 1, | |
| "metadata": {}, | |
| "source": [ | |
| "uclust-strict (QIIME defaults)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "97% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time pick_otus.py -i $input_seqs_fp -r $reference_seqs_fp -m uclust_ref -C -o uclust-strict_mapped-0.97_v97/ -s 0.97\n", | |
| "!time make_otu_table.py -i uclust-strict_mapped-0.97_v97/seqs_otus.txt -o uclust-strict_mapped-0.97_v97/observation_table.biom" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "94% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time pick_otus.py -i $input_seqs_fp -r $reference_seqs_fp -m uclust_ref -C -o uclust-strict_mapped-0.94_v97/ -s 0.94\n", | |
| "!time make_otu_table.py -i uclust-strict_mapped-0.94_v97/seqs_otus.txt -o uclust-strict_mapped-0.94_v97/observation_table.biom" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "91% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time pick_otus.py -i $input_seqs_fp -r $reference_seqs_fp -m uclust_ref -C -o uclust-strict_mapped-0.91_v97/ -s 0.91\n", | |
| "!time make_otu_table.py -i uclust-strict_mapped-0.91_v97/seqs_otus.txt -o uclust-strict_mapped-0.91_v97/observation_table.biom" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "88% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time pick_otus.py -i $input_seqs_fp -r $reference_seqs_fp -m uclust_ref -C -o uclust-strict_mapped-0.88_v97/ -s 0.88\n", | |
| "!time make_otu_table.py -i uclust-strict_mapped-0.88_v97/seqs_otus.txt -o uclust-strict_mapped-0.88_v97/observation_table.biom" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "summarize_observation_tables('uclust-strict_mapped-0.*_v97/observation_table.biom')\n" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "uclust-strict_mapped-0.88_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 77284\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 1331\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "uclust-strict_mapped-0.91_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 63879\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 982\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "uclust-strict_mapped-0.94_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 49970\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 603\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "uclust-strict_mapped-0.97_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 34771\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 293\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 5 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 1, | |
| "metadata": {}, | |
| "source": [ | |
| "usearch" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "97% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m usearch -s 0.97 -o usearch_mapped-0.97_v97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "94% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m usearch -s 0.94 -o usearch_mapped-0.94_v97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "91% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m usearch -s 0.91 -o usearch_mapped-0.91_v97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "88% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m usearch -s 0.88 -o usearch_mapped-0.88_v97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "summarize_observation_tables('usearch_mapped-0.*_v97/observation_table.biom')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "usearch_mapped-0.88_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 88253\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 1831\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "usearch_mapped-0.91_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 73436\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 2373\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "usearch_mapped-0.94_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 53637\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 1245\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "usearch_mapped-0.97_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 37392\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 390\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 6 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 1, | |
| "metadata": {}, | |
| "source": [ | |
| "BWA-short" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
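| "bwa doesn't take a percent identity threshold, but rather a maximum number of allowed mismatches (passed via `--max_diff` in the cells below). Here I compute what that number should be for each similarity level, based on the mean sequence length reported by `count_seqs.py` above." | |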
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# hard-coding mean length for now...\n", | |
| "mean_length = 1390\n", | |
| "n97 = str(mean_length - int(0.97 * mean_length))\n", | |
| "n94 = str(mean_length - int(0.94 * mean_length))\n", | |
| "n91 = str(mean_length - int(0.91 * mean_length))\n", | |
| "n88 = str(mean_length - int(0.88 * mean_length))\n", | |
| "print n97, n94, n91, n88" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "42 84 126 167\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 10 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "97% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m bwa-short -o bwa-short_mapped-0.97_v97 --max_diff $n97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "94% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m bwa-short -o bwa-short_mapped-0.94_v97 --max_diff $n94" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "91% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m bwa-short -o bwa-short_mapped-0.91_v97 --max_diff $n91" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "88% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m bwa-short -o bwa-short_mapped-0.88_v97 --max_diff $n88" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "summarize_observation_tables('bwa-short_mapped-0.*_v97/observation_table.biom')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "bwa-short_mapped-0.88_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 13944\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 110\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "bwa-short_mapped-0.91_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 10499\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 104\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "bwa-short_mapped-0.94_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 6167\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 89\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| " " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "bwa-short_mapped-0.97_v97/observation_table.biom " | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num observations (sequences): 2785\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Num otus: 68\r\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 7 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 1, | |
| "metadata": {}, | |
| "source": [ | |
| "blat (nt versus nt)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "97% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m blat-nt -s 0.97 -o blat-nt_mapped-0.97_v97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "94% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m blat-nt -s 0.94 -o blat-nt_mapped-0.94_v97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "91% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m blat-nt -s 0.91 -o blat-nt_mapped-0.91_v97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "88% similarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!time map_reads_to_reference.py -i $input_seqs_fp -r $reference_seqs_fp -m blat-nt -s 0.88 -o blat-nt_mapped-0.88_v97" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "summarize_observation_tables('blat-nt_mapped-0.*_v97/observation_table.biom')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 9 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": {} | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment