Last active
August 17, 2016 13:11
-
-
Save bede/7bccf9aad29a91d7539c138ac9a685bd to your computer and use it in GitHub Desktop.
Generators vs. lists for sequence filtering
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"\n", | |
"from Bio import SeqIO\n", | |
"\n", | |
"wd = '/Users/Bede/Research/Notebooks/res/2016-08-16'\n", | |
"contigs_path = '/Users/Bede/Research/Notebooks/res/2016-08-16/31_c100.fa'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## List" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"10 loops, best of 3: 339 ms per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit -n 10\n", | |
"\n", | |
"def filter_length(records, min_len=0): # List comprehension\n", | |
" return [r for r in records if len(r.seq) >= min_len]\n", | |
"\n", | |
"def filter_longest(records, n=1): # List slicing; assumes seqs sorted by len\n", | |
" return records[:n]\n", | |
"\n", | |
"records = list(SeqIO.parse(contigs_path, 'fasta'))\n", | |
"filtered_records = filter_length(records, 200)\n", | |
"filter_longest(filtered_records, 100) # List" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Generator" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
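"Good case: only the first 100 records passing min_len=200 are requested, so lazy evaluation stops parsing the file as soon as they are found (2.34 ms vs. 339 ms for the list version)." | |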
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"10 loops, best of 3: 2.34 ms per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit -n 10\n", | |
"\n", | |
"def filter_length(records, min_len=0): # Generator\n", | |
" return (r for r in records if len(r.seq) >= min_len)\n", | |
"\n", | |
"def filter_longest(records, n=1): # Generator friendly slice; assumes seqs sorted by len\n", | |
" return (x for _, x in zip(range(n), records))\n", | |
" \n", | |
"records = SeqIO.parse(contigs_path, 'fasta')\n", | |
"filtered_records = filter_length(records, 200)\n", | |
"list(filter_longest(filtered_records, 100)) # List" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
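"Bad case: with min_len=500 the generator version is no faster than the list version (313 ms vs. 339 ms), presumably because nearly the whole file has to be parsed before 100 sufficiently long records are found (or the input is exhausted)." | |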
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"10 loops, best of 3: 313 ms per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit -n 10\n", | |
"\n", | |
"def filter_length(records, min_len=0): # Generator\n", | |
" return (r for r in records if len(r.seq) >= min_len)\n", | |
"\n", | |
"def filter_longest(records, n=1): # Generator friendly slice; assumes seqs sorted by len\n", | |
" return (x for _, x in zip(range(n), records))\n", | |
" \n", | |
"records = SeqIO.parse(contigs_path, 'fasta')\n", | |
"filtered_records = filter_length(records, 500)\n", | |
"list(filter_longest(filtered_records, 100)) # List" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment