Skip to content

Instantly share code, notes, and snippets.

@bede
Last active August 17, 2016 13:11
Show Gist options
  • Save bede/7bccf9aad29a91d7539c138ac9a685bd to your computer and use it in GitHub Desktop.
Save bede/7bccf9aad29a91d7539c138ac9a685bd to your computer and use it in GitHub Desktop.
Generators vs. lists for sequence filtering
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"\n",
"from Bio import SeqIO\n",
"\n",
"wd = '/Users/Bede/Research/Notebooks/res/2016-08-16'\n",
"contigs_path = '/Users/Bede/Research/Notebooks/res/2016-08-16/31_c100.fa'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## List"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 339 ms per loop\n"
]
}
],
"source": [
"%%timeit -n 10\n",
"\n",
"def filter_length(records, min_len=0): # List comprehension\n",
" return [r for r in records if len(r.seq) >= min_len]\n",
"\n",
"def filter_longest(records, n=1): # List slicing; assumes seqs sorted by len\n",
" return records[:n]\n",
"\n",
"records = list(SeqIO.parse(contigs_path, 'fasta'))\n",
"filtered_records = filter_length(records, 200)\n",
"filter_longest(filtered_records, 100) # List"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generator"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Good case; optimised by lazy eval"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 2.34 ms per loop\n"
]
}
],
"source": [
"%%timeit -n 10\n",
"\n",
"def filter_length(records, min_len=0): # Generator\n",
" return (r for r in records if len(r.seq) >= min_len)\n",
"\n",
"def filter_longest(records, n=1): # Generator friendly slice; assumes seqs sorted by len\n",
" return (x for _, x in zip(range(n), records))\n",
" \n",
"records = SeqIO.parse(contigs_path, 'fasta')\n",
"filtered_records = filter_length(records, 200)\n",
"list(filter_longest(filtered_records, 100)) # List"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Bad case"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 313 ms per loop\n"
]
}
],
"source": [
"%%timeit -n 10\n",
"\n",
"def filter_length(records, min_len=0): # Generator\n",
" return (r for r in records if len(r.seq) >= min_len)\n",
"\n",
"def filter_longest(records, n=1): # Generator friendly slice; assumes seqs sorted by len\n",
" return (x for _, x in zip(range(n), records))\n",
" \n",
"records = SeqIO.parse(contigs_path, 'fasta')\n",
"filtered_records = filter_length(records, 500)\n",
"list(filter_longest(filtered_records, 100)) # List"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment