Last active
September 28, 2015 08:55
-
-
Save jni/f165ae2aea45bf900c95 to your computer and use it in GitHub Desktop.
Throughput of simple streaming of text data with Toolz/CyToolz
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from IPython import get_ipython
import toolz as tz
from toolz import curried as c
# Path of the FASTA file that every benchmark in this script streams through.
fn = 'data/mb1_dm6.fa'

# NOTE(review): each figure below is printed as 1 / (best time in seconds),
# which only equals MB/s if the input is 1 MB (as the file name suggests) --
# confirm before pointing `fn` at a different file.

# Baseline: iterate over the file line by line, keeping only the last line.
# `run_line_magic` is the supported replacement for the deprecated `magic()`;
# `-o` makes %timeit return its result object and `-q` silences its output.
t = get_ipython().run_line_magic(
    'timeit', '-o -q tz.pipe(fn, open, tz.last)')
print('Raw throughput (lines): %.2fMB/s' % (1 / t.best))

# Same stream, but with the lines concatenated into single characters first.
t = get_ipython().run_line_magic(
    'timeit', '-o -q tz.pipe(fn, open, tz.concat, tz.last)')
print('Single character throughput: %.2fMB/s' % (1 / t.best))
def is_sequence(line):
    """Return True if *line* should be kept as sequence data.

    A line is kept when it is longer than one character and does not
    start with ``'>'``.  Lines of length 0 or 1 (for example a bare
    newline) and lines starting with ``'>'`` are rejected.

    Parameters
    ----------
    line : str
        One line of text, as produced by iterating over an open file.

    Returns
    -------
    bool
        Whether the line passes the filter.
    """
    return len(line) > 1 and not line.startswith('>')
# Characters accepted by `is_nucleotide`: A, C, G and T in both cases.
# Built once at module level so each membership test is a set lookup.
nucleotides = set('ACGTacgt')


def is_nucleotide(char):
    """Return True if *char* is one of the characters in ``nucleotides``.

    Parameters
    ----------
    char : str
        The character to test.

    Returns
    -------
    bool
        ``True`` for any of ``ACGTacgt``; ``False`` for anything else
        (other letters, newlines, ...).
    """
    return char in nucleotides
# Full pipeline: keep only lines accepted by `is_sequence`, concatenate them
# into single characters, keep only characters accepted by `is_nucleotide`,
# and consume the stream with `tz.last`.
# `run_line_magic` is the supported replacement for the deprecated `magic()`.
t = get_ipython().run_line_magic(
    'timeit',
    '-o -q tz.pipe(fn, open, c.filter(is_sequence), tz.concat, '
    'c.filter(is_nucleotide), tz.last)')
# NOTE(review): 1 / t.best is MB/s only if the input file is 1 MB -- confirm.
print('Filtered throughput: %.2fMB/s' % (1 / t.best))
# Cython; spoiler alert: doesn't help
# The three benchmarks below repeat the toolz ones above, swapping in the
# cytoolz equivalents (`ctz` for `tz`, `cc` for `c`).
import cytoolz as ctz
from cytoolz import curried as cc

# NOTE(review): every figure is 1 / (best time in seconds), i.e. MB/s only if
# the input file is 1 MB -- confirm.
# `run_line_magic` is the supported replacement for the deprecated `magic()`.

# Line-by-line iteration, keeping only the last line.
t = get_ipython().run_line_magic(
    'timeit', '-o -q ctz.pipe(fn, open, ctz.last)')
print('Cython raw throughput (lines): %.2fMB/s' % (1 / t.best))

# Lines concatenated into single characters.
t = get_ipython().run_line_magic(
    'timeit', '-o -q ctz.pipe(fn, open, ctz.concat, ctz.last)')
print('Cython single character throughput: %.2fMB/s' % (1 / t.best))

# Line filter, concatenation into characters, then character filter.
t = get_ipython().run_line_magic(
    'timeit',
    '-o -q ctz.pipe(fn, open, cc.filter(is_sequence), ctz.concat, '
    'cc.filter(is_nucleotide), ctz.last)')
print('Cython filtered throughput: %.2fMB/s' % (1 / t.best))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment