Skip to content

Instantly share code, notes, and snippets.

@standage
Last active August 1, 2017 22:50
Show Gist options
  • Save standage/dc83b27c0b11271926a1dddb864d7f98 to your computer and use it in GitHub Desktop.
Save standage/dc83b27c0b11271926a1dddb864d7f98 to your computer and use it in GitHub Desktop.
from __future__ import print_function
from collections import defaultdict
import argparse
import khmer
import statistics
import sys
import time
# Maps each value accepted by the --type option to the khmer class that is
# instantiated for it.
allocators = dict(
    ct=khmer.Counttable,
    cg=khmer.Countgraph,
    nt=khmer.Nodetable,
    ng=khmer.Nodegraph,
)
def timeload(sketch, infile, query=False):
    """Load ``infile`` into ``sketch`` and return how long the load took.

    Args:
        sketch: Object providing ``consume_seqfile`` and, when ``query`` is
            true, ``get_kmers`` and ``get``.
        infile: Passed to ``sketch.consume_seqfile`` and, when querying, to
            ``khmer.ReadParser``.
        query: When true, additionally look up the k-mers of the leading
            reads of ``infile`` after the load has finished.

    Returns:
        The time in seconds (float) spent inside ``sketch.consume_seqfile``.
        The optional query loop is NOT part of the measured interval.

    Raises:
        AssertionError: If ``query`` is true and a looked-up k-mer has a
            non-zero count.
    """
    # Benchmark clock: unlike time.time() it is not affected by wall-clock
    # adjustments on current Python versions. Imported here so the function
    # does not depend on a new file-level import.
    from timeit import default_timer

    start = default_timer()
    sketch.consume_seqfile(infile)
    elapsed = default_timer() - start

    if query:
        # NOTE(review): this asserts a count of 0 for k-mers taken from the
        # same file that was just consumed above -- confirm this is intended.
        for n, read in enumerate(khmer.ReadParser(infile)):
            for kmer in sketch.get_kmers(read.sequence):
                count = sketch.get(kmer)
                assert count == 0, count
            # Stop once the read with index 10001 has been checked.
            if n > 10000:
                break
    return elapsed
# Command-line interface: memory value, sketch type, optional query pass,
# and the input sequence file.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--memory',
    default=4e8,
    type=float,
)
parser.add_argument(
    '--type',
    default='ct',
    choices=allocators.keys(),
)
parser.add_argument('--doquery', action='store_true')
parser.add_argument('infile')
args = parser.parse_args()

# Instantiate the selected sketch class. The --memory value is divided by
# four before being passed on.
# NOTE(review): 25 and 4 are presumably the k-mer size and the number of
# tables -- confirm against the khmer constructor signature.
alloc = allocators[args.type]
sketch = alloc(25, args.memory / 4, 4)

# Run the benchmark and report one result line on stdout.
elapsed = timeload(sketch, args.infile, query=args.doquery)
print(
    'sketch={} insert={} time={:.2f}'.format(
        args.type, not args.doquery, elapsed
    )
)
sketch=ct insert=False time=379.35
sketch=ct insert=False time=379.44
sketch=ct insert=False time=378.06
sketch=cg insert=False time=72.24
sketch=cg insert=False time=72.49
sketch=cg insert=False time=72.60
sketch=ct insert=True time=706.32
sketch=ct insert=True time=708.33
sketch=ct insert=True time=717.29
sketch=cg insert=True time=451.87
sketch=cg insert=True time=451.94
sketch=cg insert=True time=452.96
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment