Skip to content

Instantly share code, notes, and snippets.

@mpkocher
Created April 22, 2015 23:05
Show Gist options
  • Save mpkocher/b7e7d1240fc027f27e7c to your computer and use it in GitHub Desktop.
Save mpkocher/b7e7d1240fc027f27e7c to your computer and use it in GitHub Desktop.
CCS Report snippet
import os

import numpy as np
from pbcore.io.BasH5IO import BasH5Reader
class MovieResult(object):
    """Simple container class to hold the results of a Movie (bax).

    Attributes:
        file_name: Path of the file the results were read from.
        movie_name: Name of the movie.
        read_lengths: np.array of read lengths.
        accuracies: np.array of accuracies.
        num_passes: np.array of pass counts.
    """

    def __init__(self, file_name, movie_name, read_lengths, accuracies, num_passes):
        self.file_name = file_name
        self.movie_name = movie_name
        # these are all np.array
        self.read_lengths = read_lengths
        self.accuracies = accuracies
        self.num_passes = num_passes

    def __str__(self):
        """Return '<class name> <movie name> <base name of file_name>'."""
        # Only the base name of the path is shown; this relies on the
        # module-level ``import os``.
        return "{k} {m} {f}".format(
            k=self.__class__.__name__,
            m=self.movie_name,
            f=os.path.basename(self.file_name))

    def __repr__(self):
        """Return the str() form wrapped in angle brackets."""
        # Spacing (including the trailing blank) is kept exactly as before so
        # existing log output does not change.
        return "<" + str(self) + " > "
def _bas_file_to_movie_result(file_name):
    """Parse the ccs.h5 file and return a MovieResult instance.

    Args:
        file_name: Path handed to BasH5Reader.

    Returns:
        MovieResult holding the movie name plus three np.arrays built from
        the reader's sequencing ZMWs: read lengths (int64), accuracies
        (float32) and number of passes (int64).

    The reader is always closed, even if reading a ZMW or building one of
    the arrays raises.
    """
    reader = BasH5Reader(file_name)
    try:
        movie_name = reader.movieName

        # Generators feed np.fromiter directly to avoid creating temporary
        # lists.
        def _zmws():
            for zmw_id in reader.sequencingZmws:
                yield reader[zmw_id]

        def _ccs_read_lengths():
            for zmw in _zmws():
                # Look the CCS read up once per ZMW instead of twice.
                ccs_read = zmw.ccsRead
                if ccs_read is not None:
                    yield len(ccs_read.basecalls())

        def _accuracies():
            for zmw in _zmws():
                yield zmw.readScore

        def _num_passes():
            for zmw in _zmws():
                yield zmw.numPasses

        # NOTE(review): read_lengths skips ZMWs whose ccsRead is None, while
        # accuracies and num_passes include every sequencing ZMW, so the
        # three arrays can differ in length. Behavior kept as-is; confirm
        # whether callers expect them to be aligned element-by-element.
        read_lengths = np.fromiter(_ccs_read_lengths(), dtype=np.int64, count=-1)
        accuracies = np.fromiter(_accuracies(), dtype=np.float32, count=-1)
        num_passes = np.fromiter(_num_passes(), dtype=np.int64, count=-1)
    finally:
        # Release the reader on the error path too.
        reader.close()

    return MovieResult(file_name, movie_name, read_lengths, accuracies, num_passes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment