Skip to content

Instantly share code, notes, and snippets.

@lomereiter
Last active August 31, 2015 23:22
Show Gist options
  • Save lomereiter/74b7d4a962d1e1ccb07e to your computer and use it in GitHub Desktop.
Save lomereiter/74b7d4a962d1e1ccb07e to your computer and use it in GitHub Desktop.
Metabolomics pipeline
import sys
# path to pyIMS parent dir
sys.path.append("/home/lomereiter/github")
from pyIMS.image_measures.level_sets_measure import measure_of_chaos
from pyIMS.image_measures.isotope_image_correlation import isotope_image_correlation
from pyIMS.image_measures.isotope_pattern_match import isotope_pattern_match
import numpy as np
import cPickle
def txt_to_spectrum(s):
arr = s.strip().split("|")
# f4 is used to store data in HDF5; for cumsum we need precision, thus f8
intensities = np.fromstring("0 " + arr[2], sep=' ', dtype=np.dtype('f4'))
return (int(arr[0]), np.fromstring(arr[1], sep=' ', dtype=np.dtype('f4')),
np.cumsum(intensities, dtype=np.dtype('f8')))
def numpy_multiple_queries(lower, upper, lperm, uperm, mzs, cumsum_int,
result, pixel, tmp1, tmp2):
tmp1[lperm] = np.searchsorted(mzs, lower, 'l')
tmp2[uperm] = np.searchsorted(mzs, upper, 'r')
result[:, pixel] += cumsum_int[tmp2] - cumsum_int[tmp1]
def process_spectra_multiple_queries(mol_mz_intervals, spectra, m):
lower, upper, lperm, uperm = mol_mz_intervals
n = len(lower)
result = np.zeros((n, m))
query_ids = np.zeros(n, dtype=np.int)
intensities = np.zeros(n)
tmp1 = np.zeros(n, dtype=np.int)
tmp2 = np.zeros(n, dtype=np.int)
for sp in spectra:
pixel, mzs, cumsum_int = sp
numpy_multiple_queries(
lower, upper, lperm, uperm, mzs, cumsum_int,
result, pixel, tmp1, tmp2)
return result
def get_ion_images(spectra, q, nrows, ncols, ppm):
peaks = q["mzpeaks"]
query_lens = np.array(map(len, peaks))
mzs = np.array([s for _q in peaks for s in _q])
tols = mzs * ppm / 1e6
lower = mzs - tols
upper = mzs + tols
lower_order = lower.argsort()
lower_sorted = lower[lower_order]
upper_order = upper.argsort()
upper_sorted = upper[upper_order]
bounds = (lower_sorted, upper_sorted, lower_order, upper_order)
qres = process_spectra_multiple_queries(bounds, spectra, nrows * ncols)
qres = np.split(qres, np.cumsum(query_lens)[:-1])
return qres
def compute_scores(qres, q, nrows, ncols, nlvl):
m = len(qres)
measure_value_score = np.zeros(m)
iso_ratio_score = np.zeros(m)
iso_correlation_score = np.ones(m)
def process(i, intensities):
d = qres[i]
img = d[0].reshape((nrows, ncols))
# measure_of_chaos modifies image in-place, so provide it with a copy
measure_value_score[i] = 1-measure_of_chaos(img.copy(),nlvl,interp=False)[0]
if measure_value_score[i] == 1: measure_value_score[i] = 0
if len(d) > 1:
iso_corr = isotope_image_correlation(d, weights=intensities[1:])
iso_correlation_score[i] = iso_corr
iso_ratio_score[i] = isotope_pattern_match(d, intensities)
for i in xrange(len(qres)):
process(i, np.array(q["intensities"][i]))
r=[iso_ratio_score, iso_correlation_score, measure_value_score]
return r
def run_pipeline(spectra_fn, queries_fn, out_fn, nrows, ncols, nlvl, ppm):
print "reading queries"
with open(queries_fn) as f:
q = cPickle.load(f)
chunk_size = 1000
n_chunks = len(q["mzpeaks"]) / chunk_size + 1
scores = [[], [], []]
chunk_scores = []
print "reading spectra"
spectra_str = open(spectra_fn).readlines()
spectra = map(txt_to_spectrum, spectra_str)
for offset in xrange(0, len(q["mzpeaks"]), chunk_size):
print("processing chunk #%d/%d" % (offset / chunk_size + 1, n_chunks))
q_chunk = {}
for key in q.keys():
q_chunk[key] = q[key][offset:offset+chunk_size]
print "\tgetting ion images"
qres = get_ion_images(spectra, q_chunk, nrows, ncols, ppm)
print "\tcomputing scores"
chunk_scores.append(compute_scores(qres, q_chunk, nrows, ncols, nlvl))
qres = None
import gc
gc.collect()
for i in xrange(3):
scores[i] = np.concatenate([x[i] for x in chunk_scores])
print "dumping scores to disk"
with open(out_fn, 'w') as f:
cPickle.dump(scores, f)
run_pipeline("/home/lomereiter/metabolomics/bladder.txt", # dataset in Sergey's format
"/home/lomereiter/metabolomics/queries.pkl",
"/home/lomereiter/metabolomics/scores_bladder.pkl", # output filename
134, 260, 30, # 134x260 image, 30 levels to use in measure_of_chaos
100.0 # ppm
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment