import matplotlib
# Deeptools will mess with matplotlib settings when it is imported, so
# remember the backend that is configured right now ...
_mpl_backend = matplotlib.rcParams['backend']
from deeptools.heatmapper import heatmapper as deeptools_heatmapper
# ... and switch back to it once the deeptools import has run.
matplotlib.use(_mpl_backend)
# The saved value was only needed for the restore above; remove the
# temporary name from the module namespace.
del _mpl_backend
import pandas as pd
import numpy as np
def read_deeptools_matrix_to_dataframe(matrix_filename):
    """Read a deeptools matrix file and return it as a pandas DataFrame.

    The file is loaded through deeptools' ``heatmapper`` and the loaded
    values are wrapped in a DataFrame with two-level labels on both axes.

    Rows are labelled ``(region_group, region)``. The ``region`` label is
    ``chrom:start-end[,start-end...]``, followed by the region's name and
    strand (each separated by ``|``) when the region record is long enough
    to carry them.

    Columns are labelled ``(sample_label, bin)``; ``bin`` restarts at 0 for
    every sample and runs up to that sample's column count.

    Parameters
    ----------
    matrix_filename
        Passed unchanged to ``heatmapper.read_matrix_file``.

    Returns
    -------
    pandas.DataFrame
        The matrix values with the row and column MultiIndexes described
        above.
    """
    loader = deeptools_heatmapper()
    loader.read_matrix_file(matrix_filename)
    dt_matrix = loader.matrix
    values = dt_matrix.matrix

    def describe(region):
        # Build the "chrom:start-end,...|name|strand" label for one region.
        chrom = region[0]
        intervals = ','.join('{}-{}'.format(*pair) for pair in region[1])
        parts = [f'{chrom}:{intervals}']
        n_fields = len(region)
        if n_fields >= 3:
            parts.append(region[2])  # name
        # region[3] is deliberately not part of the label.
        if n_fields >= 5:
            parts.append(region[4])  # strand
        return '|'.join(parts)

    # One row key per region, tagged with the group the region belongs to.
    row_keys = [
        (group_label, describe(region))
        for group_label, group_regions in zip(dt_matrix.group_labels,
                                              dt_matrix.get_regions())
        for region in group_regions
    ]
    row_index = pd.MultiIndex.from_tuples(
        row_keys, names=['region_group', 'region'])

    # Consecutive boundaries delimit each sample's columns; bins are
    # numbered relative to the start of their own sample.
    col_keys = [
        (sample_label, bin_no)
        for sample_label, (first_col, last_col) in zip(
            dt_matrix.sample_labels,
            zip(dt_matrix.sample_boundaries, dt_matrix.sample_boundaries[1:]))
        for bin_no in np.arange(0, last_col - first_col, dtype=int)
    ]
    col_index = pd.MultiIndex.from_tuples(
        col_keys, names=['sample_label', 'bin'])

    return pd.DataFrame(values, index=row_index, columns=col_index)
# Source: GitHub gist lukauskas/a7c28d8a96b141d6c1b343783d0cdbd1
# Title: Converting deeptools matrix to a pandas dataframe
# Last active: November 12, 2020 19:15