Last active
July 18, 2019 09:38
-
-
Save Phlya/331800cb03cb42c68cf151b24e90a0fb to your computer and use it in GitHub Desktop.
Get scaling plot and data from expected files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Created on Thu Jul 18 10:07:49 2019 | |
@author: s1529682 | |
""" | |
from cooltools.lib import numutils | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import argparse | |
import os | |
def get_scaling(expfile, res, bins): | |
expected = pd.read_csv(expfile, sep='\t') | |
expected = expected[~expected['chrom'].isin(['chrY', 'chrM'])] | |
expected['diag'] *= res | |
expected['dist'] = pd.cut(expected['diag'], bins, | |
labels=np.mean([bins[:-1], bins[1:]], axis=0)) | |
sc = expected.groupby('dist').mean()['balanced.avg'] | |
x = np.array(sc.index) | |
y = sc.values | |
return y/np.nansum(y) | |
def main(): | |
parser = argparse.ArgumentParser( | |
formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |
parser.add_argument("--resolution", '-r', type=int, | |
help="""Resolution of the """) | |
parser.add_argument("--output", '-o', type=str, | |
help="""Where to save the data; will save a plot | |
in the file witht he same namde but .pdf""") | |
parser.add_argument("expected_files", type=str, | |
nargs='*', | |
help="""All files to analyse""") | |
args = parser.parse_args() | |
exps = args.expected_files | |
names = [os.path.splitext(os.path.basename(name))[0] for name in exps] | |
bins = numutils.logbins(args.resolution, 2.5*10**8, 1.2) | |
scalings = {name:get_scaling(exp, args.resolution, bins) for name, exp in zip(names, exps)} | |
bins = np.mean([bins[:-1], bins[1:]], axis=0) | |
scalings = pd.DataFrame(scalings, index=bins).dropna() | |
scalings = scalings.apply(lambda x: x/np.trapz(x.values, x.index)) | |
scalings.to_csv(args.output, sep='\t') | |
f, ax = plt.subplots(figsize=(8, 8)) | |
plt.loglog() | |
for name in scalings.columns: | |
plt.plot(scalings.index, scalings[name], label=name, lw=2) | |
ax.set_aspect('equal') | |
ax.set_xlabel('Genomic distance') | |
ax.set_ylabel('Normalized Hi-C signal') | |
ax.grid() | |
ax.legend() | |
# plt.xlim(10**7) | |
# plt.ylim(10**-5, 10**-3) | |
plt.tight_layout() | |
plt.savefig(os.path.splitext(args.output)[0]+'.pdf') | |
if __name__=='__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment