Phlya · July 18, 2019 09:38
diff --git a/scaling_from_expected.py b/scaling_from_expected.py
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Thu Jul 18 10:07:49 2019

 @author: s1529682
 """

 from cooltools.lib import numutils
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 import argparse
 import os

 def get_scaling(expfile, res, bins):
    expected = pd.read_csv(expfile, sep='\t')
    expected = expected[~expected['chrom'].isin(['chrY', 'chrM'])]
    expected['diag'] *= res
    expected['dist'] = pd.cut(expected['diag'], bins,
                              labels=np.mean([bins[:-1], bins[1:]], axis=0))
    sc = expected.groupby('dist').mean()['balanced.avg']
    x = np.array(sc.index)
    y = sc.values
    return y/np.nansum(y)

 def main():
    parser = argparse.ArgumentParser(
                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--resolution", '-r', type=int,
                    help="""Resolution of the """)
    parser.add_argument("--output", '-o', type=str,
                        help="""Where to save the data; will save a plot
                        in the file witht he same namde but .pdf""")
    parser.add_argument("expected_files", type=str,
                nargs='*',
                help="""All files to analyse""")

    args = parser.parse_args()

    exps = args.expected_files

    names = [os.path.splitext(os.path.basename(name))[0] for name in exps]
    bins = numutils.logbins(args.resolution, 2.5*10**8, 1.2)

    scalings = {name:get_scaling(exp, args.resolution, bins) for name, exp in zip(names, exps)}

    bins = np.mean([bins[:-1], bins[1:]], axis=0)
    scalings = pd.DataFrame(scalings, index=bins).dropna()
    scalings = scalings.apply(lambda x: x/np.trapz(x.values, x.index))

    scalings.to_csv(args.output, sep='\t')

    f, ax = plt.subplots(figsize=(8, 8))
    plt.loglog()
    for name in scalings.columns:
        plt.plot(scalings.index, scalings[name], label=name, lw=2)
    ax.set_aspect('equal')
    ax.set_xlabel('Genomic distance')
    ax.set_ylabel('Normalized Hi-C signal')
    ax.grid()
    ax.legend()
    # plt.xlim(10**7)
    # plt.ylim(10**-5, 10**-3)
    plt.tight_layout()
    plt.savefig(os.path.splitext(args.output)[0]+'.pdf')

 if __name__=='__main__':
    main()
	#!/usr/bin/env python3
	# -- coding: utf-8 --
	"""
	Created on Thu Jul 18 10:07:49 2019

	@author: s1529682
	"""

	from cooltools.lib import numutils
	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd
	import argparse
	import os

	def get_scaling(expfile, res, bins):
	expected = pd.read_csv(expfile, sep='\t')
	expected = expected[~expected['chrom'].isin(['chrY', 'chrM'])]
	expected['diag'] *= res
	expected['dist'] = pd.cut(expected['diag'], bins,
	labels=np.mean([bins[:-1], bins[1:]], axis=0))
	sc = expected.groupby('dist').mean()['balanced.avg']
	x = np.array(sc.index)
	y = sc.values
	return y/np.nansum(y)

	def main():
	parser = argparse.ArgumentParser(
	formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parser.add_argument("--resolution", '-r', type=int,
	help="""Resolution of the """)
	parser.add_argument("--output", '-o', type=str,
	help="""Where to save the data; will save a plot
	in the file witht he same namde but .pdf""")
	parser.add_argument("expected_files", type=str,
	nargs='*',
	help="""All files to analyse""")

	args = parser.parse_args()

	exps = args.expected_files

	names = [os.path.splitext(os.path.basename(name))[0] for name in exps]
	bins = numutils.logbins(args.resolution, 2.510*8, 1.2)

	scalings = {name:get_scaling(exp, args.resolution, bins) for name, exp in zip(names, exps)}

	bins = np.mean([bins[:-1], bins[1:]], axis=0)
	scalings = pd.DataFrame(scalings, index=bins).dropna()
	scalings = scalings.apply(lambda x: x/np.trapz(x.values, x.index))

	scalings.to_csv(args.output, sep='\t')

	f, ax = plt.subplots(figsize=(8, 8))
	plt.loglog()
	for name in scalings.columns:
	plt.plot(scalings.index, scalings[name], label=name, lw=2)
	ax.set_aspect('equal')
	ax.set_xlabel('Genomic distance')
	ax.set_ylabel('Normalized Hi-C signal')
	ax.grid()
	ax.legend()
	# plt.xlim(10**7)
	# plt.ylim(10-5, 10-3)
	plt.tight_layout()
	plt.savefig(os.path.splitext(args.output)[0]+'.pdf')

	if __name__=='__main__':
	main()