Last active
June 26, 2019 16:30
-
-
Save Phlya/95383aa153ff402b90d7a09357bd48cc to your computer and use it in GitHub Desktop.
Compare regions in two Hi-C maps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fetch_random_means(clr1, exp1, clr2, exp2, region_left, region_right=None, n=1000): | |
assert clr1.binsize==clr2.binsize | |
if region_right is None: | |
region_right = region_left | |
chrom_left, start_left, end_left = cooler.util.parse_region_string(region_left) | |
chrom_right, start_right, end_right = cooler.util.parse_region_string(region_right) | |
assert chrom_left == chrom_right | |
bin1_left = start_left//clr1.binsize | |
bin2_left = end_left //clr1.binsize | |
bin1_right = start_right//clr1.binsize | |
bin2_right = end_right //clr1.binsize | |
shift1 = bin1_right - bin1_left | |
shift2 = bin2_right - bin2_left | |
len_left = bin2_left - bin1_left | |
len_right = bin2_right - bin1_right | |
if (bin1_left < bin1_right) & (bin2_left > bin1_right): | |
raise ValueError('Regions should not overlap') | |
exp1 = exp1[exp1['chrom']==chrom_left]['balanced.avg'] | |
exp2 = exp2[exp2['chrom']==chrom_left]['balanced.avg'] | |
expected1 = LazyToeplitz(exp1)[bin1_left:bin2_left, bin1_right:bin2_right] | |
expected2 = LazyToeplitz(exp2)[bin1_left:bin2_left, bin1_right:bin2_right] | |
data1 = clr1.matrix(sparse=True, balance=True).fetch(chrom_left) | |
data1 = sparse.triu(data1, 2).tocsr() | |
data2 = clr2.matrix(sparse=True, balance=True).fetch(chrom_left) | |
data2 = sparse.triu(data2, 2).tocsr() | |
l = data1.shape[0] | |
allidx = np.arange(l) | |
np.random.shuffle(allidx) | |
allidx = np.append([bin1_left], allidx) | |
region1 = data1[bin1_left:bin2_left, bin1_right:bin2_right].toarray()#/expected1 | |
assert expected1.shape==region1.shape | |
if region_left==region_right: | |
ind = np.triu_indices_from(region1, 2) | |
else: | |
ind = np.s_[:] | |
means1 = [] | |
means2 = [] | |
i = 0 | |
while len(means1)<n: | |
idx = allidx[i] | |
i += 1 | |
if idx+shift1+len_right > l: | |
continue | |
try: | |
mat1 = (data1[idx:idx+len_left, idx+shift1:idx+shift1+len_right].toarray()) | |
mean1 = np.nanmean((mat1/expected1)[ind]) | |
mat2 = (data2[idx:idx+len_left, idx+shift1:idx+shift1+len_right].toarray()) | |
mean2 = np.nanmean((mat2/expected2)[ind]) | |
assert mat1.shape==region1.shape and mat2.shape==region1.shape | |
if np.mean(np.isnan(mat1))>0.1 or mean1==0 or np.mean(np.isnan(mat2))>0.1 or mean2==0: | |
continue | |
means1.append(mean1) | |
means2.append(mean2) | |
except Exception as e: | |
print(e) | |
print(region1.shape, mat1.shape, mat2.shape, expected1.shape, expected2.shape) | |
print(idx, idx+len_left, idx+shift1, idx+shift1+len_right) | |
raise e | |
regmean1 = means1[0] | |
regmean2 = means2[0] | |
return regmean1, np.asarray(means1)[1:], regmean2, np.asarray(means2)[1:] | |
means = fetch_random_means(coolers[sample1], expecteddata[sample1], coolers[sample2], expecteddata[sample2], region, 1000) | |
ratios = np.log2(means[1]/means[3]) | |
roirat = np.log2(means[0]/means[2]) | |
mn = np.mean(ratios) | |
std = np.std(ratios) | |
zscore = (roirat-mn)/std | |
p = 1- special.ndtr(zscore) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment