Phlya · June 26, 2019 16:30
diff --git a/compare_hic_region.py b/compare_hic_region.py
 def fetch_random_means(clr1, exp1, clr2, exp2, region_left, region_right=None, n=1000): 
    assert clr1.binsize==clr2.binsize
    if region_right is None:
        region_right = region_left
    
    chrom_left, start_left, end_left = cooler.util.parse_region_string(region_left)
    chrom_right, start_right, end_right = cooler.util.parse_region_string(region_right)
    assert chrom_left == chrom_right

    bin1_left = start_left//clr1.binsize
    bin2_left = end_left  //clr1.binsize
    bin1_right = start_right//clr1.binsize
    bin2_right = end_right  //clr1.binsize
    
    shift1 = bin1_right - bin1_left
    shift2 = bin2_right - bin2_left
    len_left = bin2_left - bin1_left
    len_right = bin2_right - bin1_right
    
    if (bin1_left < bin1_right) & (bin2_left > bin1_right):
        raise ValueError('Regions should not overlap')
        
    exp1 = exp1[exp1['chrom']==chrom_left]['balanced.avg']
    exp2 = exp2[exp2['chrom']==chrom_left]['balanced.avg']
    
    expected1 = LazyToeplitz(exp1)[bin1_left:bin2_left, bin1_right:bin2_right]
    expected2 = LazyToeplitz(exp2)[bin1_left:bin2_left, bin1_right:bin2_right]
       
    data1 = clr1.matrix(sparse=True, balance=True).fetch(chrom_left)
    data1 = sparse.triu(data1, 2).tocsr()
    
    data2 = clr2.matrix(sparse=True, balance=True).fetch(chrom_left)
    data2 = sparse.triu(data2, 2).tocsr()
    
    l = data1.shape[0]
    allidx = np.arange(l)
    np.random.shuffle(allidx)
    
    allidx = np.append([bin1_left], allidx)
    region1 = data1[bin1_left:bin2_left, bin1_right:bin2_right].toarray()#/expected1
    assert expected1.shape==region1.shape
    if region_left==region_right:
        ind = np.triu_indices_from(region1, 2)
    else:
        ind = np.s_[:]
    
    means1 = []
    means2 = []
    i = 0       
    while len(means1)<n:
        idx = allidx[i]
        i += 1
        if idx+shift1+len_right > l:
            continue
        try:
            mat1 = (data1[idx:idx+len_left, idx+shift1:idx+shift1+len_right].toarray())
            mean1 = np.nanmean((mat1/expected1)[ind])

            mat2 = (data2[idx:idx+len_left, idx+shift1:idx+shift1+len_right].toarray())
            mean2 = np.nanmean((mat2/expected2)[ind])
            assert mat1.shape==region1.shape and mat2.shape==region1.shape

            if np.mean(np.isnan(mat1))>0.1 or mean1==0 or np.mean(np.isnan(mat2))>0.1 or mean2==0:
                continue
            means1.append(mean1)
            means2.append(mean2)
        except Exception as e:
            print(e)
            print(region1.shape, mat1.shape, mat2.shape, expected1.shape, expected2.shape)
            print(idx, idx+len_left, idx+shift1, idx+shift1+len_right)
            raise e
    regmean1 = means1[0]
    regmean2 = means2[0]
    return regmean1, np.asarray(means1)[1:], regmean2, np.asarray(means2)[1:]
    
 means = fetch_random_means(coolers[sample1], expecteddata[sample1], coolers[sample2], expecteddata[sample2], region, 1000)
 ratios = np.log2(means[1]/means[3])
 roirat = np.log2(means[0]/means[2])

 mn = np.mean(ratios)
 std = np.std(ratios)
 zscore = (roirat-mn)/std
 p = 1- special.ndtr(zscore)
	def fetch_random_means(clr1, exp1, clr2, exp2, region_left, region_right=None, n=1000):
	assert clr1.binsize==clr2.binsize
	if region_right is None:
	region_right = region_left

	chrom_left, start_left, end_left = cooler.util.parse_region_string(region_left)
	chrom_right, start_right, end_right = cooler.util.parse_region_string(region_right)
	assert chrom_left == chrom_right

	bin1_left = start_left//clr1.binsize
	bin2_left = end_left //clr1.binsize
	bin1_right = start_right//clr1.binsize
	bin2_right = end_right //clr1.binsize

	shift1 = bin1_right - bin1_left
	shift2 = bin2_right - bin2_left
	len_left = bin2_left - bin1_left
	len_right = bin2_right - bin1_right

	if (bin1_left < bin1_right) & (bin2_left > bin1_right):
	raise ValueError('Regions should not overlap')

	exp1 = exp1[exp1['chrom']==chrom_left]['balanced.avg']
	exp2 = exp2[exp2['chrom']==chrom_left]['balanced.avg']

	expected1 = LazyToeplitz(exp1)[bin1_left:bin2_left, bin1_right:bin2_right]
	expected2 = LazyToeplitz(exp2)[bin1_left:bin2_left, bin1_right:bin2_right]

	data1 = clr1.matrix(sparse=True, balance=True).fetch(chrom_left)
	data1 = sparse.triu(data1, 2).tocsr()

	data2 = clr2.matrix(sparse=True, balance=True).fetch(chrom_left)
	data2 = sparse.triu(data2, 2).tocsr()

	l = data1.shape[0]
	allidx = np.arange(l)
	np.random.shuffle(allidx)

	allidx = np.append([bin1_left], allidx)
	region1 = data1[bin1_left:bin2_left, bin1_right:bin2_right].toarray()#/expected1
	assert expected1.shape==region1.shape
	if region_left==region_right:
	ind = np.triu_indices_from(region1, 2)
	else:
	ind = np.s_[:]

	means1 = []
	means2 = []
	i = 0
	while len(means1)<n:
	idx = allidx[i]
	i += 1
	if idx+shift1+len_right > l:
	continue
	try:
	mat1 = (data1[idx:idx+len_left, idx+shift1:idx+shift1+len_right].toarray())
	mean1 = np.nanmean((mat1/expected1)[ind])

	mat2 = (data2[idx:idx+len_left, idx+shift1:idx+shift1+len_right].toarray())
	mean2 = np.nanmean((mat2/expected2)[ind])
	assert mat1.shape==region1.shape and mat2.shape==region1.shape

	if np.mean(np.isnan(mat1))>0.1 or mean1==0 or np.mean(np.isnan(mat2))>0.1 or mean2==0:
	continue
	means1.append(mean1)
	means2.append(mean2)
	except Exception as e:
	print(e)
	print(region1.shape, mat1.shape, mat2.shape, expected1.shape, expected2.shape)
	print(idx, idx+len_left, idx+shift1, idx+shift1+len_right)
	raise e
	regmean1 = means1[0]
	regmean2 = means2[0]
	return regmean1, np.asarray(means1)[1:], regmean2, np.asarray(means2)[1:]

	means = fetch_random_means(coolers[sample1], expecteddata[sample1], coolers[sample2], expecteddata[sample2], region, 1000)
	ratios = np.log2(means[1]/means[3])
	roirat = np.log2(means[0]/means[2])

	mn = np.mean(ratios)
	std = np.std(ratios)
	zscore = (roirat-mn)/std
	p = 1- special.ndtr(zscore)