Last active
November 11, 2018 17:16
-
-
Save Phlya/340af6c45900902310a7bb8d9cc60537 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dedup(dots, hiccups_filter=True): | |
newdots = [] | |
ress = list(sorted(set(dots['res']))) | |
for chrom in sorted(set(dots['chrom1'])): | |
chromdots = dots[dots['chrom1']==chrom].sort_values(['start1', 'start2']).reset_index(drop=True) | |
for res in ress: | |
chromdots['Supported_%s' % res] = (chromdots['res']==res) | |
tree = spatial.cKDTree(chromdots[['start1', 'start2']]) #Not sure what's the best coordinate to use, of the strongest pixel, centroid, or the center of the dot? | |
drop = [] | |
for i, j in tree.query_pairs(r=50000): | |
ires = chromdots.at[i, 'res'] | |
jres = chromdots.at[j, 'res'] | |
chromdots.at[j, 'Supported_%s' % ires] = True | |
chromdots.at[i, 'Supported_%s' % jres] = True | |
if ress[-1] in (ires, jres) or abs(chromdots.at[j, 'start1']-chromdots.at[i, 'start1'])<=20000: | |
if ires>jres: | |
drop.append(i) | |
else: | |
drop.append(j) | |
newdots.append(chromdots.drop(drop)) | |
deduped = pd.concat(newdots).sort_values(['chrom1', 'start1', 'start2']) | |
if hiccups_filter: | |
l = len(deduped) | |
deduped = deduped[~((deduped['start2']-deduped['start1']>100000) & (~np.any(deduped[['Supported_%s'%res for res in ress[1:]]], axis=1)))] | |
print(l-len(deduped), 'loops filtered out as unreliable %s resolution calls' % ress[0]) | |
return deduped |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment