Last active
April 13, 2021 01:19
-
-
Save chrisjurich/3d25ae3490bfe2f209b0ff20b3f460e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import mannwhitneyu | |
# Source: https://www.biorxiv.org/content/10.1101/2020.06.29.178343v2.full.pdf | |
def dsci( sequence, target, dms ): | |
# TODO remember, have to replace the dead nt's with N | |
assert len( sequence ) == len( target ) and len( target ) == len( dms ) | |
# first, gotta do the paired/unpaired | |
paired, unpaired = [], [] | |
for nt, db, val in zip( sequence, target, dms): | |
if nt != 'A' and nt != 'C': | |
continue | |
if db == '.': | |
unpaired.append( val ) | |
else: | |
paired.append( val ) | |
if min( len( unpaired ), len( paired ) ) < 5: | |
raise Exception("Must be at least 5 unpaired and paired dms reactivity values") | |
#TODO probably need to add something about where there aren't a ton of observations | |
result = mannwhitneyu( unpaired, paired, alternative='greater' ) | |
denominator = len( paired )*len( unpaired ) | |
return ( result.statistic / denominator, result.pvalue ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment