Skip to content

Instantly share code, notes, and snippets.

@chrisjurich
Last active April 13, 2021 01:19
Show Gist options
  • Save chrisjurich/3d25ae3490bfe2f209b0ff20b3f460e9 to your computer and use it in GitHub Desktop.
Save chrisjurich/3d25ae3490bfe2f209b0ff20b3f460e9 to your computer and use it in GitHub Desktop.
from scipy.stats import mannwhitneyu
# Source: https://www.biorxiv.org/content/10.1101/2020.06.29.178343v2.full.pdf
def dsci( sequence, target, dms ):
# TODO remember, have to replace the dead nt's with N
assert len( sequence ) == len( target ) and len( target ) == len( dms )
# first, gotta do the paired/unpaired
paired, unpaired = [], []
for nt, db, val in zip( sequence, target, dms):
if nt != 'A' and nt != 'C':
continue
if db == '.':
unpaired.append( val )
else:
paired.append( val )
if min( len( unpaired ), len( paired ) ) < 5:
raise Exception("Must be at least 5 unpaired and paired dms reactivity values")
#TODO probably need to add something about where there aren't a ton of observations
result = mannwhitneyu( unpaired, paired, alternative='greater' )
denominator = len( paired )*len( unpaired )
return ( result.statistic / denominator, result.pvalue )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment