Last active
June 24, 2020 17:58
-
-
Save chrisjurich/f82142b2c8320e51a010cf3d50533f4c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def eterna_recalculated(row, scale_max=2.3):
    """Recalculate the Eterna score for one dataframe entry and store it in the row.

    The result is written to ``row["eterna_score_recalculated"]`` as the
    fraction (0..1) of reviewed positions whose scaled SHAPE value agrees with
    the target structure. Please note that there is not a 1:1 correspondence
    between the actual and the recalculated scores.

    Args:
        row: Mapping-like entry (e.g. a dataframe row or a dict) providing
            "target_structure", "sequence", "SHAPE_data", "SHAPE_error" and
            "threshold_SHAPE".
        scale_max: Value the largest SHAPE reading is scaled to.

    Returns:
        The same ``row`` object, with "eterna_score_recalculated" set, so the
        function can be used directly with ``DataFrame.apply(..., axis=1)``.
        The score is NaN when no position can be reviewed (e.g. SHAPE_data is
        empty or contains no positive value).

    Raises:
        AssertionError: If the target structure and the sequence differ in
            length.
    """
    assert len(row["target_structure"]) == len(row["sequence"])
    # Sometimes there is a fingerprint sequence at the end of the structure;
    # if that is the case it needs to be removed.
    sequence = re.sub("AAAGAAACAACAACAACAAC$", "", row["sequence"])
    shape_data = row["SHAPE_data"]
    # data_len is the number of data points that will be reviewed.
    # NOTE(review): the last term is the *count* of positive SHAPE values, used
    # here as an upper bound on the reviewed prefix length -- confirm that this
    # is the intended way of excluding zero readings.
    data_len = min(
        len(row["target_structure"]),
        len(shape_data),  # can probably get rid of this one
        len(sequence),
        sum(1 for val in shape_data if val > 0),  # making sure that the SHAPE is not 0
    )
    if data_len == 0:
        # Nothing to review. Bail out before dividing by max(SHAPE_data) or by
        # data_len, both of which would otherwise fail on such rows.
        row["eterna_score_recalculated"] = float("nan")
        return row
    # data_len > 0 guarantees at least one positive SHAPE value, so the
    # maximum is strictly positive and the division below is safe.
    scale_factor = scale_max / max(shape_data)
    unpaired_bound = min(0.25, row["threshold_SHAPE"])
    score = 0
    for index in range(data_len):
        # For each position we need the secondary structure as well as the
        # scaled SHAPE value and the scaled SHAPE error.
        ss = row["target_structure"][index]
        shape = shape_data[index] * scale_factor
        error = row["SHAPE_error"][index] * scale_factor
        if ss == '.' and shape + error > unpaired_bound:
            # Unpaired position with a sufficiently high reading.
            score += 1
        elif ss != '.' and shape - error < 2 * unpaired_bound:
            # Paired position with a sufficiently low reading.
            score += 1
    # With the calculations done, the score can be assigned.
    row["eterna_score_recalculated"] = score / data_len
    return row
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment