Skip to content

Instantly share code, notes, and snippets.

@chrisjurich
Last active June 24, 2020 17:58
Show Gist options
  • Save chrisjurich/f82142b2c8320e51a010cf3d50533f4c to your computer and use it in GitHub Desktop.
Save chrisjurich/f82142b2c8320e51a010cf3d50533f4c to your computer and use it in GitHub Desktop.
def eterna_recalculated(row, scale_max=2.3):
    """Recalculate the Eterna score for one entry and store it on the row.

    The score is the fraction of reviewed positions whose scaled SHAPE
    reactivity agrees with the target secondary structure: unpaired
    positions ('.') should be reactive, every other position should not be.
    Please note that there is not a 1:1 correspondence between the actual
    and the recalculated scores.

    Args:
        row: Mapping-like entry (e.g. a dict or a dataframe row) providing
            "target_structure", "sequence", "SHAPE_data", "SHAPE_error" and
            "threshold_SHAPE". It is modified in place.
        scale_max: Value the largest SHAPE reactivity is rescaled to.

    Returns:
        The same ``row`` object, with "eterna_score_recalculated" set. The
        score is NaN when there is no position to review (for example when
        no SHAPE value is positive), because the ratio is undefined there.

    Raises:
        AssertionError: If the target structure and the sequence differ in
            length.
    """
    fingerprint = "AAAGAAACAACAACAACAAC"

    structure = row["target_structure"]
    sequence = row["sequence"]
    shape_data = row["SHAPE_data"]
    shape_error = row["SHAPE_error"]

    assert len(structure) == len(sequence), (
        "target_structure and sequence must have the same length"
    )

    # Sometimes there is a fingerprint sequence at the end of the sequence;
    # if that is the case it must not count towards the reviewed positions.
    if sequence.endswith(fingerprint):
        sequence = sequence[:-len(fingerprint)]

    # data_len is the number of leading positions that will be reviewed.
    # SHAPE_error is included so a short error list cannot cause an
    # IndexError in the loop below.
    # NOTE(review): the last term is the *count* of positive SHAPE values,
    # used here as a length cap - confirm this is the intended rule.
    data_len = min(
        len(structure),
        len(shape_data),
        len(shape_error),
        len(sequence),
        sum(1 for val in shape_data if val > 0),
    )

    # Nothing to review: the score is undefined. Bail out before dividing by
    # data_len or by a non-positive SHAPE maximum.
    if data_len == 0:
        row["eterna_score_recalculated"] = float("nan")
        return row

    # data_len > 0 guarantees at least one positive SHAPE value, so the
    # maximum is strictly positive and the division is safe.
    scale_factor = scale_max / max(shape_data)
    unpaired_bound = min(0.25, row["threshold_SHAPE"])
    paired_bound = 2 * unpaired_bound

    score = 0
    reviewed = zip(
        structure[:data_len],
        shape_data[:data_len],
        shape_error[:data_len],
    )
    for ss, raw_shape, raw_error in reviewed:
        # Normalize the reactivity and its error to the common scale.
        shape = raw_shape * scale_factor
        error = raw_error * scale_factor
        if ss == ".":
            # Unpaired: counts when reactivity (plus error) clears the bound.
            if shape + error > unpaired_bound:
                score += 1
        elif shape - error < paired_bound:
            # Paired: counts when reactivity (minus error) stays low.
            score += 1

    # With the calculations done, assign the score and hand the row back so
    # the function can also be used with DataFrame.apply(..., axis=1).
    row["eterna_score_recalculated"] = score / data_len
    return row
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment