Skip to content

Instantly share code, notes, and snippets.

@radusuciu
Created June 30, 2017 20:17
Show Gist options
  • Save radusuciu/21de7fa9824069b3fd01e8362f7c4d6a to your computer and use it in GitHub Desktop.
Save radusuciu/21de7fa9824069b3fd01e8362f7c4d6a to your computer and use it in GitHub Desktop.
'''Simplistic isoform filtering.'''
from Levenshtein import distance
import math
# For every entry of `d`, print the protein records whose index-2 field (the
# "symbol") is not a near-duplicate of another record in the same entry.
# NOTE(review): `d` is not defined in this snippet; presumably it maps a
# sequence to a collection of protein records -- confirm against the caller.
for sequence, candidates in d.items():
    # Keep a record only when no *different* record in this entry has a symbol
    # within Levenshtein distance 1 of its own. Consequences of this rule:
    #   * a record with nothing to compare against is always kept;
    #   * two records that are near-duplicates of each other are both dropped.
    distinct = [
        protein
        for protein in candidates
        if not any(
            distance(protein[2], other[2]) <= 1
            for other in candidates
            if other != protein
        )
    ]

    # Discard records whose symbol is the literal 'Uncharacterized'.
    named = [
        protein
        for protein in distinct
        if protein[2] != 'Uncharacterized'
    ]

    # Report only entries where more than one record survives the filtering.
    # Each record is rendered as "<field 2> (<second-to-last field>)" and the
    # records are tab-separated on a single line.
    if len(named) > 1:
        print('\t'.join('{} ({})'.format(hit[2], hit[-2]) for hit in named))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment