Skip to content

Instantly share code, notes, and snippets.

@1328
Last active August 29, 2015 14:19
Show Gist options
  • Select an option

  • Save 1328/b1bc9562b5e8f62dab87 to your computer and use it in GitHub Desktop.

Select an option

Save 1328/b1bc9562b5e8f62dab87 to your computer and use it in GitHub Desktop.
sequence matcher
from collections import defaultdict
from functools import partial
from pprint import pprint
# Two-level lookup table: generic prefix -> {specific prefix -> value}.
# Keys at both levels have varying lengths, which is why the slice sizes
# have to be computed before matching.
match = {
    'abc': {
        '123': 1,
        '234': 2,
        '333': 3,
        '4444': 4,
        '55555': 5,
    },
    'def': {
        '123': 1,
        '234': 2,
        '333': 3,
        '4444': 4,
    },
    'hiumpp': {
        '123': 1,
        '234': 2,
        '333': 3,
        '4444': 4,
        '55555': 5,
    },
}
def find_slice_sizes(match):
    '''Work out which prefix lengths need to be sliced when matching.

    Args:
        match: two-level mapping ``{generic: {specific: value}}``. Keys at
            both levels must support ``len()``.

    Returns:
        A ``(generic_sizes, specific_sizes)`` tuple where

        * ``generic_sizes`` is the ascending list of distinct lengths of the
          top-level keys, and
        * ``specific_sizes`` maps every top-level key to the ascending list
          of distinct lengths of its inner keys. A top-level key with no
          inner keys maps to ``[]``.
    '''
    # A set dedups the lengths; sets are unordered, so sort into a list.
    generic_sizes = sorted({len(generic) for generic in match})
    # Build one entry per generic key, including those with no specifics,
    # so a later lookup by a matched generic key cannot raise KeyError.
    specific_sizes = {
        generic: sorted({len(specific) for specific in specifics})
        for generic, specifics in match.items()
    }
    return generic_sizes, specific_sizes
def check_sequences(match, generic_sizes, specific_sizes, a, b):
    '''Look up a value by matching a prefix of ``a``, then a prefix of ``b``.

    Prefixes of ``a`` are tried in the order given by ``generic_sizes``. For
    each one that is a key of ``match``, prefixes of ``b`` are tried in the
    order given by ``specific_sizes`` for that key. The first pair that hits
    wins. If no specific prefix hits under a matched generic prefix, the
    search carries on with the next generic size.

    A trace of every attempt is printed to stdout.

    Args:
        match: two-level mapping ``{generic: {specific: value}}``.
        generic_sizes: iterable of prefix lengths to try on ``a``.
        specific_sizes: mapping from generic key to an iterable of prefix
            lengths to try on ``b``. A generic key missing from this mapping
            is treated as having no candidate lengths.
        a: sequence whose prefix selects the generic key.
        b: sequence whose prefix selects the specific key.

    Returns:
        The value stored under the first matching pair, or ``None`` when
        nothing matches.
    '''
    for generic_boundary in generic_sizes:
        g_seq = a[:generic_boundary]
        print(g_seq)
        if g_seq not in match:
            print('miss')
            continue
        print('hit')
        specifics = match[g_seq]
        # .get() rather than [] so a matched generic key without a size
        # entry (e.g. one with no specifics) is skipped instead of raising
        # KeyError.
        for specific_boundary in specific_sizes.get(g_seq, ()):
            s_seq = b[:specific_boundary]
            print('\ts_seq: {}'.format(s_seq))
            if s_seq in specifics:
                print('\thit')
                return specifics[s_seq]
            print('\tmiss')
    # Nothing matched at any generic/specific size combination.
    return None
# Demo driver. The short names are placeholders (pick better ones in real
# code): g is the list of generic key lengths, s maps each generic key to the
# list of its specific key lengths.
g,s = find_slice_sizes(match)
pprint(g)
pprint(s)
# partial() pre-binds the leading positional arguments of a function.
# Here check_sequences gets match, generic_sizes and specific_sizes bound,
# leaving only the two sequences (a, b) to be supplied per call.
check = partial(check_sequences, match, g, s)
# Now each lookup only needs the two input sequences; the table and the
# precomputed slice sizes are already bound by partial.
print(check('hiumppabcdefg', '33355555'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment