-
-
Save 1328/b1bc9562b5e8f62dab87 to your computer and use it in GitHub Desktop.
sequence matcher
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from collections import defaultdict | |
| from functools import partial | |
| from pprint import pprint | |
# Two-level lookup table: generic key -> {specific key -> result value}.
# Keys at both levels come in several different lengths, which is why the
# matcher has to try more than one slice size per level.
match = {
    'abc': {
        '123': 1,
        '234': 2,
        '333': 3,
        '4444': 4,
        '55555': 5,
    },
    'def': {
        '123': 1,
        '234': 2,
        '333': 3,
        '4444': 4,
    },
    'hiumpp': {
        '123': 1,
        '234': 2,
        '333': 3,
        '4444': 4,
        '55555': 5,
    },
}
def find_slice_sizes(match):
    '''Work out which prefix lengths have to be tried against a match table.

    match maps each generic key to a dictionary keyed by specific keys.

    Returns a (generic_sizes, specific_sizes) tuple where:

    * generic_sizes is the sorted list of distinct generic-key lengths
    * specific_sizes maps every generic key to the sorted list of distinct
      lengths of its specific keys (an empty list when that generic key
      has no specific keys at all)
    '''
    # A set comprehension dedups the lengths; sorted() already returns a
    # list, so no extra list() wrapper is needed.
    generic_sizes = sorted({len(generic) for generic in match})
    # Build each entry directly from match.items() so that a generic key
    # with an empty specifics dictionary still gets an (empty) entry, and
    # callers indexing by generic key never hit a KeyError.
    specific_sizes = {
        generic: sorted({len(specific) for specific in specifics})
        for generic, specifics in match.items()
    }
    return generic_sizes, specific_sizes
def check_sequences(match, generic_sizes, specific_sizes, a, b):
    '''Look up a value by matching a prefix of a, then a prefix of b.

    Prefixes of a are tried in the order given by generic_sizes. For each
    prefix that is a key of match, prefixes of b are tried in the order
    given by specific_sizes for that key. The value stored under the first
    generic/specific pair that matches is returned. Progress is printed
    as each prefix is tried.

    Returns None when no generic/specific pair matches.
    '''
    for generic_boundary in generic_sizes:
        g_seq = a[:generic_boundary]
        print(g_seq)
        if g_seq not in match:
            print('miss')
            continue
        print('hit')
        specifics = match[g_seq]
        # .get() rather than indexing: a matched generic key that has no
        # recorded sizes simply has nothing to try, instead of raising
        # KeyError.
        for specific_boundary in specific_sizes.get(g_seq, ()):
            s_seq = b[:specific_boundary]
            print('\ts_seq: {}'.format(s_seq))
            if s_seq not in specifics:
                print('\tmiss')
                continue
            print('\thit')
            return specifics[s_seq]
    # Nothing matched at any size.
    return None
# Compute the slice sizes once up front. (g and s are terse names; pick
# something more descriptive in real code.)
g, s = find_slice_sizes(match)

# functools.partial pre-binds the leading positional arguments of a
# function. Here match, generic_sizes and specific_sizes are fixed, so the
# resulting callable only needs the two sequences to check.
check = partial(check_sequences, match, g, s)

# Show the computed sizes: generic first, then the per-generic specifics.
pprint(g)
pprint(s)

# Run the pre-bound matcher on a sample pair of sequences.
print(check('hiumppabcdefg', '33355555'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment