Last active
August 12, 2016 17:40
-
-
Save itsmunim/dcb497b63b0f85d698b1bc21b41476a5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# def ngrams(tokens, n):
#     num_tokens = len(tokens)
#     if n > num_tokens:
#         return
#     ngram_list = []
#     for i in xrange(num_tokens):
#         token_groups = []
#         for j in xrange(i, i+n):
#             if j < num_tokens:
#                 token_groups.append(tokens[j])
#         ngram_list.append(token_groups)
#     return ngram_list
_map = { | |
('A'): 'X1', | |
('B'): 'X2', | |
('C'): 'X3', | |
('D'): 'X4', | |
('E'): 'X5', | |
('A', 'B'): 'Y1', | |
('B', 'C', 'D'): 'Y2', | |
('C', 'D', 'F'): 'Y3', | |
('C', 'D', 'G'): 'Y4' | |
} | |
def _as_token_tuple(value):
    """Return *value* as a tuple of tokens; a non-list/tuple value is one token."""
    if isinstance(value, (tuple, list)):
        return tuple(value)
    return (value,)


def get_similar_sequence_keys(_map, sequence):
    """Return the keys of ``_map`` whose leading tokens equal ``sequence``.

    Keys and ``sequence`` may each be a single token (a plain string) or a
    tuple/list of tokens.  The comparison is done token by token, so the token
    ``'AB'`` is not confused with the two-token key ``('A', 'B')`` and the
    token ``'A'`` does not match a key whose first token is ``'AB'``.

    Args:
        _map: mapping whose keys are tokens or tuples of tokens.
        sequence: the token, or sequence of tokens, the keys must start with.
            An empty ``sequence`` is a prefix of every key.

    Returns:
        list: the matching keys, in the mapping's iteration order.
    """
    # An empty string/list/tuple constrains nothing, so every key qualifies.
    prefix = _as_token_tuple(sequence) if sequence else ()
    prefix_length = len(prefix)
    return [
        key for key in _map
        if _as_token_tuple(key)[:prefix_length] == prefix
    ]
def run():
    """Read one line of whitespace-separated tokens from stdin and translate it.

    Scanning starts at the first token.  At each position the longest run of
    tokens that is a key of the module-level ``_map`` is replaced by that
    key's value, and scanning resumes right after the matched run.
    Translation stops at the first position where no key matches.

    Returns:
        list: the mapped values, in input order, for the translated prefix.
    """
    import re

    # ``raw_input`` only exists on Python 2; Python 3 renamed it to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    def key_length(key):
        # Single-token keys are plain strings, so len() would count
        # characters instead of tokens.
        return len(key) if isinstance(key, tuple) else 1

    tokens = re.findall(r'\S+', read_line().strip())
    output = []
    index = 0
    while index < len(tokens):
        candidates = get_similar_sequence_keys(_map, tokens[index])
        # Try the widest window first so the longest key wins.  Sorting the
        # sizes (not the keys) also avoids comparing str with tuple.
        window_sizes = sorted(set(key_length(key) for key in candidates),
                              reverse=True)
        matched = False
        for size in window_sizes:
            # Near the end of the input the slice may be shorter than size.
            window = tokens[index:index + size]
            # A lone token is looked up as a string, matching how
            # single-token keys are stored.
            lookup = window[0] if len(window) == 1 else tuple(window)
            if lookup in candidates:
                output.append(_map[lookup])
                index += len(window)
                matched = True
                break
        if not matched:
            # Nothing matches here: stop, instead of spinning forever on an
            # index that never advances.
            break
    return output
if __name__ == "__main__":
    # Script entry point: translate one line from stdin and print the result.
    output = run()
    # A parenthesised single argument prints identically on Python 2 and 3.
    print(output)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.