Created
November 21, 2010 23:38
-
-
Save timothyclemans/709291 to your computer and use it in GitHub Desktop.
code for computational search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample query string; nothing in the visible code reads it.
query = 'ALS agencies in King County'
def a_vs_b(a, b):
    """Handler registered for the '{% a %} vs. {% b %}' query format.

    Stub: both arguments are currently ignored and None is returned.
    """
    return None
def agencies_in_location(type, location):
    """Handler registered for '{% agency_type %} agencies in {% location %}'.

    Stub: both arguments are currently ignored and None is returned.
    """
    return None
# Registry filled by add_format(): query format string -> handler function.
query_formats_to_functions = {}
# Reverse index filled by add_format(): tuple of a format's literal
# (non-placeholder) parts -> the query format string they came from.
non_values_to_query_formats = {}
def extract_non_values_from_format(format):
    """Return the literal text found between consecutive placeholders.

    A placeholder is anything delimited by '{%' and '%}'. Only the text
    *between* two placeholders is returned; text before the first
    placeholder or after the last one is ignored. Surrounding whitespace
    is stripped from each literal.

    Example: '{% a %} vs. {% b %}' -> ['vs.']

    Args:
        format: the query format string to scan.

    Returns:
        A list of literal strings, in order of appearance. Empty when the
        format contains fewer than two placeholders.

    Raises:
        ValueError: if a '{%' has no matching '%}' after it.
    """
    literals = []
    previous_close = None  # index of the '%}' that ended the last placeholder
    search_from = 0
    while True:
        open_index = format.find('{%', search_from)
        if open_index == -1:
            break
        # Look for the closing marker *after* this opening marker; searching
        # from the previous close would find that same close again.
        close_index = format.find('%}', open_index + 2)
        if close_index == -1:
            raise ValueError('Unterminated placeholder in format: %r' % (format,))
        if previous_close is not None:
            literals.append(format[previous_close + 2:open_index].strip())
        previous_close = close_index
        search_from = close_index + 2
    return literals
def add_format(format, function):
    """Register *function* as the handler for the query format *format*.

    Records the pair in query_formats_to_functions and indexes the format
    in non_values_to_query_formats under the tuple of its literal parts
    (as returned by extract_non_values_from_format). Formats that share
    the same literal tuple overwrite each other in that index.

    Args:
        format: query format string containing '{% name %}' placeholders.
        function: callable to associate with the format.
    """
    query_formats_to_functions[format] = function
    literal_parts = tuple(extract_non_values_from_format(format))
    non_values_to_query_formats[literal_parts] = format
# Register the supported query formats together with their handlers.
add_format('{% a %} vs. {% b %}', a_vs_b)
add_format('{% agency_type %} agencies in {% location %}', agencies_in_location)
def find_matching_format(query):
    """Return a registered query format whose literal parts all occur in *query*.

    Candidates are tried in the iteration order of
    non_values_to_query_formats and the first match wins. A literal only
    has to appear somewhere in the query as a substring; position and
    order are not checked. A format indexed under an empty tuple (no
    literals) therefore matches every query.

    Args:
        query: the query string to classify.

    Returns:
        The matching query format string.

    Raises:
        LookupError: if no registered format matches.
    """
    for non_values_tuple, query_format in non_values_to_query_formats.items():
        if all(non_value in query for non_value in non_values_tuple):
            return query_format
    # Call form of raise: valid on both Python 2 and Python 3.
    raise LookupError('No matching format found')
def extract_values_from_query(format, query):
    """Pull the placeholder values out of *query* according to *format*.

    The literal parts of the format (see extract_non_values_from_format)
    are located in the query from left to right; the text before, between
    and after them is returned as the values. Exactly one separator
    character is dropped on each side of every literal, so
    'Boston vs. Seattle' with literal 'vs.' yields ['Boston', 'Seattle'].

    Args:
        format: query format string containing '{% name %}' placeholders.
        query: the query string to split.

    Returns:
        A list of value strings, one more than the number of literals.
        An empty list is returned when the format has no literal parts,
        when a literal cannot be found, when the first literal starts at
        the very beginning of the query, or when fewer than two characters
        follow the last literal.
    """
    non_values = extract_non_values_from_format(format)
    if not non_values:
        # Nothing to anchor on; indexing the first span below would fail.
        return []

    # Locate each literal, resuming after the previous one so that repeated
    # or overlapping text is matched in format order.
    spans = []
    search_from = 0
    for non_value in non_values:
        start = query.find(non_value, search_from)
        if start == -1:
            return []
        end = start + len(non_value)
        spans.append((start, end))
        search_from = end

    # A value must exist before the first literal and after the last one.
    if spans[0][0] <= 0 or spans[-1][1] >= len(query) - 1:
        return []

    # Values sit between the literals; skip one separator character next to
    # each literal. The final value runs to the end of the query.
    value_starts = [0] + [end + 1 for _, end in spans]
    value_ends = [start - 1 for start, _ in spans] + [None]
    return [query[start:end] for start, end in zip(value_starts, value_ends)]
# Demo calls. The last query contains none of the registered literals, so
# find_matching_format raises LookupError; it is not caught here.
print(find_matching_format('Boston vs. Seattle'))
print(find_matching_format('BLS agencies in New York'))
print(find_matching_format('should raise an error'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment