Created
November 3, 2024 09:39
-
-
Save huynhbaoan/fd1c9133c95a50f852be8d6714ed7d05 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def run_segmented_queries(start, end, query_string, interval=86400):
    """Run a query window by window, splitting windows that hit the result limit.

    The range ``start``..``end`` is walked in consecutive windows of
    ``interval`` seconds. Both bounds are treated as inclusive, which is what
    the ``current_start + interval - 1`` window end implies. Each window is
    submitted with ``start_query``, awaited, and fetched with
    ``get_query_results``. When a window returns exactly ``LIMIT`` rows (so
    rows were probably cut off) and ``interval`` is above 300 seconds, the
    window is re-run recursively with half the interval and only the
    recursive results are kept.

    Args:
        start: First second of the range, as a POSIX timestamp in seconds.
        end: Last second of the range, as a POSIX timestamp in seconds.
        query_string: Query text passed unchanged to ``start_query``.
        interval: Window length in seconds; must be positive.

    Returns:
        A list with the rows of every window, in chronological window order.
        Returns an empty list when ``start`` is greater than ``end``.

    Raises:
        ValueError: If ``interval`` is not positive (the loop could never
            advance).
    """
    if interval <= 0:
        raise ValueError(f"interval must be a positive number of seconds, got {interval}")

    batch_query_results = []
    current_start = start
    # `<=`, not `<`: windows are inclusive, so a window that starts exactly on
    # `end` still holds one second of data. This occurs whenever a halved
    # interval is odd (e.g. 675 -> 337) or `end` falls on a window boundary.
    while current_start <= end:
        current_end = min(current_start + interval - 1, end)
        query_id = start_query(current_start, current_end, query_string)
        # A falsy query_id skips the window (presumably start_query signals
        # failure that way - confirm against its definition).
        if query_id:
            # Wait for the query to complete
            wait_for_all_queries_to_complete([query_id])
            query_results_current = get_query_results(query_id, current_start, current_end)
            limit_reached = len(query_results_current) == LIMIT
            # Split further only above the 5-minute floor, and only when the
            # window spans more than one second (otherwise it cannot shrink).
            # NOTE: a window shorter than the halved interval is re-queried
            # unchanged at each level until the interval drops below its span.
            if limit_reached and interval > 300 and current_end > current_start:
                print(f"Limit reached for {datetime.fromtimestamp(current_start)} - {datetime.fromtimestamp(current_end)}, further segmenting")
                # Recursively segment into smaller intervals (half the current interval)
                batch_query_results.extend(run_segmented_queries(current_start, current_end, query_string, interval=interval // 2))
            else:
                if limit_reached:
                    # Cannot split further: say so instead of silently
                    # returning a possibly truncated window.
                    print(f"Warning: limit of {LIMIT} records reached for {datetime.fromtimestamp(current_start)} - {datetime.fromtimestamp(current_end)} at the minimum interval; results may be truncated")
                batch_query_results.extend(query_results_current)
        current_start += interval
    return batch_query_results
def get_query_results(query_id, start_time, end_time):
    """Fetch the rows of a finished query and print a one-line summary.

    Each entry of the response's ``'results'`` list is turned into a dict that
    maps every cell's ``'field'`` to its ``'value'``. ``start_time`` and
    ``end_time`` are used only to format the summary line.

    Any exception is caught and reported with ``print``; the rows gathered
    before the failure (possibly none) are still returned.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    records = []
    try:
        payload = LOG_CLIENT.get_query_results(queryId=query_id)
        # Append row by row so that rows converted before a failure are kept.
        for row in payload['results']:
            records.append(dict((cell['field'], cell['value']) for cell in row))
        # Summarise the time window and the number of rows retrieved.
        window_from = datetime.fromtimestamp(start_time).strftime(timestamp_format)
        window_to = datetime.fromtimestamp(end_time).strftime(timestamp_format)
        print(f"Query ID {query_id}: Total records retrieved = {len(records)} for time range {window_from} - {window_to}")
    except Exception as err:
        print(f"Error retrieving results for query ID {query_id}: {err}")
    return records
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment