Skip to content

Instantly share code, notes, and snippets.

@huynhbaoan
Created November 3, 2024 09:39
Show Gist options
  • Save huynhbaoan/fd1c9133c95a50f852be8d6714ed7d05 to your computer and use it in GitHub Desktop.
Save huynhbaoan/fd1c9133c95a50f852be8d6714ed7d05 to your computer and use it in GitHub Desktop.
def run_segmented_queries(start, end, query_string, interval=86400):
    """Run a query over ``start``..``end`` in windows of ``interval`` seconds.

    Each window covers ``current_start`` to ``current_start + interval - 1``
    (clipped to ``end``) and is submitted through ``start_query``. When a
    window returns exactly ``LIMIT`` records and ``interval`` is above 300
    seconds, that window is re-run recursively at half the interval so that
    records beyond the limit are not lost.

    Args:
        start: First timestamp to query, in epoch seconds.
        end: Last timestamp to query, in epoch seconds. Treated as inclusive,
            matching how each window's end is computed.
        query_string: Query text handed unchanged to ``start_query``.
        interval: Window length in seconds. Defaults to one day.

    Returns:
        A list with the records of every window, in window order. Windows for
        which ``start_query`` returned a falsy id contribute nothing.

    Raises:
        ValueError: If ``interval`` is not positive.
    """
    if interval <= 0:
        # A non-positive step would never advance current_start (endless loop).
        raise ValueError(f"interval must be positive, got {interval}")

    batch_query_results = []
    current_start = start
    # Window bounds are inclusive, so the guard must be <=. With < the final
    # second is dropped whenever a window start lands exactly on `end`, which
    # happens every time an odd-length range is halved during recursion.
    while current_start <= end:
        current_end = min(current_start + interval - 1, end)
        query_id = start_query(current_start, current_end, query_string)
        # Wait for the query to complete
        if query_id:
            wait_for_all_queries_to_complete([query_id])
            query_results_current = get_query_results(query_id, current_start, current_end)
            limit_reached = len(query_results_current) == LIMIT
            # Check if limit was reached; if so, split time further
            if limit_reached and interval > 300:  # Set a minimum interval of 5 minutes
                print(f"Limit reached for {datetime.fromtimestamp(current_start)} - {datetime.fromtimestamp(current_end)}, further segmenting")
                # Recursively segment into smaller intervals (half the current interval)
                batch_query_results.extend(
                    run_segmented_queries(current_start, current_end, query_string, interval=interval // 2)
                )
            else:
                if limit_reached:
                    # Cannot split further: make the possible truncation visible.
                    print(f"Limit reached for {datetime.fromtimestamp(current_start)} - {datetime.fromtimestamp(current_end)} at minimum interval, results may be truncated")
                batch_query_results.extend(query_results_current)
        current_start += interval
    return batch_query_results
def get_query_results(query_id, start_time, end_time):
    """Fetch the result rows for ``query_id`` as a list of dicts.

    Calls ``LOG_CLIENT.get_query_results`` and turns every entry of the
    response's ``'results'`` list into a ``{field: value}`` dict. On success
    a summary line with the record count and the formatted time range is
    printed.

    Args:
        query_id: Identifier passed as ``queryId`` to the client.
        start_time: Window start in epoch seconds; used only for the summary.
        end_time: Window end in epoch seconds; used only for the summary.

    Returns:
        The list of record dicts. Any exception is caught and printed, and
        whatever was collected up to that point is returned.
    """
    records = []
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    try:
        payload = LOG_CLIENT.get_query_results(queryId=query_id)
        # One dict per result row, keyed by field name. Appending row by row
        # keeps already-converted rows if a later one is malformed.
        for row in payload['results']:
            records.append(dict((cell['field'], cell['value']) for cell in row))
        # Human-readable window bounds for the summary line.
        window_start = datetime.fromtimestamp(start_time).strftime(timestamp_format)
        window_end = datetime.fromtimestamp(end_time).strftime(timestamp_format)
        print(f"Query ID {query_id}: Total records retrieved = {len(records)} for time range {window_start} - {window_end}")
    except Exception as e:
        print(f"Error retrieving results for query ID {query_id}: {e}")
    return records
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment