Created
September 18, 2024 20:03
-
-
Save angusdev/6ae0ca7066d073711fb0620ef4b9debd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from datetime import datetime | |
import pandas as pd | |
import argparse | |
def parse_log_files(file_names):
    """Collect per-job start/end information from MYJOB log lines.

    A line is considered when it contains text of the form
    ``YYYY-MM-DD HH:MM:SS Thread-N MYJOB started|ended <job id> <job name>``;
    every other line is ignored.

    Args:
        file_names: Iterable of log file paths to read, in order.

    Returns:
        A dict mapping job id (as a string) to
        ``{'job_name': str, 'details': {file_name: entry}}``. ``job_name`` is
        taken from the first line seen for that job id. Each per-file
        ``entry`` contains ``thread_name`` plus:

        * ``start_time`` (datetime) once a ``started`` line was seen,
        * ``duration`` (float seconds) once an ``ended`` line follows a
          ``started`` line in the same file,
        * ``duration = None`` when an ``ended`` line has no matching
          ``started`` line in that file.

    Raises:
        OSError: If a file cannot be opened.
        ValueError: If a matched timestamp is not a real date/time
            (for example month 13).
    """
    job_pattern = re.compile(
        r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (Thread-\d+) MYJOB (started|ended) (\d+) (.+)'
    )
    job_data = {}

    for file_name in file_names:
        # errors='replace' keeps a stray undecodable byte in a log from
        # aborting the whole run; the matched fields are plain ASCII anyway.
        with open(file_name, 'r', errors='replace') as file:
            for line in file:
                match = job_pattern.search(line)
                if not match:
                    continue

                stamp, thread_name, status, job_id, job_name = match.groups()
                timestamp = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')

                job = job_data.setdefault(job_id, {'job_name': job_name, 'details': {}})
                details = job['details']

                if status == 'started':
                    # A new start always replaces whatever was recorded for
                    # this job in this file.
                    details[file_name] = {
                        'start_time': timestamp,
                        'thread_name': thread_name,
                    }
                    continue

                # status == 'ended' (the regex admits only these two values).
                entry = details.get(file_name)
                start_time = entry.get('start_time') if entry else None
                if start_time is None:
                    # No usable start in this file. This also covers a
                    # repeated orphan 'ended' line, whose earlier entry has
                    # no 'start_time' key and used to raise KeyError.
                    details[file_name] = {
                        'thread_name': thread_name,
                        'duration': None,
                    }
                else:
                    entry['duration'] = (timestamp - start_time).total_seconds()

    return job_data
def generate_excel(job_data, output_file):
    """Write one row per job to an Excel workbook.

    Args:
        job_data: Mapping of job id to
            ``{'job_name': ..., 'details': {file_name: entry}}`` where each
            ``entry`` has a ``thread_name`` key and optionally ``duration``.
        output_file: Destination passed straight to ``DataFrame.to_excel``.

    The sheet always starts with ``JOB_ID`` and ``JOB_NAME`` columns, followed
    by ``THREAD_NAME(<file>)`` / ``DURATION(<file>)`` pairs in the order the
    files are first encountered. A job that has no entry for a file gets
    empty cells in that file's columns.

    NOTE(review): ``to_excel`` relies on an Excel writer engine being
    installed for pandas (commonly openpyxl) - confirm for the deployment.
    """
    # Track the column order explicitly so the header row is still written
    # when job_data is empty (a bare DataFrame([]) has no columns at all).
    columns = ['JOB_ID', 'JOB_NAME']
    known_columns = set(columns)

    rows = []
    for job_id, job in job_data.items():
        row = {'JOB_ID': job_id, 'JOB_NAME': job['job_name']}
        for file_name, file_details in job['details'].items():
            row[f'THREAD_NAME({file_name})'] = file_details['thread_name']
            # 'duration' is absent while a job has started but not ended.
            row[f'DURATION({file_name})'] = file_details.get('duration')

        for column in row:
            if column not in known_columns:
                known_columns.add(column)
                columns.append(column)
        rows.append(row)

    df = pd.DataFrame(rows, columns=columns)
    df.to_excel(output_file, index=False)
def main():
    """Command-line entry point.

    Reads one or more log file paths (positional) and an optional
    ``--output`` workbook name from the command line, parses the logs and
    writes the resulting job table to that workbook.
    """
    cli = argparse.ArgumentParser(
        description='Parse log files and output job durations to an Excel file.'
    )
    cli.add_argument('input_files', nargs='+', help='Input log file names')
    cli.add_argument('--output', default='job_durations.xlsx', help='Output Excel file name')
    options = cli.parse_args()

    # Parse first, then hand the collected job table to the Excel writer.
    generate_excel(parse_log_files(options.input_files), options.output)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment