Skip to content

Instantly share code, notes, and snippets.

@skalahonza
Created January 15, 2025 09:57
Show Gist options
  • Select an option

  • Save skalahonza/a355ea1027cd82baa99c2a4f2f51d985 to your computer and use it in GitHub Desktop.

Select an option

Save skalahonza/a355ea1027cd82baa99c2a4f2f51d985 to your computer and use it in GitHub Desktop.
Analyze xunit test run
import pandas as pd
from bs4 import BeautifulSoup
# Location of the exported test-run report (an HTML document); adjust as needed.
file_path = "C:/Users/jskal/OneDrive/Plocha/tests.txt"
try:
    # Load the HTML file in one go. The encoding is pinned so decoding does not
    # depend on the platform's locale default (e.g. cp1250/cp1252 on Windows).
    # NOTE(review): assumes the report was saved as UTF-8 - confirm.
    with open(file_path, 'r', encoding='utf-8') as file:
        html_content = file.read()
except FileNotFoundError:
    # Re-raise with actionable guidance; `from None` suppresses the redundant
    # chained traceback of the original, less descriptive error.
    raise FileNotFoundError(f"The file at {file_path} was not found. Please ensure the path is correct and the file exists.") from None
# Parse the HTML using BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Extract the table: only the first <table> in the document is used.
table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found in the HTML file. Please check the file content.")

# Extract headers: the column names are the stripped text of every <th> cell.
headers = [th.text.strip() for th in table.find_all('th')]
if not headers:
    raise ValueError("No headers found in the table. Ensure the table has a proper <thead> section.")

# Extract rows. The first <tr> is treated as the header row and skipped.
# Rows without any <td> cell (extra header rows, empty spacer rows) are
# skipped as well: pandas would otherwise pad them into all-missing records,
# which break the string processing applied to the columns afterwards.
rows = []
for tr in table.find_all('tr')[1:]:
    cells = [td.text.strip() for td in tr.find_all('td')]
    if cells:
        rows.append(cells)

# Create a DataFrame (headers are already known to be non-empty at this point).
if not rows:
    raise ValueError("No rows found in the table. Ensure the table has valid data rows.")
df = pd.DataFrame(rows, columns=headers)
# Clean and process the DataFrame
# Extract Section from 'Test suite' column
def extract_section(test_suite, position=4):
    """Return one dot-separated segment of a test suite name.

    Args:
        test_suite: Dotted suite name, e.g. ``"A.B.C.D.Section.Tests"``.
            Non-string values (``None``/NaN from missing table cells) are
            accepted and treated as "no section".
        position: Zero-based, non-negative index of the segment to return.
            Defaults to 4, i.e. the fifth segment.

    Returns:
        The segment at ``position``, or ``None`` when the name has too few
        segments or is not a string.
    """
    # Missing cells arrive as None/NaN rather than str; they have no section.
    if not isinstance(test_suite, str):
        return None
    parts = test_suite.split('.')
    return parts[position] if len(parts) > position else None
# Tag every test row with the section parsed from its suite name.
df['Section'] = df['Test suite'].map(extract_section)
# Convert time strings to milliseconds
def convert_time_to_ms(time_str):
    """Convert a duration string such as ``"12ms"`` or ``"1.5s"`` to milliseconds.

    Args:
        time_str: Duration text carrying an ``ms`` or ``s`` unit. Non-string
            values (``None``/NaN from missing table cells) are accepted.

    Returns:
        The duration in milliseconds as a float. Text without a recognised
        unit, and non-string input, yield ``0.0``.

    Raises:
        ValueError: If a unit is present but the remaining text is not a
            valid number.
    """
    # Missing cells arrive as None/NaN rather than str; count them as zero time.
    if not isinstance(time_str, str):
        return 0.0
    # 'ms' must be tested before 's', because every 'ms' value also contains 's'.
    if 'ms' in time_str:
        return float(time_str.replace('ms', ''))
    if 's' in time_str:
        return float(time_str.replace('s', '')) * 1000
    # No recognised unit: always return a float so the result type is uniform.
    return 0.0
# Normalise every duration to milliseconds so the values can be summed.
df['Time_ms'] = df['Time'].apply(convert_time_to_ms)

# Sum time by section, slowest section first, and rename the total column
# for clarity in the printed report.
# Note: pandas' groupby leaves out rows whose key is missing by default, so
# tests without a Section value do not appear in the summary.
section_summary = (
    df.groupby('Section')['Time_ms']
    .sum()
    .reset_index()
    .sort_values(by='Time_ms', ascending=False)
    .rename(columns={'Time_ms': 'Total Time (ms)'})
)

# Display the summarized data to the user
print("Test Duration Summary by Section:")
print(section_summary)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment