Created
May 30, 2025 07:34
-
-
Save vedantroy/a7f4bf161061276bf12180ac0eefeb6d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script
# requires-python = ">=3.8"
# dependencies = [
#     "reportlab>=3.6.0",
# ]
# ///
import json | |
from reportlab.lib.pagesizes import letter | |
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer | |
from reportlab.lib.styles import getSampleStyleSheet | |
from reportlab.lib.units import inch | |
from reportlab.lib import colors | |
import argparse | |
import os | |
from html import escape | |
# Coloring scheme functions | |
def color_scheme_none(record, optimal_low, optimal_high):
    """Neutral scheme: report "grey" for every row, ignoring all arguments."""
    neutral = "grey"
    return neutral
def color_scheme_optimal_aware(record, optimal_low, optimal_high):
    """
    Optimal-aware coloring scheme.

    - Red: ``outOfRangeType`` is "above" or "below".
    - Green: in range and inside the optimal range, in range with no optimal
      range available, or in range with a value/bound that cannot be parsed
      as a number.
    - Yellow: in range but outside the optimal range.
    - Grey: any other ``outOfRangeType``, or in range with an optimal range
      but no calculated result (missing, "" or "N/A") to compare against it.

    Args:
        record: Biomarker record dict; reads "outOfRangeType" and
            "currentResult" -> "calculatedResult".
        optimal_low: Lower optimal bound (string or number); "" or None
            means there is no lower bound.
        optimal_high: Upper optimal bound (string or number); "" or None
            means there is no upper bound.

    Returns:
        One of "red", "yellow", "green" or "grey".
    """
    def _present(candidate):
        # A numeric 0 is a legitimate measurement or bound, so plain
        # truthiness is not enough: only None and "" mean "absent".
        return candidate is not None and candidate != ""

    status = record.get("outOfRangeType", "unknown")
    if status in ("above", "below"):
        return "red"
    if status != "in_range":
        return "grey"

    has_low = _present(optimal_low)
    has_high = _present(optimal_high)
    if not (has_low or has_high):
        # No optimal range but in range -> Green
        return "green"

    # "currentResult" can be present but null; treat that like a missing dict.
    current_result = record.get("currentResult") or {}
    calculated = current_result.get("calculatedResult")
    if not _present(calculated) or calculated == "N/A":
        # Nothing to compare against the optimal range.
        return "grey"

    try:
        value = float(calculated)
        within_low = value >= float(optimal_low) if has_low else True
        within_high = value <= float(optimal_high) if has_high else True
    except (ValueError, TypeError):
        # Can't parse values, but we know it's in range
        return "green"

    return "green" if within_low and within_high else "yellow"
def color_scheme_basic(record, optimal_low, optimal_high):
    """
    Two-color scheme driven only by the record's "outOfRangeType".

    - Red: "above" or "below"
    - Grey: anything else, including a missing status

    The optimal bounds are accepted for signature compatibility and ignored.
    """
    status = record.get("outOfRangeType", "unknown")
    return "red" if status in ("above", "below") else "grey"
# Coloring scheme applied to each table row of the PDF report.
# To switch schemes, assign one of the alternatives instead:
#   ACTIVE_COLOR_SCHEME = color_scheme_none
#   ACTIVE_COLOR_SCHEME = color_scheme_basic
ACTIVE_COLOR_SCHEME = color_scheme_optimal_aware
def get_color_for_value(color_type):
    """Translate a color-type label into a reportlab color.

    Known labels are "red", "yellow", "green" and "grey"; any other label
    falls back to light grey.
    """
    palette = dict(
        red=colors.lightpink,
        yellow=colors.lightyellow,
        green=colors.lightgreen,
        grey=colors.lightgrey,
    )
    try:
        return palette[color_type]
    except KeyError:
        return colors.lightgrey
def get_optimal_range_for_gender(sex_details, gender):
    """
    Return the ``(low, high)`` optimal range bounds for the given gender.

    Lookup order:
      1. the first entry whose "sex" equals ``gender`` (case-insensitive),
      2. the first entry whose "sex" is "all" (case-insensitive),
      3. the first entry in ``sex_details``.

    Args:
        sex_details: List of dicts read for the keys "sex",
            "optimalRangeLow" and "optimalRangeHigh"; may be empty or None.
        gender: Gender label to look up, e.g. "male" or "female".

    Returns:
        A ``(low, high)`` tuple. A bound that is missing or null is returned
        as "". ``("", "")`` is returned when ``sex_details`` is empty.
    """
    if not sex_details:
        return "", ""

    def _sex_of(detail):
        # "sex" may be absent or null; both are treated as an empty label
        # instead of crashing on None.lower().
        return str(detail.get("sex") or "").lower()

    def _bounds(detail):
        low = detail.get("optimalRangeLow")
        high = detail.get("optimalRangeHigh")
        # Normalize null bounds to "" so callers always see the same
        # "no bound" marker.
        return ("" if low is None else low, "" if high is None else high)

    # Gender-specific entry first, then the generic "all" entry.
    for wanted in (str(gender or "").lower(), "all"):
        for detail in sex_details:
            if _sex_of(detail) == wanted:
                return _bounds(detail)

    # Neither found: fall back to the first entry (list is non-empty here).
    return _bounds(sex_details[0])
def create_pdf_report(biomarker_records, output_pdf_path, gender="male"):
    """
    Generate the dense PDF table report.

    One table row is emitted per record with the columns Biomarker Name,
    Measurement, Expected Range, Optimal Range and Unit. Each data row is
    given a background color chosen by ``ACTIVE_COLOR_SCHEME``.

    Args:
        biomarker_records: Iterable of record dicts; reads "biomarker"
            ("name", "sexDetails"), "currentResult" ("displayResult",
            "calculatedResult"), "units" and "rangeString".
        output_pdf_path: Destination passed to ``SimpleDocTemplate``.
        gender: Gender used to pick the optimal range and shown in the title.

    Returns:
        None. A confirmation is printed on success; errors raised while
        building the document are caught and printed instead of propagated.
    """
    doc = SimpleDocTemplate(output_pdf_path, pagesize=letter,
                            rightMargin=inch/2, leftMargin=inch/2,
                            topMargin=inch/2, bottomMargin=inch/2)
    styles = getSampleStyleSheet()
    title_style = styles['h1']
    normal_style = styles['Normal']
    normal_style.fontSize = 8  # Smaller font for density

    story = [
        Paragraph(f"Biomarker Report (Dense Format) - {gender.capitalize()}", title_style),
        Spacer(1, 0.2*inch),
    ]

    table_data = [["Biomarker Name", "Measurement", "Expected Range", "Optimal Range", "Unit"]]
    row_color_types = []  # Color label for each data row, in table order

    for record in biomarker_records:
        # Nested objects may be present but null; fall back to empty dicts.
        biomarker_info = record.get("biomarker") or {}
        current_result = record.get("currentResult") or {}

        biomarker_name = biomarker_info.get("name", "N/A")
        measurement = current_result.get("displayResult")
        if measurement is None:
            measurement = current_result.get("calculatedResult", "N/A")
        unit = record.get("units", "")
        expected_range = record.get("rangeString", "N/A")

        # Get optimal range from biomarker.sexDetails for the specified gender
        optimal_low = ""
        optimal_high = ""
        sex_details = biomarker_info.get("sexDetails") or []
        if sex_details:
            optimal_low, optimal_high = get_optimal_range_for_gender(sex_details, gender)

        # Format optimal range string
        if optimal_low and optimal_high:
            optimal_range = f"{optimal_low}-{optimal_high}"
        elif optimal_high:
            optimal_range = f"<{optimal_high}"
        elif optimal_low:
            optimal_range = f">{optimal_low}"
        else:
            optimal_range = "N/A"

        # Paragraph parses its text as markup, so escape HTML special
        # characters to prevent parsing errors.
        cells = (biomarker_name, measurement, expected_range, optimal_range, unit)
        table_data.append([Paragraph(escape(str(cell)), normal_style) for cell in cells])

        # Get color using the active color scheme
        row_color_types.append(ACTIVE_COLOR_SCHEME(record, optimal_low, optimal_high))

    if len(table_data) > 1:
        col_widths = [2.5*inch, 1.2*inch, 1.3*inch, 1.0*inch, 1.0*inch]
        table = Table(table_data, colWidths=col_widths)
        style_commands = [
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('LEFTPADDING', (0, 0), (-1, -1), 4),
            ('RIGHTPADDING', (0, 0), (-1, -1), 4),
            ('TOPPADDING', (0, 0), (-1, -1), 2),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
        ]
        # Add background colors for all non-header rows; row 0 is the header,
        # so data rows start at index 1.
        style_commands.extend(
            ('BACKGROUND', (0, row), (-1, row), get_color_for_value(color_type))
            for row, color_type in enumerate(row_color_types, start=1)
        )
        table.setStyle(TableStyle(style_commands))
        story.append(table)
    else:
        story.append(Paragraph("No biomarker data found for PDF.", normal_style))

    try:
        doc.build(story)
        print(f"PDF report generated: {output_pdf_path}")
    except Exception as e:
        # Best-effort: report the failure without aborting the caller.
        print(f"Error building PDF: {e}")
def create_text_report(biomarker_records, output_text_path, gender="male"):
    """
    Generate the plain text report.

    The file starts with a title line, a blank line and "---", followed by
    one line per record in the form
    ``<name>: <measurement> <unit> | Expected: <range>[ | Optimal: <range>]``.
    The "Optimal" part is omitted when no optimal range is available.

    Args:
        biomarker_records: List of record dicts; reads "biomarker"
            ("name", "sexDetails"), "currentResult" ("displayResult",
            "calculatedResult"), "units" and "rangeString". May be empty.
        output_text_path: Path of the text file to write (UTF-8).
        gender: Gender used to pick the optimal range and shown in the title.

    Returns:
        None. A confirmation is printed on success; write errors are caught
        and printed instead of propagated.
    """
    report_lines = [f"Biomarker Report (Text Format for LLM Ingestion) - {gender.capitalize()}\n", "---"]

    if not biomarker_records:
        report_lines.append("No biomarker data found.")

    for record in biomarker_records or []:
        # Nested objects may be present but null; fall back to empty dicts.
        biomarker_info = record.get("biomarker") or {}
        current_result = record.get("currentResult") or {}

        biomarker_name = biomarker_info.get("name", "N/A")
        measurement = current_result.get("displayResult")
        if measurement is None:
            measurement = current_result.get("calculatedResult", "N/A")
        unit = record.get("units", "")
        expected_range = record.get("rangeString", "N/A")

        # Get optimal range from biomarker.sexDetails for the specified gender
        optimal_low = ""
        optimal_high = ""
        sex_details = biomarker_info.get("sexDetails") or []
        if sex_details:
            optimal_low, optimal_high = get_optimal_range_for_gender(sex_details, gender)

        # Format optimal range string
        if optimal_low and optimal_high:
            optimal_range = f"{optimal_low}-{optimal_high}"
        elif optimal_high:
            optimal_range = f"<{optimal_high}"
        elif optimal_low:
            optimal_range = f">{optimal_low}"
        else:
            optimal_range = "N/A"

        # Format the line with all information
        line = f"{biomarker_name}: {measurement} {unit}".strip()
        line += f" | Expected: {expected_range}"
        if optimal_range != "N/A":
            line += f" | Optimal: {optimal_range}"
        report_lines.append(line)

    try:
        # Explicit UTF-8 so non-ASCII names/units do not depend on the
        # platform's default encoding.
        with open(output_text_path, 'w', encoding="utf-8") as f:
            f.write("\n".join(report_lines) + "\n")
    except (OSError, UnicodeError) as e:
        print(f"Error writing text report: {e}")
    else:
        print(f"Text report generated: {output_text_path}")
def process_biomarker_data(json_file_path, output_pdf_path, output_text_path, gender="male"):
    """
    Process biomarker JSON data and generate the PDF and text reports.

    Records are read from ``data["data"]["biomarkerResultsRecord"]``. When
    that list is missing, null or empty, a one-paragraph notice PDF and a
    one-line notice text file are written instead of the full reports.

    Args:
        json_file_path: Path of the input JSON file (read as UTF-8).
        output_pdf_path: Path of the PDF file to write.
        output_text_path: Path of the text file to write.
        gender: Gender forwarded to both report generators.

    Returns:
        None.

    Raises:
        OSError: If the input file cannot be opened (or, on the notice path,
            an output file cannot be opened).
        json.JSONDecodeError: If the input is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale-dependent default.
    with open(json_file_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    # "data" may be present but null; treat it like a missing object.
    biomarker_records = (data.get("data") or {}).get("biomarkerResultsRecord") or []

    if not biomarker_records:
        print("No 'biomarkerResultsRecord' found in the JSON data.")
        with open(output_pdf_path, 'wb') as f:
            SimpleDocTemplate(f).build([Paragraph("No biomarker data found.")])
        with open(output_text_path, 'w', encoding="utf-8") as f:
            f.write("No biomarker data found in the input JSON.")
        print(f"Empty/Notice PDF created: {output_pdf_path}")
        print(f"Empty/Notice Text file created: {output_text_path}")
        return

    create_pdf_report(biomarker_records, output_pdf_path, gender)
    create_text_report(biomarker_records, output_text_path, gender)
if __name__ == "__main__":
    # Command-line entry point: parse arguments, derive default output paths
    # next to the input file, then generate both reports.
    cli = argparse.ArgumentParser(description="Generate PDF and Text reports from biomarker JSON data.")
    cli.add_argument("json_file", help="Path to the input JSON file.")
    cli.add_argument(
        "-op", "--output_pdf",
        help="Path for the output PDF file (default: [json_filename]_dense_report.pdf).",
    )
    cli.add_argument(
        "-ot", "--output_text",
        help="Path for the output Text file (default: [json_filename]_llm_data.txt).",
    )
    cli.add_argument(
        "-g", "--gender",
        default="male",
        choices=["male", "female"],
        help="Gender for optimal range filtering (default: male).",
    )
    args = cli.parse_args()

    source_path = args.json_file
    stem, _extension = os.path.splitext(os.path.basename(source_path))
    # An input given without a directory part writes next to the current dir.
    target_dir = os.path.dirname(source_path) or "."

    pdf_output_path = args.output_pdf or os.path.join(target_dir, f"{stem}_dense_report.pdf")
    text_output_path = args.output_text or os.path.join(target_dir, f"{stem}_llm_data.txt")

    process_biomarker_data(source_path, pdf_output_path, text_output_path, args.gender)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment