Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save vedantroy/a7f4bf161061276bf12180ac0eefeb6d to your computer and use it in GitHub Desktop.
Save vedantroy/a7f4bf161061276bf12180ac0eefeb6d to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.8"
# dependencies = [
# "reportlab>=3.6.0",
# ]
# ///
import json
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.lib import colors
import argparse
import os
from html import escape
# Coloring scheme functions
def color_scheme_none(record, optimal_low, optimal_high):
    """Neutral scheme: ignores the record and both bounds and always picks grey."""
    neutral = "grey"
    return neutral
def color_scheme_optimal_aware(record, optimal_low, optimal_high):
    """
    Optimal-aware coloring scheme.

    - Red: ``outOfRangeType`` is "above" or "below".
    - Green: in range and inside the optimal range, in range with no optimal
      range at all, or in range but the value/bounds cannot be parsed as numbers.
    - Yellow: in range but outside the optimal range.
    - Grey: any other ``outOfRangeType``, or in range with an optimal range
      but no usable ``calculatedResult`` to compare against it.

    Args:
        record: Biomarker record dict; reads ``outOfRangeType`` and
            ``currentResult.calculatedResult``.
        optimal_low: Lower optimal bound; ``None`` or ``""`` means "no bound".
        optimal_high: Upper optimal bound; ``None`` or ``""`` means "no bound".

    Returns:
        One of "red", "yellow", "green", "grey".
    """
    def _present(value):
        # A numeric 0 is a legitimate bound or result, so plain truthiness
        # is not a safe "is it there?" test; only None and "" mean missing.
        return value is not None and value != ""

    status = record.get("outOfRangeType", "unknown")
    if status in ("above", "below"):
        return "red"
    if status != "in_range":
        return "grey"

    has_low = _present(optimal_low)
    has_high = _present(optimal_high)
    if not (has_low or has_high):
        # No optimal range but in range -> Green
        return "green"

    # "currentResult" may be absent or an explicit JSON null.
    current_result = record.get("currentResult") or {}
    calculated = current_result.get("calculatedResult")
    if not _present(calculated) or calculated == "N/A":
        # Nothing to compare against the optimal range.
        return "grey"

    try:
        value = float(calculated)
        within_low = value >= float(optimal_low) if has_low else True
        within_high = value <= float(optimal_high) if has_high else True
    except (ValueError, TypeError):
        # Can't parse values, but we know it's in range
        return "green"

    return "green" if within_low and within_high else "yellow"
def color_scheme_basic(record, optimal_low, optimal_high):
    """
    Two-tone coloring scheme:
    - Red: the record's outOfRangeType is "above" or "below"
    - Grey: everything else (in range, unknown, or missing status)

    The optimal bounds are accepted for signature parity with the other
    schemes but are not consulted.
    """
    status = record.get("outOfRangeType", "unknown")
    return "red" if status in ("above", "below") else "grey"
# Module-level switch: the coloring function create_pdf_report calls for every
# table row. Each scheme takes (record, optimal_low, optimal_high) and returns
# one of "red", "yellow", "green", "grey".
# Select which coloring scheme to use by leaving exactly one assignment
# uncommented.
# ACTIVE_COLOR_SCHEME = color_scheme_none
ACTIVE_COLOR_SCHEME = color_scheme_optimal_aware
# ACTIVE_COLOR_SCHEME = color_scheme_basic
def get_color_for_value(color_type):
    """Translate a scheme color name into a reportlab color; unknown names give light grey."""
    palette = dict(
        red=colors.lightpink,
        yellow=colors.lightyellow,
        green=colors.lightgreen,
        grey=colors.lightgrey,
    )
    try:
        return palette[color_type]
    except KeyError:
        # Unrecognized color name: use the neutral background.
        return colors.lightgrey
def get_optimal_range_for_gender(sex_details, gender):
    """
    Gets the optimal range for the specified gender.

    Lookup order (case-insensitive on the entry's "sex" field):
    1. the entry whose "sex" equals ``gender``,
    2. the entry whose "sex" is "all",
    3. the first entry in the list.

    Args:
        sex_details: List of dicts carrying "sex", "optimalRangeLow" and
            "optimalRangeHigh"; may be empty or None.
        gender: Gender label to look for (e.g. "male" / "female").

    Returns:
        A ``(low, high)`` tuple taken verbatim from the chosen entry, with
        ``""`` substituted for a missing key; ``("", "")`` when
        ``sex_details`` is empty or None.
    """
    if not sex_details:
        return "", ""

    def _bounds(detail):
        return detail.get("optimalRangeLow", ""), detail.get("optimalRangeHigh", "")

    def _label(value):
        # A JSON null arrives as None; treat it as an empty label instead of
        # crashing on None.lower().
        return str(value or "").lower()

    # Gender-specific entry first, then the generic "All" entry.
    for wanted in (_label(gender), "all"):
        for detail in sex_details:
            if _label(detail.get("sex")) == wanted:
                return _bounds(detail)

    # Neither found: the list is known to be non-empty here, use its first entry.
    return _bounds(sex_details[0])
def create_pdf_report(biomarker_records, output_pdf_path, gender="male"):
    """
    Generates the PDF report part.

    Builds a letter-size document with half-inch margins containing a title
    and one table row per biomarker record (name, measurement, expected
    range, optimal range, unit). Each data row's background comes from
    ACTIVE_COLOR_SCHEME mapped through get_color_for_value. When there are
    no records, a single notice paragraph is written instead of the table.

    Args:
        biomarker_records: Iterable of biomarker record dicts; may be empty
            or None.
        output_pdf_path: Destination passed to SimpleDocTemplate.
        gender: Gender used to pick the optimal range and shown in the title.

    Build errors are reported on stdout and not re-raised.
    """
    doc = SimpleDocTemplate(output_pdf_path, pagesize=letter,
                            rightMargin=inch/2, leftMargin=inch/2,
                            topMargin=inch/2, bottomMargin=inch/2)
    styles = getSampleStyleSheet()
    title_style = styles['h1']
    normal_style = styles['Normal']
    normal_style.fontSize = 8  # Smaller font for density

    story = [
        Paragraph(f"Biomarker Report (Dense Format) - {gender.capitalize()}", title_style),
        Spacer(1, 0.2*inch),
    ]

    table_data = [["Biomarker Name", "Measurement", "Expected Range", "Optimal Range", "Unit"]]
    row_color_types = []  # One color name per data row, in table order

    for record in biomarker_records or ():
        # Nested objects may be absent or an explicit JSON null.
        biomarker_info = record.get("biomarker") or {}
        current_result = record.get("currentResult") or {}

        biomarker_name = biomarker_info.get("name", "N/A")
        display_result = current_result.get("displayResult")
        if display_result is not None:
            measurement = display_result
        else:
            measurement = current_result.get("calculatedResult", "N/A")
        unit = record.get("units", "")
        expected_range = record.get("rangeString", "N/A")

        # Get optimal range from biomarker.sexDetails for the specified gender
        optimal_range = "N/A"
        optimal_low = ""
        optimal_high = ""
        sex_details = biomarker_info.get("sexDetails", [])
        if sex_details:
            optimal_low, optimal_high = get_optimal_range_for_gender(sex_details, gender)
            # Format optimal range string
            if optimal_low and optimal_high:
                optimal_range = f"{optimal_low}-{optimal_high}"
            elif optimal_high:
                optimal_range = f"<{optimal_high}"
            elif optimal_low:
                optimal_range = f">{optimal_low}"

        # Escape HTML special characters: Paragraph parses its text as markup,
        # so raw "<" / "&" in values such as "<5" would break parsing.
        cells = (biomarker_name, measurement, expected_range, optimal_range, unit)
        table_data.append([Paragraph(escape(str(cell)), normal_style) for cell in cells])

        # Get color using the active color scheme
        row_color_types.append(ACTIVE_COLOR_SCHEME(record, optimal_low, optimal_high))

    if len(table_data) > 1:
        col_widths = [2.5*inch, 1.2*inch, 1.3*inch, 1.0*inch, 1.0*inch]
        table = Table(table_data, colWidths=col_widths)
        style_commands = [
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('LEFTPADDING', (0, 0), (-1, -1), 4),
            ('RIGHTPADDING', (0, 0), (-1, -1), 4),
            ('TOPPADDING', (0, 0), (-1, -1), 2),
            # Applied after the header-only padding above, over the whole table.
            ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
        ]
        # Add background colors for all non-header rows (row 0 is the header)
        for row_index, color_type in enumerate(row_color_types, start=1):
            bg_color = get_color_for_value(color_type)
            style_commands.append(('BACKGROUND', (0, row_index), (-1, row_index), bg_color))
        table.setStyle(TableStyle(style_commands))
        story.append(table)
    else:
        story.append(Paragraph("No biomarker data found for PDF.", normal_style))

    try:
        doc.build(story)
        print(f"PDF report generated: {output_pdf_path}")
    except Exception as e:
        # Best-effort: report the failure rather than abort the caller,
        # which still goes on to write the text report.
        print(f"Error building PDF: {e}")
def create_text_report(biomarker_records, output_text_path, gender="male"):
    """
    Generates the plain text report part.

    Writes a header, a "---" separator and one line per record of the form
    ``name: measurement unit | Expected: range[ | Optimal: range]``. The
    optimal part is only added when a range is available for ``gender``.
    When there are no records, a single notice line is written instead.

    Args:
        biomarker_records: Iterable of biomarker record dicts; may be empty
            or None.
        output_text_path: Path of the text file to write (UTF-8).
        gender: Gender used to pick the optimal range and shown in the header.

    Write errors are reported on stdout and not re-raised.
    """
    report_lines = [f"Biomarker Report (Text Format for LLM Ingestion) - {gender.capitalize()}\n", "---"]
    if not biomarker_records:
        report_lines.append("No biomarker data found.")
    else:
        for record in biomarker_records:
            # Nested objects may be absent or an explicit JSON null.
            biomarker_info = record.get("biomarker") or {}
            current_result = record.get("currentResult") or {}

            biomarker_name = biomarker_info.get("name", "N/A")
            display_result = current_result.get("displayResult")
            if display_result is not None:
                measurement = display_result
            else:
                measurement = current_result.get("calculatedResult", "N/A")
            unit = record.get("units", "")
            expected_range = record.get("rangeString", "N/A")

            # Get optimal range from biomarker.sexDetails for the specified gender
            optimal_range = "N/A"
            sex_details = biomarker_info.get("sexDetails", [])
            if sex_details:
                optimal_low, optimal_high = get_optimal_range_for_gender(sex_details, gender)
                # Format optimal range string
                if optimal_low and optimal_high:
                    optimal_range = f"{optimal_low}-{optimal_high}"
                elif optimal_high:
                    optimal_range = f"<{optimal_high}"
                elif optimal_low:
                    optimal_range = f">{optimal_low}"

            # Format the line with all information; strip() drops the
            # trailing space left behind by an empty unit.
            line = f"{biomarker_name}: {measurement} {unit}".strip()
            line += f" | Expected: {expected_range}"
            if optimal_range != "N/A":
                line += f" | Optimal: {optimal_range}"
            report_lines.append(line)

    try:
        # Explicit UTF-8: units and names can contain non-ASCII characters,
        # and the platform default encoding may not be able to represent them.
        with open(output_text_path, 'w', encoding='utf-8') as f:
            f.writelines(line + "\n" for line in report_lines)
        print(f"Text report generated: {output_text_path}")
    except Exception as e:
        # Best-effort: report the failure rather than abort the caller.
        print(f"Error writing text report: {e}")
def process_biomarker_data(json_file_path, output_pdf_path, output_text_path, gender="male"):
    """
    Processes biomarker JSON data and generates PDF and Text reports.

    Reads ``data.biomarkerResultsRecord`` from the JSON file. When that list
    is missing or empty, a short notice PDF and notice text file are written
    instead of the full reports.

    Args:
        json_file_path: Path to the input JSON file (read as UTF-8).
        output_pdf_path: Path of the PDF to write.
        output_text_path: Path of the text file to write.
        gender: Gender forwarded to both report generators.

    Raises:
        OSError: If the input file cannot be opened.
        json.JSONDecodeError: If the input is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's default
    # encoding, which would garble non-ASCII units and names on some systems.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Either level may be absent or an explicit JSON null.
    biomarker_records = (data.get("data") or {}).get("biomarkerResultsRecord") or []
    if not biomarker_records:
        print("No 'biomarkerResultsRecord' found in the JSON data.")
        with open(output_pdf_path, 'wb') as f:
            SimpleDocTemplate(f).build([Paragraph("No biomarker data found.")])
        with open(output_text_path, 'w', encoding='utf-8') as f:
            f.write("No biomarker data found in the input JSON.")
        print(f"Empty/Notice PDF created: {output_pdf_path}")
        print(f"Empty/Notice Text file created: {output_text_path}")
        return

    create_pdf_report(biomarker_records, output_pdf_path, gender)
    create_text_report(biomarker_records, output_text_path, gender)
if __name__ == "__main__":
    # Command-line entry point: parse arguments, derive default output paths
    # next to the input file, then generate both reports.
    cli = argparse.ArgumentParser(description="Generate PDF and Text reports from biomarker JSON data.")
    cli.add_argument("json_file", help="Path to the input JSON file.")
    cli.add_argument(
        "-op", "--output_pdf",
        help="Path for the output PDF file (default: [json_filename]_dense_report.pdf).",
    )
    cli.add_argument(
        "-ot", "--output_text",
        help="Path for the output Text file (default: [json_filename]_llm_data.txt).",
    )
    cli.add_argument(
        "-g", "--gender",
        default="male",
        choices=["male", "female"],
        help="Gender for optimal range filtering (default: male).",
    )
    options = cli.parse_args()

    source_path = options.json_file
    stem, _extension = os.path.splitext(os.path.basename(source_path))
    # A bare filename has no directory component; write beside it in the CWD.
    target_dir = os.path.dirname(source_path) or "."

    pdf_target = options.output_pdf or os.path.join(target_dir, f"{stem}_dense_report.pdf")
    text_target = options.output_text or os.path.join(target_dir, f"{stem}_llm_data.txt")

    process_biomarker_data(source_path, pdf_target, text_target, options.gender)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment