vedantroy · May 30, 2025 07:34
diff --git a/gistfile1.txt b/gistfile1.txt
 # /// script
 # requires-python = ">=3.8"
 # dependencies = [
 #   "reportlab>=3.6.0",
 # ]
 # ///

 import json
 from reportlab.lib.pagesizes import letter
 from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
 from reportlab.lib.units import inch
 from reportlab.lib import colors
 import argparse
 import os
 from html import escape


 # Coloring scheme functions
 def color_scheme_none(record, optimal_low, optimal_high):
    """Neutral scheme: every row is grey, regardless of the record or ranges."""
    neutral = "grey"
    return neutral


 def color_scheme_optimal_aware(record, optimal_low, optimal_high):
    """
    Pick a row color from the normal-range status and the optimal range.

    Args:
        record: Biomarker result dict. Reads "outOfRangeType" and
            "currentResult" -> "calculatedResult".
        optimal_low: Lower optimal bound; None or "" means no lower bound.
        optimal_high: Upper optimal bound; None or "" means no upper bound.

    Returns:
        "red"    - outOfRangeType is "above" or "below".
        "green"  - in range and either no optimal bound exists, the value lies
                   within the optimal bounds, or the value/bounds cannot be
                   parsed as numbers.
        "yellow" - in range but outside the optimal bounds.
        "grey"   - any other status, or in range with an optimal bound but no
                   calculated result to compare against it.
    """
    def is_present(item):
        # Only None and "" mean "absent". A numeric 0 is a legitimate value
        # or bound and must not be dropped by a plain truthiness test.
        return item is not None and item != ""

    status = record.get("outOfRangeType", "unknown")

    if status in ("above", "below"):
        return "red"
    if status != "in_range":
        return "grey"

    has_low = is_present(optimal_low)
    has_high = is_present(optimal_high)
    if not (has_low or has_high):
        # No optimal range but in range -> Green
        return "green"

    # "currentResult" may be present but null; treat it like a missing result.
    current_result = record.get("currentResult") or {}
    calculated_result = current_result.get("calculatedResult")
    if not is_present(calculated_result) or calculated_result == "N/A":
        return "grey"

    try:
        value = float(calculated_result)
        within_low = value >= float(optimal_low) if has_low else True
        within_high = value <= float(optimal_high) if has_high else True
    except (ValueError, TypeError):
        # Can't parse values, but we know it's in range
        return "green"

    return "green" if within_low and within_high else "yellow"


 def color_scheme_basic(record, optimal_low, optimal_high):
    """
    Two-color scheme that ignores the optimal range.

    Returns "red" when the record's "outOfRangeType" is "above" or "below",
    and "grey" for everything else (in range, unknown, or missing).
    """
    status = record.get("outOfRangeType", "unknown")
    return "red" if status in ("above", "below") else "grey"


 # Coloring scheme that create_pdf_report calls to choose each row's
 # background. To switch schemes, leave exactly one of the assignments below
 # uncommented.
 # ACTIVE_COLOR_SCHEME = color_scheme_none
 ACTIVE_COLOR_SCHEME = color_scheme_optimal_aware
 # ACTIVE_COLOR_SCHEME = color_scheme_basic


 def get_color_for_value(color_type):
    """
    Translate a scheme color name into a reportlab background color.

    "red", "yellow", "green" and "grey" map to the matching light shade;
    any other name falls back to light grey.
    """
    palette = dict(
        red=colors.lightpink,
        yellow=colors.lightyellow,
        green=colors.lightgreen,
        grey=colors.lightgrey,
    )
    try:
        return palette[color_type]
    except KeyError:
        return colors.lightgrey


 def get_optimal_range_for_gender(sex_details, gender):
    """
    Look up the optimal range that applies to ``gender``.

    Selection order: the first entry whose "sex" equals ``gender``
    (case-insensitive), else the first entry whose "sex" is "all", else the
    first entry in the list.

    Args:
        sex_details: List of dicts with optional "sex", "optimalRangeLow" and
            "optimalRangeHigh" keys; may be empty or None.
        gender: Gender label to match, e.g. "male" or "female".

    Returns:
        (low, high) taken from the chosen entry; a missing or null bound is
        returned as "". Returns ("", "") when ``sex_details`` is empty.
    """
    if not sex_details:
        return "", ""

    wanted = str(gender or "").lower()
    chosen = None
    for detail in sex_details:
        # "sex" can be absent or null; calling .lower() on None would raise.
        label = str(detail.get("sex") or "").lower()
        if label == wanted:
            # A gender-specific entry always wins, even after an "all" entry.
            chosen = detail
            break
        if chosen is None and label == "all":
            chosen = detail

    if chosen is None:
        # Neither a gender-specific nor an "all" entry: use the first one.
        chosen = sex_details[0]

    low = chosen.get("optimalRangeLow")
    high = chosen.get("optimalRangeHigh")
    return ("" if low is None else low), ("" if high is None else high)


 def create_pdf_report(biomarker_records, output_pdf_path, gender="male"):
    """
    Build the dense PDF table report and write it to ``output_pdf_path``.

    One table row is emitted per record with the biomarker name, measurement,
    expected range, optimal range (for ``gender``) and unit. Every data row is
    shaded with the color chosen by ACTIVE_COLOR_SCHEME. When there are no
    records the PDF contains a short notice instead of a table.

    Args:
        biomarker_records: Iterable of biomarker result dicts.
        output_pdf_path: Destination path of the PDF file.
        gender: Gender used to select the optimal range (default "male").

    Errors raised while building the PDF are caught and printed rather than
    propagated.
    """
    doc = SimpleDocTemplate(output_pdf_path, pagesize=letter,
                            rightMargin=inch/2, leftMargin=inch/2,
                            topMargin=inch/2, bottomMargin=inch/2)
    styles = getSampleStyleSheet()
    title_style = styles['h1']
    normal_style = styles['Normal']
    normal_style.fontSize = 8  # Smaller font for density

    story = [
        Paragraph(f"Biomarker Report (Dense Format) - {gender.capitalize()}", title_style),
        Spacer(1, 0.2*inch),
    ]

    table_data = [["Biomarker Name", "Measurement", "Expected Range", "Optimal Range", "Unit"]]
    row_color_types = []  # Color name for each data row, in table order

    for record in biomarker_records:
        # These keys may be present but null in the JSON; `or` covers both
        # the missing and the null case.
        biomarker_info = record.get("biomarker") or {}
        current_result = record.get("currentResult") or {}
        sex_details = biomarker_info.get("sexDetails") or []

        biomarker_name = biomarker_info.get("name", "N/A")
        measurement = current_result.get("displayResult")
        if measurement is None:
            measurement = current_result.get("calculatedResult", "N/A")
        unit = record.get("units", "")
        expected_range = record.get("rangeString", "N/A")

        # Get optimal range from biomarker.sexDetails for the specified gender
        optimal_low, optimal_high = "", ""
        if sex_details:
            optimal_low, optimal_high = get_optimal_range_for_gender(sex_details, gender)

        # Format optimal range string
        if optimal_low and optimal_high:
            optimal_range = f"{optimal_low}-{optimal_high}"
        elif optimal_high:
            optimal_range = f"<{optimal_high}"
        elif optimal_low:
            optimal_range = f">{optimal_low}"
        else:
            optimal_range = "N/A"

        # Escape HTML special characters to prevent parsing errors
        table_data.append([
            Paragraph(escape(str(cell)), normal_style)
            for cell in (biomarker_name, measurement, expected_range, optimal_range, unit)
        ])

        # Get color using the active color scheme
        row_color_types.append(ACTIVE_COLOR_SCHEME(record, optimal_low, optimal_high))

    if len(table_data) > 1:
        col_widths = [2.5*inch, 1.2*inch, 1.3*inch, 1.0*inch, 1.0*inch]
        table = Table(table_data, colWidths=col_widths)

        style_commands = [
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('LEFTPADDING', (0, 0), (-1, -1), 4),
            ('RIGHTPADDING', (0, 0), (-1, -1), 4),
            ('TOPPADDING', (0, 0), (-1, -1), 2),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
        ]

        # Add background colors for all non-header rows; row 0 is the header,
        # so data rows start at index 1.
        for row_index, color_type in enumerate(row_color_types, start=1):
            bg_color = get_color_for_value(color_type)
            style_commands.append(('BACKGROUND', (0, row_index), (-1, row_index), bg_color))

        table.setStyle(TableStyle(style_commands))
        story.append(table)
    else:
        story.append(Paragraph("No biomarker data found for PDF.", normal_style))

    try:
        doc.build(story)
        print(f"PDF report generated: {output_pdf_path}")
    except Exception as e:
        print(f"Error building PDF: {e}")

 def create_text_report(biomarker_records, output_text_path, gender="male"):
    """
    Write the plain-text report to ``output_text_path``.

    The file starts with a title line, a blank line and a "---" separator,
    followed by one line per record:
    "<name>: <measurement> <unit> | Expected: <range>[ | Optimal: <range>]".
    The optimal part is omitted when no optimal range applies. When there are
    no records a single notice line is written instead.

    Args:
        biomarker_records: Sequence of biomarker result dicts.
        output_text_path: Destination path of the text file (written as UTF-8).
        gender: Gender used to select the optimal range (default "male").

    Errors raised while writing the file are caught and printed rather than
    propagated.
    """
    report_lines = [f"Biomarker Report (Text Format for LLM Ingestion) - {gender.capitalize()}\n", "---"]

    if not biomarker_records:
        report_lines.append("No biomarker data found.")
    else:
        for record in biomarker_records:
            # These keys may be present but null in the JSON; `or` covers
            # both the missing and the null case.
            biomarker_info = record.get("biomarker") or {}
            current_result = record.get("currentResult") or {}
            sex_details = biomarker_info.get("sexDetails") or []

            biomarker_name = biomarker_info.get("name", "N/A")
            measurement = current_result.get("displayResult")
            if measurement is None:
                measurement = current_result.get("calculatedResult", "N/A")
            unit = record.get("units", "")
            expected_range = record.get("rangeString", "N/A")

            # Reset the bounds for every record. Without this, a record that
            # has no sexDetails would raise on the first iteration or reuse
            # the previous record's optimal range.
            optimal_low, optimal_high = "", ""
            if sex_details:
                optimal_low, optimal_high = get_optimal_range_for_gender(sex_details, gender)

            # Format optimal range string
            if optimal_low and optimal_high:
                optimal_range = f"{optimal_low}-{optimal_high}"
            elif optimal_high:
                optimal_range = f"<{optimal_high}"
            elif optimal_low:
                optimal_range = f">{optimal_low}"
            else:
                optimal_range = "N/A"

            # Format the line with all information
            line = f"{biomarker_name}: {measurement} {unit}".strip()
            line += f" | Expected: {expected_range}"
            if optimal_range != "N/A":
                line += f" | Optimal: {optimal_range}"

            report_lines.append(line)

    try:
        # Explicit UTF-8 so the output does not depend on the platform's
        # default encoding.
        with open(output_text_path, 'w', encoding="utf-8") as f:
            f.writelines(line + "\n" for line in report_lines)
        print(f"Text report generated: {output_text_path}")
    except Exception as e:
        print(f"Error writing text report: {e}")


 def process_biomarker_data(json_file_path, output_pdf_path, output_text_path, gender="male"):
    """
    Load biomarker JSON and generate the PDF and text reports.

    Records are read from ``data["data"]["biomarkerResultsRecord"]``. When
    that list is missing or empty, a notice PDF and a notice text file are
    written instead of full reports.

    Args:
        json_file_path: Path of the input JSON file (read as UTF-8).
        output_pdf_path: Destination path of the PDF report.
        output_text_path: Destination path of the text report.
        gender: Gender used to select optimal ranges (default "male").
    """
    # JSON text is UTF-8 by specification; do not rely on the platform's
    # default encoding when reading it.
    with open(json_file_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    # "data" may be present but null; treat that like a missing section.
    payload = data.get("data") or {}
    biomarker_records = payload.get("biomarkerResultsRecord", [])

    if not biomarker_records:
        print("No 'biomarkerResultsRecord' found in the JSON data.")
        with open(output_pdf_path, 'wb') as f:
            SimpleDocTemplate(f).build([Paragraph("No biomarker data found.")])
        with open(output_text_path, 'w', encoding="utf-8") as f:
            f.write("No biomarker data found in the input JSON.")
        print(f"Empty/Notice PDF created: {output_pdf_path}")
        print(f"Empty/Notice Text file created: {output_text_path}")
        return

    create_pdf_report(biomarker_records, output_pdf_path, gender)
    create_text_report(biomarker_records, output_text_path, gender)


 if __name__ == "__main__":
    # Command-line entry point: parse the arguments, derive default output
    # paths next to the input file, then generate both reports.
    cli = argparse.ArgumentParser(description="Generate PDF and Text reports from biomarker JSON data.")
    cli.add_argument("json_file", help="Path to the input JSON file.")
    cli.add_argument(
        "-op", "--output_pdf",
        help="Path for the output PDF file (default: [json_filename]_dense_report.pdf).",
    )
    cli.add_argument(
        "-ot", "--output_text",
        help="Path for the output Text file (default: [json_filename]_llm_data.txt).",
    )
    cli.add_argument(
        "-g", "--gender",
        default="male",
        choices=["male", "female"],
        help="Gender for optimal range filtering (default: male).",
    )
    options = cli.parse_args()

    source_path = options.json_file
    stem, _extension = os.path.splitext(os.path.basename(source_path))
    # An input given as a bare filename has no directory part; use the
    # current directory in that case.
    target_dir = os.path.dirname(source_path) or "."

    pdf_target = options.output_pdf or os.path.join(target_dir, f"{stem}_dense_report.pdf")
    text_target = options.output_text or os.path.join(target_dir, f"{stem}_llm_data.txt")

    process_biomarker_data(source_path, pdf_target, text_target, options.gender)
	# /// script
	# requires-python = ">=3.8"
	# dependencies = [
	# "reportlab>=3.6.0",
	# ]
	# ///

	import json
	from reportlab.lib.pagesizes import letter
	from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
	from reportlab.lib.styles import getSampleStyleSheet
	from reportlab.lib.units import inch
	from reportlab.lib import colors
	import argparse
	import os
	from html import escape


	# Coloring scheme functions
	def color_scheme_none(record, optimal_low, optimal_high):
	    """Always returns grey - no coloring based on values."""
	    # The function body had lost its indentation; restored here.
	    return "grey"


	def color_scheme_optimal_aware(record, optimal_low, optimal_high):
	    """
	    Pick a row color from the normal-range status and the optimal range.

	    Args:
	        record: Biomarker result dict. Reads "outOfRangeType" and
	            "currentResult" -> "calculatedResult".
	        optimal_low: Lower optimal bound; None or "" means no lower bound.
	        optimal_high: Upper optimal bound; None or "" means no upper bound.

	    Returns:
	        "red"    - outOfRangeType is "above" or "below".
	        "green"  - in range and either no optimal bound exists, the value
	                   lies within the optimal bounds, or the value/bounds
	                   cannot be parsed as numbers.
	        "yellow" - in range but outside the optimal bounds.
	        "grey"   - any other status, or in range with an optimal bound but
	                   no calculated result to compare against it.
	    """
	    def is_present(item):
	        # Only None and "" mean "absent". A numeric 0 is a legitimate value
	        # or bound and must not be dropped by a plain truthiness test.
	        return item is not None and item != ""

	    status = record.get("outOfRangeType", "unknown")

	    if status in ("above", "below"):
	        return "red"
	    if status != "in_range":
	        return "grey"

	    has_low = is_present(optimal_low)
	    has_high = is_present(optimal_high)
	    if not (has_low or has_high):
	        # No optimal range but in range -> Green
	        return "green"

	    # "currentResult" may be present but null; treat it like a missing result.
	    current_result = record.get("currentResult") or {}
	    calculated_result = current_result.get("calculatedResult")
	    if not is_present(calculated_result) or calculated_result == "N/A":
	        return "grey"

	    try:
	        value = float(calculated_result)
	        within_low = value >= float(optimal_low) if has_low else True
	        within_high = value <= float(optimal_high) if has_high else True
	    except (ValueError, TypeError):
	        # Can't parse values, but we know it's in range
	        return "green"

	    return "green" if within_low and within_high else "yellow"


	def color_scheme_basic(record, optimal_low, optimal_high):
	    """
	    Basic coloring scheme that ignores the optimal range.

	    Returns "red" when the record's "outOfRangeType" is "above" or "below",
	    and "grey" for everything else (in range, unknown, or missing).
	    """
	    # The function body had lost its indentation; restored here.
	    out_of_range_type = record.get("outOfRangeType", "unknown")

	    if out_of_range_type in ("above", "below"):
	        return "red"
	    return "grey"


	# Coloring scheme that create_pdf_report calls to choose each row's
	# background. To switch schemes, leave exactly one of the assignments below
	# uncommented.
	# ACTIVE_COLOR_SCHEME = color_scheme_none
	ACTIVE_COLOR_SCHEME = color_scheme_optimal_aware
	# ACTIVE_COLOR_SCHEME = color_scheme_basic


	def get_color_for_value(color_type):
	    """
	    Map a scheme color name to the reportlab color used as row background.

	    "red", "yellow", "green" and "grey" map to the matching light shade;
	    any other name falls back to light grey.
	    """
	    # The function body had lost its indentation; restored here.
	    color_map = {
	        "red": colors.lightpink,
	        "yellow": colors.lightyellow,
	        "green": colors.lightgreen,
	        "grey": colors.lightgrey,
	    }
	    return color_map.get(color_type, colors.lightgrey)


	def get_optimal_range_for_gender(sex_details, gender):
	    """
	    Look up the optimal range that applies to ``gender``.

	    Selection order: the first entry whose "sex" equals ``gender``
	    (case-insensitive), else the first entry whose "sex" is "all", else the
	    first entry in the list.

	    Args:
	        sex_details: List of dicts with optional "sex", "optimalRangeLow"
	            and "optimalRangeHigh" keys; may be empty or None.
	        gender: Gender label to match, e.g. "male" or "female".

	    Returns:
	        (low, high) taken from the chosen entry; a missing or null bound is
	        returned as "". Returns ("", "") when ``sex_details`` is empty.
	    """
	    if not sex_details:
	        return "", ""

	    wanted = str(gender or "").lower()
	    chosen = None
	    for detail in sex_details:
	        # "sex" can be absent or null; calling .lower() on None would raise.
	        label = str(detail.get("sex") or "").lower()
	        if label == wanted:
	            # A gender-specific entry always wins, even after an "all" entry.
	            chosen = detail
	            break
	        if chosen is None and label == "all":
	            chosen = detail

	    if chosen is None:
	        # Neither a gender-specific nor an "all" entry: use the first one.
	        chosen = sex_details[0]

	    low = chosen.get("optimalRangeLow")
	    high = chosen.get("optimalRangeHigh")
	    return ("" if low is None else low), ("" if high is None else high)


	def create_pdf_report(biomarker_records, output_pdf_path, gender="male"):
	    """
	    Build the dense PDF table report and write it to ``output_pdf_path``.

	    One table row is emitted per record with the biomarker name,
	    measurement, expected range, optimal range (for ``gender``) and unit.
	    Every data row is shaded with the color chosen by ACTIVE_COLOR_SCHEME.
	    When there are no records the PDF contains a short notice instead of a
	    table.

	    Args:
	        biomarker_records: Iterable of biomarker result dicts.
	        output_pdf_path: Destination path of the PDF file.
	        gender: Gender used to select the optimal range (default "male").

	    Errors raised while building the PDF are caught and printed rather than
	    propagated.
	    """
	    doc = SimpleDocTemplate(output_pdf_path, pagesize=letter,
	                            rightMargin=inch/2, leftMargin=inch/2,
	                            topMargin=inch/2, bottomMargin=inch/2)
	    styles = getSampleStyleSheet()
	    title_style = styles['h1']
	    normal_style = styles['Normal']
	    normal_style.fontSize = 8  # Smaller font for density

	    story = [
	        Paragraph(f"Biomarker Report (Dense Format) - {gender.capitalize()}", title_style),
	        Spacer(1, 0.2*inch),
	    ]

	    table_data = [["Biomarker Name", "Measurement", "Expected Range", "Optimal Range", "Unit"]]
	    row_color_types = []  # Color name for each data row, in table order

	    for record in biomarker_records:
	        # These keys may be present but null in the JSON; `or` covers both
	        # the missing and the null case.
	        biomarker_info = record.get("biomarker") or {}
	        current_result = record.get("currentResult") or {}
	        sex_details = biomarker_info.get("sexDetails") or []

	        biomarker_name = biomarker_info.get("name", "N/A")
	        measurement = current_result.get("displayResult")
	        if measurement is None:
	            measurement = current_result.get("calculatedResult", "N/A")
	        unit = record.get("units", "")
	        expected_range = record.get("rangeString", "N/A")

	        # Get optimal range from biomarker.sexDetails for the specified gender
	        optimal_low, optimal_high = "", ""
	        if sex_details:
	            optimal_low, optimal_high = get_optimal_range_for_gender(sex_details, gender)

	        # Format optimal range string
	        if optimal_low and optimal_high:
	            optimal_range = f"{optimal_low}-{optimal_high}"
	        elif optimal_high:
	            optimal_range = f"<{optimal_high}"
	        elif optimal_low:
	            optimal_range = f">{optimal_low}"
	        else:
	            optimal_range = "N/A"

	        # Escape HTML special characters to prevent parsing errors
	        table_data.append([
	            Paragraph(escape(str(cell)), normal_style)
	            for cell in (biomarker_name, measurement, expected_range, optimal_range, unit)
	        ])

	        # Get color using the active color scheme
	        row_color_types.append(ACTIVE_COLOR_SCHEME(record, optimal_low, optimal_high))

	    if len(table_data) > 1:
	        # The multiplication signs were missing here ("2.5inch"), which is
	        # a syntax error; each width is a number of inches.
	        col_widths = [2.5*inch, 1.2*inch, 1.3*inch, 1.0*inch, 1.0*inch]
	        table = Table(table_data, colWidths=col_widths)

	        style_commands = [
	            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
	            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
	            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
	            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
	            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	            ('FONTSIZE', (0, 0), (-1, 0), 10),
	            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
	            ('GRID', (0, 0), (-1, -1), 1, colors.black),
	            ('LEFTPADDING', (0, 0), (-1, -1), 4),
	            ('RIGHTPADDING', (0, 0), (-1, -1), 4),
	            ('TOPPADDING', (0, 0), (-1, -1), 2),
	            ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
	        ]

	        # Add background colors for all non-header rows; row 0 is the
	        # header, so data rows start at index 1.
	        for row_index, color_type in enumerate(row_color_types, start=1):
	            bg_color = get_color_for_value(color_type)
	            style_commands.append(('BACKGROUND', (0, row_index), (-1, row_index), bg_color))

	        table.setStyle(TableStyle(style_commands))
	        story.append(table)
	    else:
	        story.append(Paragraph("No biomarker data found for PDF.", normal_style))

	    try:
	        doc.build(story)
	        print(f"PDF report generated: {output_pdf_path}")
	    except Exception as e:
	        print(f"Error building PDF: {e}")

	def create_text_report(biomarker_records, output_text_path, gender="male"):
	    """
	    Write the plain-text report to ``output_text_path``.

	    The file starts with a title line, a blank line and a "---" separator,
	    followed by one line per record:
	    "<name>: <measurement> <unit> | Expected: <range>[ | Optimal: <range>]".
	    The optimal part is omitted when no optimal range applies. When there
	    are no records a single notice line is written instead.

	    Args:
	        biomarker_records: Sequence of biomarker result dicts.
	        output_text_path: Destination path of the text file (written as
	            UTF-8).
	        gender: Gender used to select the optimal range (default "male").

	    Errors raised while writing the file are caught and printed rather than
	    propagated.
	    """
	    report_lines = [f"Biomarker Report (Text Format for LLM Ingestion) - {gender.capitalize()}\n", "---"]

	    if not biomarker_records:
	        report_lines.append("No biomarker data found.")
	    else:
	        for record in biomarker_records:
	            # These keys may be present but null in the JSON; `or` covers
	            # both the missing and the null case.
	            biomarker_info = record.get("biomarker") or {}
	            current_result = record.get("currentResult") or {}
	            sex_details = biomarker_info.get("sexDetails") or []

	            biomarker_name = biomarker_info.get("name", "N/A")
	            measurement = current_result.get("displayResult")
	            if measurement is None:
	                measurement = current_result.get("calculatedResult", "N/A")
	            unit = record.get("units", "")
	            expected_range = record.get("rangeString", "N/A")

	            # Reset the bounds for every record. Without this, a record
	            # that has no sexDetails would raise on the first iteration or
	            # reuse the previous record's optimal range.
	            optimal_low, optimal_high = "", ""
	            if sex_details:
	                optimal_low, optimal_high = get_optimal_range_for_gender(sex_details, gender)

	            # Format optimal range string
	            if optimal_low and optimal_high:
	                optimal_range = f"{optimal_low}-{optimal_high}"
	            elif optimal_high:
	                optimal_range = f"<{optimal_high}"
	            elif optimal_low:
	                optimal_range = f">{optimal_low}"
	            else:
	                optimal_range = "N/A"

	            # Format the line with all information. The separators are
	            # plain "|"; the escaped "\|" form would put a literal
	            # backslash into the report.
	            line = f"{biomarker_name}: {measurement} {unit}".strip()
	            line += f" | Expected: {expected_range}"
	            if optimal_range != "N/A":
	                line += f" | Optimal: {optimal_range}"

	            report_lines.append(line)

	    try:
	        # Explicit UTF-8 so the output does not depend on the platform's
	        # default encoding.
	        with open(output_text_path, 'w', encoding="utf-8") as f:
	            f.writelines(line + "\n" for line in report_lines)
	        print(f"Text report generated: {output_text_path}")
	    except Exception as e:
	        print(f"Error writing text report: {e}")


	def process_biomarker_data(json_file_path, output_pdf_path, output_text_path, gender="male"):
	    """
	    Load biomarker JSON and generate the PDF and text reports.

	    Records are read from ``data["data"]["biomarkerResultsRecord"]``. When
	    that list is missing or empty, a notice PDF and a notice text file are
	    written instead of full reports.

	    Args:
	        json_file_path: Path of the input JSON file (read as UTF-8).
	        output_pdf_path: Destination path of the PDF report.
	        output_text_path: Destination path of the text report.
	        gender: Gender used to select optimal ranges (default "male").
	    """
	    # JSON text is UTF-8 by specification; do not rely on the platform's
	    # default encoding when reading it.
	    with open(json_file_path, 'r', encoding="utf-8") as f:
	        data = json.load(f)

	    # "data" may be present but null; treat that like a missing section.
	    payload = data.get("data") or {}
	    biomarker_records = payload.get("biomarkerResultsRecord", [])

	    if not biomarker_records:
	        print("No 'biomarkerResultsRecord' found in the JSON data.")
	        with open(output_pdf_path, 'wb') as f:
	            SimpleDocTemplate(f).build([Paragraph("No biomarker data found.")])
	        with open(output_text_path, 'w', encoding="utf-8") as f:
	            f.write("No biomarker data found in the input JSON.")
	        print(f"Empty/Notice PDF created: {output_pdf_path}")
	        print(f"Empty/Notice Text file created: {output_text_path}")
	        return

	    create_pdf_report(biomarker_records, output_pdf_path, gender)
	    create_text_report(biomarker_records, output_text_path, gender)


	if __name__ == "__main__":
	    # Command-line entry point: parse the arguments, derive default output
	    # paths next to the input file, then generate both reports.
	    # (Indentation of this block had been lost and is restored here.)
	    parser = argparse.ArgumentParser(description="Generate PDF and Text reports from biomarker JSON data.")
	    parser.add_argument("json_file", help="Path to the input JSON file.")
	    parser.add_argument("-op", "--output_pdf",
	                        help="Path for the output PDF file (default: [json_filename]_dense_report.pdf).")
	    parser.add_argument("-ot", "--output_text",
	                        help="Path for the output Text file (default: [json_filename]_llm_data.txt).")
	    parser.add_argument("-g", "--gender",
	                        default="male",
	                        choices=["male", "female"],
	                        help="Gender for optimal range filtering (default: male).")

	    args = parser.parse_args()

	    json_input_path = args.json_file
	    base_name = os.path.splitext(os.path.basename(json_input_path))[0]
	    # An input given as a bare filename has no directory part; use the
	    # current directory in that case.
	    output_dir = os.path.dirname(json_input_path) or "."

	    pdf_output_path = args.output_pdf or os.path.join(output_dir, f"{base_name}_dense_report.pdf")
	    text_output_path = args.output_text or os.path.join(output_dir, f"{base_name}_llm_data.txt")

	    process_biomarker_data(json_input_path, pdf_output_path, text_output_path, args.gender)