philschmid · May 12, 2025 09:16
diff --git a/g.py b/g.py
 import os
 from google import genai
 from pydantic import BaseModel, Field

 # Instantiate the Gemini client. The API key is read from the
 # GEMINI_API_KEY environment variable, with "xxx" as the fallback value.
 client = genai.Client(
     api_key=os.environ.get("GEMINI_API_KEY", "xxx"),
 )

 # Structured-output schema: this class is passed as `response_schema` in the
 # request config further down, and the script then reads `response.parsed.text`.
 # NOTE(review): the docstring and the field description presumably end up in
 # the schema sent to the model, so both are kept verbatim - confirm.
 class PageText(BaseModel):
    """Represents the content of a page in the PDF document in markdown format."""

    # Markdown text for one page; the only field of the schema.
    text: str = Field(description="Extracted text of the page in markdown format.")

 # Instruction prompt placed in the `contents` list of every generate_content
 # call, next to the uploaded page image. The text is runtime input to the
 # model, so it is kept verbatim (long lines are exempted via `noqa: E501`).
 EXTRACT_MARKDOWN = """
 You are given an image of a page from a PDF document.
 Your task is to extract the text accurately and convert it into Markdown format, ensuring proper structure and readability.
 Requirements:
 - Preserve Formatting: Maintain bold, italics, headings, lists, and other structural elements as they appear in the original text.
 - Extract Tables: If a table is present and can be accurately extracted, convert it into Markdown table format.
 - Charts & Graphics:
    - If the page contains a chart, table, or graphic that cannot be extracted as text, insert a contextually recognizable placeholder where it appears (e.g., [CHART: Sales Performance Q1], [GRAPHIC: Workflow Diagram], [TABLE: Financial Summary]).
    - The placeholder should be descriptive based on the content it represents to ensure easy identification later.
    - Use this format for each placeholder: "[TYPE: Description]" (e.g., [CHART: Sales Performance Q1 - Bar Chart]).
 - Accuracy: Ensure correct text extraction without modifying the content.
 - No Extra Output: Return only the formatted Markdown text—no explanations, comments, or additional information.
 - If the image is empty or contains no text, return an empty string.
 """  # noqa: E501

 # For each listed page image: upload it once, then send the same
 # structured-output request 10 times and print the outcome of every attempt.
 for img_path in ["../assets/2.png", "../assets/7.png", "../assets/9.png"]:
    # Upload the file to the File API
    image = client.files.upload(file=img_path)
    for i in range(10):
        print(f"Attempt {i+1} of 10 for {img_path}")
        try:
            response = client.models.generate_content(
                model="gemini-2.5-flash-preview-04-17",
                contents=[EXTRACT_MARKDOWN, image],
                config={
                    'response_mime_type': 'application/json',
                    'response_schema': PageText,
                },
            )
            # `parsed` is None when the reply could not be converted into a
            # PageText. Report that explicitly, together with the raw reply,
            # instead of letting `.text` raise an opaque AttributeError.
            if response.parsed is None:
                print(f"No parsed PageText returned; raw response text: {response.text!r}")
            else:
                print(response.parsed.text)
        except Exception as e:
            # Print rather than raise so the remaining attempts still run.
            print(e)
	import os
	from google import genai
	from pydantic import BaseModel, Field

	# Instantiate the Gemini client. The API key is read from the
	# GEMINI_API_KEY environment variable, with "xxx" as the fallback value.
	client = genai.Client(
	    api_key=os.environ.get("GEMINI_API_KEY", "xxx"),
	)

	# Structured-output schema: this class is passed as `response_schema` in the
	# request config further down, and the script then reads `response.parsed.text`.
	# The class body is indented again; without it the definition does not parse.
	class PageText(BaseModel):
	    """Represents the content of a page in the PDF document in markdown format."""

	    # Markdown text for one page; the only field of the schema.
	    text: str = Field(description="Extracted text of the page in markdown format.")

	# Instruction prompt placed in the `contents` list of every generate_content
	# call, next to the uploaded page image. The three placeholder rules are
	# nested under "Charts & Graphics:" again; the flattened paste had turned
	# them into sibling top-level requirements.
	EXTRACT_MARKDOWN = """
	You are given an image of a page from a PDF document.
	Your task is to extract the text accurately and convert it into Markdown format, ensuring proper structure and readability.
	Requirements:
	- Preserve Formatting: Maintain bold, italics, headings, lists, and other structural elements as they appear in the original text.
	- Extract Tables: If a table is present and can be accurately extracted, convert it into Markdown table format.
	- Charts & Graphics:
	   - If the page contains a chart, table, or graphic that cannot be extracted as text, insert a contextually recognizable placeholder where it appears (e.g., [CHART: Sales Performance Q1], [GRAPHIC: Workflow Diagram], [TABLE: Financial Summary]).
	   - The placeholder should be descriptive based on the content it represents to ensure easy identification later.
	   - Use this format for each placeholder: "[TYPE: Description]" (e.g., [CHART: Sales Performance Q1 - Bar Chart]).
	- Accuracy: Ensure correct text extraction without modifying the content.
	- No Extra Output: Return only the formatted Markdown text—no explanations, comments, or additional information.
	- If the image is empty or contains no text, return an empty string.
	""" # noqa: E501

	# For each listed page image: upload it once, then send the same
	# structured-output request 10 times and print the outcome of every attempt.
	# Loop and try/except nesting is restored; the flattened paste did not parse.
	for img_path in ["../assets/2.png", "../assets/7.png", "../assets/9.png"]:
	    # Upload the file to the File API
	    image = client.files.upload(file=img_path)
	    for i in range(10):
	        print(f"Attempt {i+1} of 10 for {img_path}")
	        try:
	            response = client.models.generate_content(
	                model="gemini-2.5-flash-preview-04-17",
	                contents=[EXTRACT_MARKDOWN, image],
	                config={
	                    'response_mime_type': 'application/json',
	                    'response_schema': PageText,
	                },
	            )
	            # `parsed` is None when the reply could not be converted into a
	            # PageText. Report that explicitly, together with the raw reply,
	            # instead of letting `.text` raise an opaque AttributeError.
	            if response.parsed is None:
	                print(f"No parsed PageText returned; raw response text: {response.text!r}")
	            else:
	                print(response.parsed.text)
	        except Exception as e:
	            # Print rather than raise so the remaining attempts still run.
	            print(e)