Skip to content

Instantly share code, notes, and snippets.

@pamelafox
Created April 24, 2025 20:36
Show Gist options
  • Save pamelafox/9d29449ef4138b1be5b843d69a205f82 to your computer and use it in GitHub Desktop.
Save pamelafox/9d29449ef4138b1be5b843d69a205f82 to your computer and use it in GitHub Desktop.
pymupdf experiments
import logging
import os
import azure.identity
import openai
import pymupdf4llm
from dotenv import load_dotenv
from pydantic import BaseModel
from rich import print
logging.basicConfig(level=logging.WARNING)
# override=True: values from the .env file win over variables already set in the shell.
load_dotenv(override=True)

# Pick the backend from OPENAI_HOST. Only the exact value "azure" selects Azure OpenAI;
# anything else (including unset, which defaults to "github") uses GitHub Models.
if os.getenv("OPENAI_HOST", "github") == "azure":
    # Validate the variables this branch actually reads below, so a missing setting
    # produces the README hint instead of a KeyError or a request with model=None.
    if not os.getenv("AZURE_OPENAI_ENDPOINT") or not os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT"):
        logging.warning(
            "AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_CHAT_DEPLOYMENT env variables are empty. See README."
        )
        # SystemExit instead of exit(): the exit() builtin is injected by the `site`
        # module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)
    # Keyless auth: tokens come from the Azure Developer CLI login.
    credential = azure.identity.AzureDeveloperCliCredential(tenant_id=os.getenv("AZURE_TENANT_ID"))
    token_provider = azure.identity.get_bearer_token_provider(
        credential, "https://cognitiveservices.azure.com/.default"
    )
    client = openai.AzureOpenAI(
        api_version="2024-08-01-preview",
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        azure_ad_token_provider=token_provider,
    )
    # The deployment name is what gets passed as `model` in the requests below.
    model_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT")
else:
    if not os.getenv("GITHUB_TOKEN"):
        logging.warning("GITHUB_TOKEN env variable is empty. See README.")
        raise SystemExit(1)
    client = openai.OpenAI(
        base_url="https://models.inference.ai.azure.com",
        api_key=os.environ["GITHUB_TOKEN"],
        # Specify the API version to use the Structured Outputs feature
        default_query={"api-version": "2024-08-01-preview"},
    )
    model_name = "gpt-4o"
# Define models for Structured Outputs.
# NOTE: fields are documented with `#` comments rather than docstrings so the JSON
# schema pydantic generates from these models stays exactly as it was.
class Item(BaseModel):
    # A single entry in a receipt's `items` list.
    product: str
    price: float  # NOTE(review): unit price vs. line total is not specified here
    quantity: int
class Receipt(BaseModel):
    # Top-level schema for a receipt; passed as `response_format` in the
    # structured-output request later in this script.
    # `#` comments (not a docstring) keep the generated JSON schema unchanged.
    total: float
    shipping: float
    payment_method: str
    items: list[Item]  # nested Item entries
    order_number: int
# Prepare PDF as markdown text. Only the first page is converted (`pages=[0]`).
md_text = pymupdf4llm.to_markdown("plants.pdf", pages=[0])
print(md_text)

# Experiment 1: ask the model to turn the extracted Markdown into a Markdown table.
response = client.chat.completions.create(
    model=model_name,
    messages=[
        {"role": "system", "content": "You are a document reformatter."},
        {
            "role": "user",
            # Blank line between the instruction and the document so the two are
            # not run together into one line of text.
            "content": "Reformat the provided Markdown text as a Markdown table:\n\n" + md_text,
        },
    ],
)
print(response.choices[0].message.content)
# Deliberately no exit() here: an unconditional exit at this point made the
# structured-output extraction that follows unreachable.
# Send request to GPT model to extract using Structured Outputs.
# The Receipt model is supplied as `response_format`; the result is read back
# from `message.parsed` below.
completion = client.beta.chat.completions.parse(
    model=model_name,
    messages=[
        {"role": "system", "content": "Extract the information from the receipt"},
        {"role": "user", "content": md_text},
    ],
    response_format=Receipt,
)

message = completion.choices[0].message
# Show the refusal text when the model declined; otherwise show the parsed result.
if message.refusal:
    print(message.refusal)
else:
    print(message.parsed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment