from openai import OpenAI
import base64
from pydantic import BaseModel
# Build the API client without embedding a credential in the source file.
# When no `api_key` argument is given, the OpenAI SDK falls back to the
# OPENAI_API_KEY environment variable, so export that variable before running
# this script.
# NOTE(review): a secret key was previously hard-coded on this line; treat it
# as leaked and revoke it.
client = OpenAI()
# Path of the PDF to process; the same value is later sent as the attachment's
# filename.
file_name = "doc2.pdf"

# Read the whole document in binary mode; the context manager closes the
# handle as soon as the bytes are in memory.
with open(file_name, "rb") as pdf_file:
    file_data = pdf_file.read()

# Base64-encode the raw bytes, then decode to `str` so the payload can be
# concatenated into the "data:application/pdf;base64," URL used in the request.
encoded_bytes = base64.b64encode(file_data)
base64_data = encoded_bytes.decode("utf-8")
class DataExtractor(BaseModel):
    # Schema for the structured result; this class is handed to the API call
    # as `response_format`, and the parsed reply is annotated with it.

    # The single field requested from the document.
    email: str
# System turn: tells the model what role to play. The prompt text is sent
# verbatim.
system_message = {
    "role": "system",
    "content": [
        {
            "type": "text",
            "text": "You are a expert to extract data from a PDF files.",
        },
    ],
}

# The PDF is embedded inline as a base64 data URL, labelled with its filename.
pdf_attachment = {
    "type": "file",
    "file": {
        "file_data": "data:application/pdf;base64," + base64_data,
        "filename": file_name,
    },
}

# User turn: a short text label followed by the PDF attachment.
user_message = {
    "role": "user",
    "content": [
        {
            "type": "text",
            "text": "PDF file:\n",
        },
        pdf_attachment,
    ],
}

# Ask for a completion whose output is parsed against the DataExtractor model.
response = client.beta.chat.completions.parse(
    model="gpt-4.1-mini",
    messages=[system_message, user_message],
    response_format=DataExtractor,
)

# Take the parsed object from the first choice and print it.
# NOTE(review): `parsed` is presumably None when the model refuses or the
# output cannot be parsed — confirm against the SDK and handle if needed.
data: DataExtractor = response.choices[0].message.parsed
print(data)
Created
June 7, 2025 16:01
-
-
Save johnidm/73ff17ad74aa699a9b66d9da4e48063f to your computer and use it in GitHub Desktop.
Using an LLM to process a PDF file
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment