Skip to content

Instantly share code, notes, and snippets.

@pamelafox
Created December 2, 2024 21:39
Show Gist options
  • Save pamelafox/f8fc153dd6106f5a2d041fd773680690 to your computer and use it in GitHub Desktop.
Extract entities from NPR article with gpt-4o structured outputs
import os
import bs4
import requests
import rich
from openai import OpenAI
from pydantic import BaseModel
# Configure an OpenAI client pointed at the GitHub Models inference endpoint,
# authenticating with a GitHub token from the environment.
client = OpenAI(
    base_url="https://models.inference.ai.azure.com",
    api_key=os.environ["GITHUB_TOKEN"],
    # Specify the API version to use the Structured Outputs feature
    default_query={"api-version": "2024-08-01-preview"},
)
model_name = "gpt-4o"
# Download the NPR article and pull the text out of every <p> element
# inside its <article> tag, one paragraph per line.
url = "https://www.npr.org/2024/12/01/nx-s1-5211874/lake-effect-snow-northeast-and-midwest"
response = requests.get(url)
soup = bs4.BeautifulSoup(response.text, "html.parser")

article = soup.find("article")
paragraphs = article.find_all("p")
text = "\n".join(p.text for p in paragraphs)
class Place(BaseModel):
    """A geographic location mentioned in the article.

    Part of the JSON schema sent to the model for Structured Outputs.
    """

    # Location name, plus a free-form category (presumably "city", "state",
    # etc. — the model chooses; not constrained here).
    name: str
    type: str
class ArticleEntities(BaseModel):
    """Named entities extracted from an article.

    Used as the `response_format` schema so the model returns exactly
    these fields via Structured Outputs.
    """

    persons: list[str]
    places: list[Place]
    organizations: list[str]
    topics: list[str]
# Ask the model to extract entities from the article text, constraining
# the response to the ArticleEntities schema via Structured Outputs.
completion = client.beta.chat.completions.parse(
    # Use the model configured above. The original code read an unrelated
    # AZURE_OPENAI_GPT_DEPLOYMENT env var, which is unset when targeting
    # the GitHub Models endpoint, while `model_name` went unused.
    model=model_name,
    messages=[
        {"role": "system", "content": "Extract the entities from the article."},
        {"role": "user", "content": text},
    ],
    response_format=ArticleEntities,
)

# `.parsed` is already an ArticleEntities instance, so re-validating it was
# redundant; it is None when the model refuses, so fail with a clear error.
entities = completion.choices[0].message.parsed
if entities is None:
    raise ValueError("Model did not return a parsed structured response")
rich.print(entities)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment