Created December 2, 2024 21:39
Extract entities from NPR article with gpt-4o structured outputs
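This script fetches an NPR article, pulls out its paragraph text with BeautifulSoup, and asks gpt-4o (via the GitHub Models endpoint) to return the entities it mentions, parsed into a pydantic schema. Running it requires a GITHUB_TOKEN environment variable plus the openai, pydantic, requests, beautifulsoup4, and rich packages.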
import os

import bs4
import requests
import rich
from openai import OpenAI
from pydantic import BaseModel

client = OpenAI(
    base_url="https://models.inference.ai.azure.com",
    api_key=os.environ["GITHUB_TOKEN"],
    # Specify the API version to use the Structured Outputs feature
    default_query={"api-version": "2024-08-01-preview"},
)
model_name = "gpt-4o"

# Fetch the article
url = "https://www.npr.org/2024/12/01/nx-s1-5211874/lake-effect-snow-northeast-and-midwest"
response = requests.get(url)
soup = bs4.BeautifulSoup(response.text, "html.parser")

# Extract the text from the article
article = soup.find("article")
paragraphs = article.find_all("p")
text = "\n".join([p.text for p in paragraphs])


# Define pydantic models that describe the schema for the structured output
class Place(BaseModel):
    name: str
    type: str


class ArticleEntities(BaseModel):
    persons: list[str]
    places: list[Place]
    organizations: list[str]
    topics: list[str]


# Send the article text to the model, constraining its response to the schema
completion = client.beta.chat.completions.parse(
    model=model_name,
    messages=[
        {"role": "system", "content": "Extract the entities from the article."},
        {"role": "user", "content": text},
    ],
    response_format=ArticleEntities,
)

# .parsed is already an ArticleEntities instance, so no extra validation is needed
entities = completion.choices[0].message.parsed
rich.print(entities)
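Because .parsed returns a pydantic model instance, the extracted entities can be serialized directly. A minimal sketch of saving them as JSON (the filename entities.json is just an illustration):

# Write the extracted entities to a JSON file for downstream use
# ("entities.json" is an arbitrary example filename)
with open("entities.json", "w") as f:
    f.write(entities.model_dump_json(indent=2))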