Created
November 4, 2023 16:17
-
-
Save neelabalan/e06c953dde36b74cc2f926ea24995ac4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Optional

from pydantic import Field, BaseModel
import openai
import instructor

# Patch the openai module once at import time; the `response_model` keyword
# passed to `openai.ChatCompletion.create` further down relies on this patch.
instructor.patch()
# Structured fields requested from the model for a single page.
# Documented with `#` comments on purpose: a class docstring on a pydantic
# model is emitted as the schema `description`, so adding one would change
# the schema handed to the model.
class ContentSummary(BaseModel):
    title: str
    summary: str
    # NOTE(review): declared as one string, not a list -- presumably the tags
    # come back as a single delimited string; confirm the intended shape.
    tags: str
class MaybeContentSummary(BaseModel):
    """MaybeContentSummary is a model that represents the result of a content extraction.
    It can either have a result or an error and message.
    If theres no title, summary, tags, then the error and message will be populated."""

    # The docstring above is kept word-for-word: pydantic exposes it as the
    # schema description, so it is effectively part of the prompt.

    # Extracted fields; stays None when nothing could be extracted.
    result: Optional[ContentSummary] = Field(
        default=None, description="The result of extraction if it exists."
    )
    # Set to True when extraction failed; defaults to False.
    error: Optional[bool] = Field(default=False)
    # Explanation accompanying an error. Annotated Optional because the
    # default is None; a bare `str` annotation contradicts that default and
    # rejects an explicit null under strict validation.
    message: Optional[str] = Field(default=None)
def extract_content(text: str): | |
content: MaybeContentSummary = openai.ChatCompletion.create( | |
response_model=MaybeContentSummary, | |
model="gpt-3.5-turbo", | |
messages=[ | |
{ | |
"role": "system", | |
"content": "You are a world class crawler that can extract content from any website.", | |
}, | |
{ | |
"role": "user", | |
"content": "Extract the title, summary and tags from this website:", | |
}, | |
{"role": "user", "content": text}, | |
] | |
) | |
print(content) | |
match content: | |
case MaybeContentSummary(error=True, message=message): | |
print(f"Error: {message}") | |
case MaybeContentSummary(result=content): | |
print(content) | |
return content | |
if __name__ == "__main__":
    # Demo run with input that carries no real page content.
    extract_content("Page Not Found")
    # Error: Page Not Found - Unable to extract content from the website.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://github.com/jxnl/instructor/pull/134/files