Skip to content

Instantly share code, notes, and snippets.

@cast42
Created November 28, 2024 13:09
Show Gist options
  • Save cast42/78f12854ba83c273c9f41e7668630d79 to your computer and use it in GitHub Desktop.
Save cast42/78f12854ba83c273c9f41e7668630d79 to your computer and use it in GitHub Desktop.
# https://github.com/andrewyng/aisuite/blob/main/examples/QnA_with_pdf.ipynb
import requests
import fitz
from io import BytesIO
# Link to paper in pdf format on the cost of avocados.
pdf_path = "https://arxiv.org/pdf/2104.04649"
pdf_text = ""  # Stays empty when the download does not return HTTP 200.

# Download the PDF and load it into memory.
# An explicit timeout (in seconds) is passed because requests applies none by
# default, so a stalled server would otherwise block the script indefinitely.
response = requests.get(pdf_path, timeout=30)
if response.status_code == 200:
    pdf_data = BytesIO(response.content)  # Load PDF data into BytesIO
    # Open PDF from memory using fitz
    with fitz.open(stream=pdf_data, filetype="pdf") as pdf:
        # Separator appended after each page's text.
        page_separator = "\n" + "=" * 50 + "\n"
        # Build the full text in a single join instead of repeated `+=`.
        pdf_text = "".join(
            page.get_text("text") + page_separator for page in pdf
        )
    print("Downloaded and extracted text from pdf.")
else:
    # Only the status code is reported; pdf_text is left as the empty string.
    print(f"Failed to download PDF: {response.status_code}")
# The question to be answered from the extracted PDF text.
question = "Is the price of organic avocados higher than non-organic avocados? What has been the trend?"

import aisuite as ai

client = ai.Client()

# Prompt pieces are named separately so the message list below stays readable.
# `pdf_text` must already be defined by the time this runs; it is interpolated
# verbatim into the user prompt together with the question.
system_prompt = "You are a helpful assistant. Answer the question only based on the below text."
user_prompt = f"Answer the question based on the following text:\n\n{pdf_text}\n\nQuestion: {question}\n"

# Chat-style message list: one system instruction followed by one user turn.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment