RAG example with LlamaIndex
import os

import fitz  # PyMuPDF
from llama_index.core import Document, PromptTemplate, Settings, VectorStoreIndex
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

class PDFDirectoryReader:
    """Load every PDF in a directory into llama-index Documents, extracting text with PyMuPDF."""

    def __init__(self, directory_path):
        self.directory_path = directory_path

    def load_data(self):
        documents = []
        for filename in os.listdir(self.directory_path):
            if filename.endswith('.pdf'):
                file_path = os.path.join(self.directory_path, filename)
                text = self._extract_text_from_pdf(file_path)
                documents.append(Document(text=text, metadata={'source': filename}))
        return documents

    def _extract_text_from_pdf(self, file_path):
        text = ""
        with fitz.open(file_path) as pdf:
            for page in pdf:
                text += page.get_text()
        return text

# Define your prompt
prompt = "How has Berkshire Hathaway's investment in Coca-Cola grown?"

# Set up the Llama 3 LLM served by a local Ollama instance (Ollama needs no API key)
llm = Ollama(model="llama3")
Settings.llm = llm
# Embeddings must also come from a local model; this assumes `ollama pull nomic-embed-text` has been run
Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")

# Load your documents from a directory containing PDFs
documents = PDFDirectoryReader('path/to/your/documents').load_data()

# Create the vector index over the documents
index = VectorStoreIndex.from_documents(documents)

# Querying the vector store for the most "relevant" chunks
retriever = index.as_retriever(similarity_top_k=3)
relevant_nodes = retriever.retrieve(prompt)
context = "\n".join(node.node.get_content() for node in relevant_nodes)
for node in relevant_nodes:
    print(f"Source: {node.node.metadata.get('source', 'unknown')}\nContent: {node.node.get_content()}\n")
    print("__________________________")

# Adding the retrieved context to our prompt
template = PromptTemplate("{query} Context: {context}")
prompt_with_context = template.format(query=prompt, context=context)

# Asking the LLM for a response from our prompt with the provided context
results = llm.complete(prompt_with_context)
print(results.text)
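
For comparison, llama-index can also run the same retrieve-then-generate loop through its built-in query engine, which handles retrieval, prompt assembly, and the LLM call in one step, and persisting the index avoids re-embedding the PDFs on every run. A minimal sketch, assuming the `index`, `prompt`, and `Settings` from the script above and a writable local `./storage` directory (an arbitrary choice for this example):

from llama_index.core import StorageContext, load_index_from_storage

# One-step alternative to the manual retrieve/format/complete flow above:
# the query engine retrieves the top-k chunks, inserts them into a default
# RAG prompt, and calls the configured LLM.
query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query(prompt)
print(response)

# Persist the vector index so later runs can skip re-parsing and
# re-embedding the PDFs.
index.storage_context.persist(persist_dir="./storage")

# Reload it in a later session:
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)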