This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import spacy | |
from spacy.training.example import Example | |
from spacy.util import minibatch | |
# Initialize or load an NLP object and get the NER pipeline | |
nlp = spacy.blank("en") | |
nlp.add_pipe("ner") | |
# Initialize the optimizer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from spacy.scorer import Scorer | |
scorer = Scorer() | |
# Example evaluation data in the same format as training data | |
EVAL_DATA = TRAIN_DATA = [ | |
# GPS_COORDINATES | |
("Coordinates: 124.50 N, 68.95 W", {"entities": [(13, 29, "GPS_COORDINATES")]}), | |
("Location: 13.35 S, 57.80 E", {"entities": [(11, 24, "GPS_COORDINATES")]}), | |
("GPS: 24.50 N, 57.80 W", {"entities": [(5, 19, "GPS_COORDINATES")]}), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if __name__ == "__main__": | |
pdf_name = input("What is the name of the PDF file you want to chat with ?") | |
pdf_folder = 'pdfs' | |
pdf_path = os.path.join(pdf_folder,pdf_name) | |
# responder = Backend(pdf_path) | |
while True: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Responder(): | |
def __init__(self, index) -> None: | |
self.llm = OpenaiLanguageModel(anonymize=False) | |
self.index = index | |
def text_to_embedding(self, text): | |
""" | |
Generate an embedding for the given text using BERT. | |
Parameters: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_embeddings(chunks_with_metadata): | |
""" | |
Generate embeddings for each chunk using BERT. | |
Parameters: | |
- chunks_with_metadata (list): A list of dictionaries containing chunk and page number. | |
Returns: | |
- list: A list of dictionaries with embeddings and metadata. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Backend(pdf_path): | |
pdf_name = os.path.basename(pdf_path) | |
index_name = os.path.splitext(pdf_name)[0] # remove extension | |
if index_name not in pinecone.list_indexes(): | |
pinecone.create_index( | |
name=index_name, | |
dimension=768, # because I use bert-base-uncased |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if __name__ == "__main__": | |
pdf_name = input("What is the name of the PDF file you want to chat with ?\n\n") | |
# Check if the provided name has the .pdf extension and add it if not | |
if not pdf_name.lower().endswith('.pdf'): | |
pdf_name += '.pdf' | |
pdf_folder = 'pdfs' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Responder(): | |
def __init__(self, collection) -> None: | |
self.llm = OpenaiLanguageModel(anonymize=False) | |
self.collection = collection | |
def __call__(self, question) -> Any: | |
results = self.collection.query( | |
query_texts=["This is a query document"], | |
n_results=10 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Backend(pdf_path): | |
pdf_name = os.path.basename(pdf_path) | |
index_name = os.path.splitext(pdf_name)[0] # remove extension | |
if index_name not in chroma_client.list_collections(): | |
collection = chroma_client.create_collection(name=index_name) | |
# wait a moment for the collection to be fully initialized |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from langchain.agents import load_tools | |
from langchain.agents import initialize_agent | |
from langchain.agents import AgentType | |
from langchain.llms import OpenAI | |
from config import OPENAI_API_KEY, SERPAPI_API_KEY | |
import os | |
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY | |
os.environ['SERPAPI_API_KEY'] = SERPAPI_API_KEY |