Skip to content

Instantly share code, notes, and snippets.

View fsndzomga's full-sized avatar

Franck Stéphane Ndzomga fsndzomga

View GitHub Profile
# --- spaCy custom-NER training setup (gist excerpt) ---
import random
import spacy
from spacy.training.example import Example
from spacy.util import minibatch
from spacy.scorer import Scorer

# Start from a blank English pipeline and add a fresh (untrained) NER component.
nlp = spacy.blank("en")
nlp.add_pipe("ner")

# Scorer used to evaluate the trained NER model.
scorer = Scorer()

# Evaluation data deliberately aliases the training data (same spaCy
# (text, {"entities": [(start, end, label), ...]}) character-offset format).
# NOTE(review): this gist excerpt is truncated — only the first three
# GPS_COORDINATES examples are visible; the original list continues.
# The character spans below were corrected to cover the full coordinate
# substring (the scraped originals ended one or more characters early,
# which would make spaCy drop/warn on misaligned entities).
EVAL_DATA = TRAIN_DATA = [
    # GPS_COORDINATES
    ("Coordinates: 124.50 N, 68.95 W", {"entities": [(13, 30, "GPS_COORDINATES")]}),
    ("Location: 13.35 S, 57.80 E", {"entities": [(10, 26, "GPS_COORDINATES")]}),
    ("GPS: 24.50 N, 57.80 W", {"entities": [(5, 21, "GPS_COORDINATES")]}),
]
if __name__ == "__main__":
    # Ask the user which PDF (inside the local 'pdfs' folder) to chat with.
    pdf_name = input("What is the name of the PDF file you want to chat with ?")
    pdf_folder = 'pdfs'
    pdf_path = os.path.join(pdf_folder, pdf_name)
    # responder = Backend(pdf_path)
    while True:
        # NOTE(review): the loop body is truncated in this gist excerpt —
        # TODO restore the question/answer REPL from the full gist.
        break
class Responder():
    """Answer questions against a vector index using an OpenAI language model.

    NOTE(review): this class is truncated in the gist excerpt — only the
    constructor and the start of ``text_to_embedding`` are visible.
    """

    def __init__(self, index) -> None:
        # OpenaiLanguageModel is defined elsewhere in the full gist;
        # anonymize=False presumably disables PII scrubbing — TODO confirm.
        self.llm = OpenaiLanguageModel(anonymize=False)
        self.index = index

    def text_to_embedding(self, text):
        """
        Generate an embedding for the given text using BERT.

        Parameters:
        - text (str): the text to embed.
        """
        # NOTE(review): the implementation is truncated in this excerpt —
        # restore it from the full gist before use.
        raise NotImplementedError("text_to_embedding body missing from this excerpt")
def create_embeddings(chunks_with_metadata):
    """
    Generate embeddings for each chunk using BERT.

    Parameters:
    - chunks_with_metadata (list): A list of dictionaries containing chunk and page number.

    Returns:
    - list: A list of dictionaries with embeddings and metadata.
    """
    # NOTE(review): the implementation is truncated in this gist excerpt —
    # restore it from the full gist before use.
    raise NotImplementedError("create_embeddings body missing from this excerpt")
def Backend(pdf_path):
    """Set up (or reuse) a Pinecone index named after the PDF file.

    Parameters:
    - pdf_path (str): path to the PDF; its basename (without extension)
      becomes the Pinecone index name.

    NOTE(review): this function is truncated in the gist excerpt — the
    remainder (upsert/query wiring) must be restored from the full gist.
    """
    pdf_name = os.path.basename(pdf_path)
    index_name = os.path.splitext(pdf_name)[0]  # remove extension
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(
            name=index_name,
            dimension=768,  # because I use bert-base-uncased
        )
if __name__ == "__main__":
    pdf_name = input("What is the name of the PDF file you want to chat with ?\n\n")
    # Check if the provided name has the .pdf extension and add it if not
    if not pdf_name.lower().endswith('.pdf'):
        pdf_name += '.pdf'
    pdf_folder = 'pdfs'
    # NOTE(review): the rest of the entry point is truncated in this
    # gist excerpt — restore it from the full gist.
class Responder():
    """Answer questions against a Chroma collection using an OpenAI language model.

    NOTE(review): truncated in the gist excerpt — ``__call__`` is cut off
    after the collection query.
    """

    def __init__(self, collection) -> None:
        # OpenaiLanguageModel is defined elsewhere in the full gist;
        # anonymize=False presumably disables PII scrubbing — TODO confirm.
        self.llm = OpenaiLanguageModel(anonymize=False)
        self.collection = collection

    def __call__(self, question) -> Any:
        # NOTE(review): the query text is hard-coded in the gist — the
        # ``question`` argument is not used in the visible portion.
        results = self.collection.query(
            query_texts=["This is a query document"],
            n_results=10,
        )
        # NOTE(review): the remainder of this method (feeding ``results``
        # to the LLM) is truncated in this excerpt.
def Backend(pdf_path):
    """Set up (or reuse) a Chroma collection named after the PDF file.

    Parameters:
    - pdf_path (str): path to the PDF; its basename (without extension)
      becomes the collection name.

    NOTE(review): truncated in the gist excerpt — the remainder
    (populating and returning the collection) must be restored.
    """
    pdf_name = os.path.basename(pdf_path)
    index_name = os.path.splitext(pdf_name)[0]  # remove extension
    if index_name not in chroma_client.list_collections():
        collection = chroma_client.create_collection(name=index_name)
        # wait a moment for the collection to be fully initialized
# Standard library
import os

# Third-party
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.llms import OpenAI

# Local configuration (keys are kept out of source control in config.py)
from config import OPENAI_API_KEY, SERPAPI_API_KEY

# Expose the keys via environment variables, as expected by langchain/serpapi.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
os.environ['SERPAPI_API_KEY'] = SERPAPI_API_KEY