Franck Stéphane Ndzomga fsndzomga

70 followers · 29 following

View GitHub Profile

Recently created

Least recently created

Recently updated

Least recently updated

fsndzomga / ner_training.py

Created September 27, 2023 23:09

	import random
	import spacy
	from spacy.training.example import Example
	from spacy.util import minibatch

	# Start from a blank English pipeline (spacy.blank) and append an "ner"
	# component to it. The component object returned by add_pipe is not kept
	# here, so any later code has to look it up on `nlp` again.
	nlp = spacy.blank("en")
	nlp.add_pipe("ner")

	# Initialize the optimizer

fsndzomga / ner_evaluation.py

Created September 27, 2023 23:10

	from spacy.scorer import Scorer

	scorer = Scorer()

	# Example evaluation data in the same format as training data
	EVAL_DATA = TRAIN_DATA = [
	# GPS_COORDINATES
	("Coordinates: 124.50 N, 68.95 W", {"entities": [(13, 29, "GPS_COORDINATES")]}),
	("Location: 13.35 S, 57.80 E", {"entities": [(11, 24, "GPS_COORDINATES")]}),
	("GPS: 24.50 N, 57.80 W", {"entities": [(5, 19, "GPS_COORDINATES")]}),

fsndzomga / chat-with-pdf-outline.py

Created September 29, 2023 20:52

	if __name__ == "__main__":
	pdf_name = input("What is the name of the PDF file you want to chat with ?")

	pdf_folder = 'pdfs'

	pdf_path = os.path.join(pdf_folder,pdf_name)

	# responder = Backend(pdf_path)

	while True:

fsndzomga / responder.py

Created September 29, 2023 23:00

	class Responder():
	def __init__(self, index) -> None:
	# Build a language-model client with anonymize=False and keep it, together
	# with the caller-supplied index, on the instance.
	# NOTE(review): OpenaiLanguageModel is not defined in this excerpt; confirm
	# what anonymize=False controls and what type `index` is expected to be.
	self.llm = OpenaiLanguageModel(anonymize=False)
	self.index = index

	def text_to_embedding(self, text):
	"""
	Generate an embedding for the given text using BERT.

	Parameters:

fsndzomga / chunks_embeddings.py

Created September 29, 2023 23:10


	def create_embeddings(chunks_with_metadata):
	"""
	Generate embeddings for each chunk using BERT.

	Parameters:
	- chunks_with_metadata (list): A list of dictionaries containing chunk and page number.

	Returns:
	- list: A list of dictionaries with embeddings and metadata.

fsndzomga / backend.py

Created September 29, 2023 23:11

	def Backend(pdf_path):

	pdf_name = os.path.basename(pdf_path)

	index_name = os.path.splitext(pdf_name)[0] # remove extension

	if index_name not in pinecone.list_indexes():
	pinecone.create_index(
	name=index_name,
	dimension=768, # because I use bert-base-uncased

fsndzomga / main.py

Created October 2, 2023 17:42


	if __name__ == "__main__":
	pdf_name = input("What is the name of the PDF file you want to chat with ?\n\n")

	# Check if the provided name has the .pdf extension and add it if not
	if not pdf_name.lower().endswith('.pdf'):
	pdf_name += '.pdf'

	pdf_folder = 'pdfs'

fsndzomga / responder.py

Created October 2, 2023 17:45


	class Responder():
	def __init__(self, collection) -> None:
	# Build a language-model client with anonymize=False and keep it, together
	# with the caller-supplied collection, on the instance. The collection is
	# the object whose .query(...) method __call__ uses.
	# NOTE(review): OpenaiLanguageModel is not defined in this excerpt; confirm
	# what anonymize=False controls.
	self.llm = OpenaiLanguageModel(anonymize=False)
	self.collection = collection

	def __call__(self, question) -> Any:
	results = self.collection.query(
	query_texts=["This is a query document"],
	n_results=10

fsndzomga / backend.py

Last active October 2, 2023 23:53


	def Backend(pdf_path):

	pdf_name = os.path.basename(pdf_path)

	index_name = os.path.splitext(pdf_name)[0] # remove extension

	if index_name not in chroma_client.list_collections():
	collection = chroma_client.create_collection(name=index_name)
	# wait a moment for the collection to be fully initialized

fsndzomga / zero-shot-react.py

Created October 7, 2023 21:34

	from langchain.agents import load_tools
	from langchain.agents import initialize_agent
	from langchain.agents import AgentType
	from langchain.llms import OpenAI
	from config import OPENAI_API_KEY, SERPAPI_API_KEY
	import os


	# Export both API keys (imported from config) as process environment
	# variables, OpenAI first and SerpAPI second.
	# NOTE(review): presumably the langchain OpenAI / SerpAPI integrations
	# imported above read these variables -- confirm against their docs.
	os.environ.update(
		OPENAI_API_KEY=OPENAI_API_KEY,
		SERPAPI_API_KEY=SERPAPI_API_KEY,
	)