fsndzomga · September 29, 2023 23:00
diff --git a/responder.py b/responder.py
 class Responder:
    """
    Answer questions about an indexed document.

    The question is embedded with BERT, the closest chunks are fetched from
    ``index``, and the merged chunk text is passed to the language model
    together with the question.
    """

    # Filled in by ``_load_bert`` on first use. Declared at class level so the
    # "not loaded yet" state exists even before anything is assigned on self.
    _tokenizer = None
    _model = None

    def __init__(self, index) -> None:
        """
        Parameters:
        - index: Object exposing ``query(vector=..., top_k=..., include_metadata=...)``.
        """
        self.llm = OpenaiLanguageModel(anonymize=False)
        self.index = index

    def _load_bert(self):
        """Return the (tokenizer, model) pair, loading each one only once."""
        # Loading pretrained weights is expensive; doing it per question
        # (as the previous version did) dominates the cost of every call.
        if self._tokenizer is None:
            self._tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        if self._model is None:
            self._model = BertModel.from_pretrained('bert-base-uncased')
        return self._tokenizer, self._model

    def text_to_embedding(self, text):
        """
        Generate an embedding for the given text using BERT.

        Parameters:
        - text (str): The input text. Input longer than 512 tokens is truncated.

        Returns:
        - list: The embedding of the input text.
        """
        tokenizer, model = self._load_bert()

        # Tokenize the input text and get the BERT embeddings
        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(**inputs)

        # Mean-pool over the token axis, then drop the size-1 axes.
        return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()

    def __call__(self, question, top_k=100) -> Any:
        """
        Answer a question using the chunks retrieved from the index.

        Parameters:
        - question (str): The question to answer.
        - top_k (int): How many matches to request from the index (default 100).

        Returns:
        - The value returned by ``self.llm.generate`` for the built prompt.
        """
        vector = self.text_to_embedding(question)
        context = self.index.query(
            vector=vector,
            top_k=top_k,
            include_metadata=True,
        )

        # Each match is expected to carry its chunk text under metadata['chunk'].
        chunks = [match['metadata']['chunk'] for match in context['matches']]

        # Merge chunks into a single text
        merged_text = ' '.join(chunks)

        prompt = f"""
        Answer this question: {question}, using these informations from the document: {merged_text}
        """

        return self.llm.generate(prompt)
	class Responder:
	    """
	    Answer questions about an indexed document.

	    The question is embedded with BERT, the closest chunks are fetched from
	    ``index``, and the merged chunk text is passed to the language model
	    together with the question.
	    """

	    # Filled in by ``_load_bert`` on first use. Declared at class level so the
	    # "not loaded yet" state exists even before anything is assigned on self.
	    _tokenizer = None
	    _model = None

	    def __init__(self, index) -> None:
	        """
	        Parameters:
	        - index: Object exposing ``query(vector=..., top_k=..., include_metadata=...)``.
	        """
	        self.llm = OpenaiLanguageModel(anonymize=False)
	        self.index = index

	    def _load_bert(self):
	        """Return the (tokenizer, model) pair, loading each one only once."""
	        # Loading pretrained weights is expensive; doing it per question
	        # (as the previous version did) dominates the cost of every call.
	        if self._tokenizer is None:
	            self._tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
	        if self._model is None:
	            self._model = BertModel.from_pretrained('bert-base-uncased')
	        return self._tokenizer, self._model

	    def text_to_embedding(self, text):
	        """
	        Generate an embedding for the given text using BERT.

	        Parameters:
	        - text (str): The input text. Input longer than 512 tokens is truncated.

	        Returns:
	        - list: The embedding of the input text.
	        """
	        tokenizer, model = self._load_bert()

	        # Tokenize the input text and get the BERT embeddings
	        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
	        # Inference only: no gradients are needed.
	        with torch.no_grad():
	            outputs = model(**inputs)

	        # Mean-pool over the token axis, then drop the size-1 axes.
	        return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()

	    def __call__(self, question, top_k=100) -> Any:
	        """
	        Answer a question using the chunks retrieved from the index.

	        Parameters:
	        - question (str): The question to answer.
	        - top_k (int): How many matches to request from the index (default 100).

	        Returns:
	        - The value returned by ``self.llm.generate`` for the built prompt.
	        """
	        vector = self.text_to_embedding(question)
	        context = self.index.query(
	            vector=vector,
	            top_k=top_k,
	            include_metadata=True,
	        )

	        # Each match is expected to carry its chunk text under metadata['chunk'].
	        chunks = [match['metadata']['chunk'] for match in context['matches']]

	        # Merge chunks into a single text
	        merged_text = ' '.join(chunks)

	        prompt = f"""
	        Answer this question: {question}, using these informations from the document: {merged_text}
	        """

	        return self.llm.generate(prompt)