Skip to content

Instantly share code, notes, and snippets.

@fsndzomga
Created September 29, 2023 23:00
Show Gist options
  • Save fsndzomga/6e363c4e485c986ac085bef6341fd095 to your computer and use it in GitHub Desktop.
class Responder:
    """Answer questions via retrieval-augmented generation.

    Embeds the question with BERT, queries a vector index for the most
    similar document chunks, and asks an LLM to answer using those
    chunks as context.
    """

    def __init__(self, index) -> None:
        """
        Parameters:
        - index: a vector index exposing ``query(vector=..., top_k=...,
          include_metadata=True)`` (e.g. a Pinecone index) whose matches
          carry a 'chunk' metadata field.  -- assumed from usage in
          __call__; confirm against the index implementation.
        """
        self.llm = OpenaiLanguageModel(anonymize=False)
        self.index = index
        # BERT encoder state, loaded lazily on first use: the original
        # reloaded both tokenizer and model weights on EVERY call to
        # text_to_embedding, which is very expensive.  Deferring the load
        # (rather than loading here) keeps __init__'s side effects
        # unchanged for callers that never embed.
        self._tokenizer = None
        self._model = None

    def _bert(self):
        """Load the pretrained BERT tokenizer/model once and return them."""
        if self._tokenizer is None or self._model is None:
            self._tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            self._model = BertModel.from_pretrained('bert-base-uncased')
            # Inference-only use: disable dropout for deterministic outputs.
            self._model.eval()
        return self._tokenizer, self._model

    def text_to_embedding(self, text):
        """
        Generate an embedding for the given text using BERT.

        Parameters:
        - text (str): The input text.

        Returns:
        - list: The embedding of the input text (hidden states
          mean-pooled over the token dimension).
        """
        tokenizer, model = self._bert()
        # Tokenize; inputs beyond BERT's 512-token limit are truncated.
        inputs = tokenizer(text, return_tensors='pt', padding=True,
                           truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        # Mean pooling over tokens yields a single sentence embedding.
        embedding = outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
        return embedding

    def __call__(self, question) -> Any:
        """Retrieve context for *question* and return the LLM's answer."""
        vector = self.text_to_embedding(question)
        context = self.index.query(
            vector=vector,
            top_k=100,
            include_metadata=True,
        )
        # Extract chunk texts from the matches' metadata.
        chunks = [match['metadata']['chunk'] for match in context['matches']]
        # Merge chunks into a single context string.
        merged_text = ' '.join(chunks)
        prompt = f"""
Answer this question: {question}, using these informations from the document: {merged_text}
"""
        response = self.llm.generate(prompt)
        return response
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment