Created
September 17, 2024 16:13
-
-
Save derickson/dc9843ee7b123b00c019e4ae5c9e11ab to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Install the required packages | |
## pip install -qU elasticsearch openai | |
import os | |
from elasticsearch import Elasticsearch | |
from openai import OpenAI | |
# Elasticsearch client.
# NOTE(review): the endpoint is the literal string "undefined" — a placeholder
# left by the exporter. Replace it with the real cluster URL (e.g.
# "https://<deployment>.es.<region>.cloud.es.io") before running.
es_client = Elasticsearch(
    "undefined",
    api_key=os.environ["ES_API_KEY"]  # raises KeyError if the env var is unset
)

# OpenAI client; API key is read from the environment (KeyError if unset).
openai_client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
)

# Maps each searched index to the ordered list of source fields used to build
# the RAG context; only the first field of each list is consulted below.
index_source_fields = {
    "restaurant_reviews": [
        "semantic_body"
    ]
}
def get_elasticsearch_results(query):
    """Run a hybrid RRF search against the restaurant_reviews index.

    Combines a semantic (ELSER sparse-vector over nested chunks) retriever
    with a lexical multi_match retriever via reciprocal rank fusion.

    Args:
        query: Free-text user query.

    Returns:
        The list of hit dicts from the Elasticsearch response
        (``result["hits"]["hits"]``), at most 3 entries.
    """
    # Semantic leg: ELSER sparse-vector search over the nested semantic_text
    # chunks, keeping the 2 best chunks per document as inner hits.
    semantic_retriever = {
        "standard": {
            "query": {
                "nested": {
                    "path": "semantic_body.inference.chunks",
                    "query": {
                        "sparse_vector": {
                            "inference_id": "my-elser-endpoint",
                            "field": "semantic_body.inference.chunks.embeddings",
                            "query": query,
                        }
                    },
                    "inner_hits": {
                        "size": 2,
                        "name": "restaurant_reviews.semantic_body",
                        "_source": [
                            "semantic_body.inference.chunks.text",
                        ],
                    },
                }
            }
        }
    }

    # Lexical leg: plain multi_match over the restaurant name and review text.
    lexical_retriever = {
        "standard": {
            "query": {
                "multi_match": {
                    "query": query,
                    "fields": [
                        "Restaurant.text",
                        "Review",
                    ],
                }
            }
        }
    }

    es_query = {
        "retriever": {
            "rrf": {
                "retrievers": [semantic_retriever, lexical_retriever],
            }
        },
        "size": 3,
    }

    response = es_client.search(index="restaurant_reviews", body=es_query)
    return response["hits"]["hits"]
def create_openai_prompt(question, results):
    """Build the system prompt containing the retrieved context.

    Args:
        question: The user's question. Not interpolated into the prompt here —
            it is sent separately as the user message by
            ``generate_openai_completion`` — but kept in the signature for
            interface stability with existing callers.
        results: List of Elasticsearch hit dicts (``hits.hits`` entries).

    Returns:
        A system-prompt string with the concatenated context appended.
    """
    context = ""
    for hit in results:
        # Hoist the duplicated lookup; first configured field for this index.
        # NOTE(review): an index missing from index_source_fields makes
        # .get() return None and this raises TypeError — presumably all
        # searched indices are configured.
        source_field = index_source_fields.get(hit["_index"])[0]
        inner_hit_path = f"{hit['_index']}.{source_field}"
        # For semantic_text matches, extract the chunk text from inner_hits.
        if "inner_hits" in hit and inner_hit_path in hit["inner_hits"]:
            inner_hits = hit["inner_hits"][inner_hit_path]["hits"]["hits"]
            context += "\n --- \n".join(
                inner_hit["_source"]["text"] for inner_hit in inner_hits
            )
            # BUG FIX: the original appended no separator here, so context
            # from consecutive hits ran together on one line (the else branch
            # already terminates with a newline).
            context += "\n"
        else:
            hit_context = hit["_source"][source_field]
            context += f"{hit_context}\n"
    prompt = f"""
Instructions:

- You are an assistant for question-answering tasks.
- Answer questions truthfully and factually using only the context presented.
- If you don't know the answer, just say that you don't know, don't make up an answer.
- You must always cite the document where the answer was extracted using inline academic citation style [], using the position.
- Use markdown format for code examples.
- You are correct, factual, precise, and reliable.

Context:
{context}
"""
    return prompt
def generate_openai_completion(user_prompt, question):
    """Ask the chat model the question, grounded by the system prompt.

    Args:
        user_prompt: System-prompt string (instructions + retrieved context).
        question: The user's question, sent as the user message.

    Returns:
        The content string of the first completion choice.
    """
    messages = [
        {"role": "system", "content": user_prompt},
        {"role": "user", "content": question},
    ]
    completion = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
if __name__ == "__main__":
    # End-to-end RAG flow: retrieve context, build prompt, generate answer.
    question = "my question"
    elasticsearch_results = get_elasticsearch_results(question)
    # BUG FIX: create_openai_prompt takes (question, results); the original
    # call passed only the results, which raised TypeError at runtime.
    context_prompt = create_openai_prompt(question, elasticsearch_results)
    openai_completion = generate_openai_completion(context_prompt, question)
    print(openai_completion)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment