ChromaDB for RAG with OpenAI
import pandas as pd
import openai
import chromadb
from chromadb.utils import embedding_functions
import os

# Load the Oscars dataset and keep only 2023 ceremony rows that have a film.
df = pd.read_csv('./data/oscars.csv')
df = df.loc[df['year_ceremony'] == 2023]
df = df.dropna(subset=['film'])
df['category'] = df['category'].str.lower()

# Turn each nomination into a natural-language sentence, phrased differently
# for winners and non-winners so the model can tell them apart.
df['text'] = df['name'] + ' got nominated under the category, ' + df['category'] + ', for the film ' + df['film'] + ' to win the award'
df.loc[df['winner'] == False, 'text'] = df['name'] + ' got nominated under the category, ' + df['category'] + ', for the film ' + df['film'] + ' but did not win'

# Embed a single string with text-embedding-ada-002 (pre-1.0 openai SDK call).
def text_embedding(text) -> list:
    response = openai.Embedding.create(model="text-embedding-ada-002", input=text)
    return response["data"][0]["embedding"]

# Chroma embedding function so documents are embedded automatically on add.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-ada-002"
)

# In-memory Chroma client with a collection backed by the OpenAI embeddings.
client = chromadb.Client()
collection = client.get_or_create_collection("oscars-2023", embedding_function=openai_ef)

# Index the nomination sentences, using the dataframe index as document IDs.
docs = df["text"].tolist()
ids = [str(x) for x in df.index.tolist()]
collection.add(
    documents=docs,
    ids=ids
)

# Embed the question and retrieve the 15 most similar nomination sentences.
vector = text_embedding("Nominations for music")
results = collection.query(
    query_embeddings=[vector],
    n_results=15,
    include=["documents"]
)

# Flatten the retrieved documents into a single context block for the prompt.
res = "\n".join(str(item) for item in results['documents'][0])
prompt = f'```{res}``` who won the award for the original song?'

messages = [
    {"role": "system", "content": "You answer questions about the 95th Oscar awards."},
    {"role": "user", "content": prompt}
]

# Ask gpt-3.5-turbo to answer from the retrieved context (pre-1.0 openai SDK call).
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=messages,
    temperature=0
)
response_message = response["choices"][0]["message"]["content"]
print(response_message)
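
Because the collection was created with openai_ef, Chroma can also embed the query string itself, which makes the manual text_embedding helper optional. A minimal sketch of the same retrieval using Chroma's query_texts parameter:

# Same retrieval, but letting the collection's embedding function embed the query.
results = collection.query(
    query_texts=["Nominations for music"],
    n_results=15,
    include=["documents"]
)
context = "\n".join(results["documents"][0])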
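
Note that chromadb.Client() keeps the index in memory, so every run re-embeds the whole dataset. A hedged sketch of persisting the collection to disk instead, assuming chromadb 0.4+ (which introduced PersistentClient) and an arbitrary ./chroma-oscars directory:

# Store the collection on disk so embeddings survive between runs (chromadb 0.4+).
client = chromadb.PersistentClient(path="./chroma-oscars")  # path is an assumption
collection = client.get_or_create_collection("oscars-2023", embedding_function=openai_ef)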