janakiramm · March 8, 2024 06:14 · datadaydad · Apr 26, 2024 · Naga-d3v · May 9, 2024
diff --git a/rag_gemini.py b/rag_gemini.py
 # The previous part of this tutorial is at https://gist.github.com/janakiramm/55d2d8ec5d14dd45c7e9127d81cdafcd

 from vertexai.language_models import TextEmbeddingModel
 from google.cloud import aiplatform
 import vertexai
 from vertexai.preview.generative_models import GenerativeModel, Part
 import json
 import os

 # Project-specific settings; replace the placeholders before running.
 # Plain ASCII quotes are required: typographic quotes are a SyntaxError.
 project = "YOUR_GCP_PROJECT"
 location = "us-central1"
 sentence_file_path = "lakeside_sentences.json"
 index_name = "INDEX_EP_ID"  # Get this from the console or the previous step

 # Configure both SDK entry points with the same project and region.
 aiplatform.init(project=project, location=location)
 vertexai.init(project=project, location=location)
 model = GenerativeModel("gemini-pro")
 lakeside_index_ep = aiplatform.MatchingEngineIndexEndpoint(index_endpoint_name=index_name)

 def generate_text_embeddings(sentences) -> list:
     """Return one embedding vector per entry of ``sentences``.

     Loads the "textembedding-gecko@001" model, requests the embeddings for
     all sentences in a single call, and collects the ``values`` attribute of
     every result in the order the results are returned.
     """
     embedding_model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")
     return [item.values for item in embedding_model.get_embeddings(sentences)]

 def generate_context(ids, data):
     """Join the sentences of the records whose ``'id'`` appears in ``ids``.

     Args:
         ids: Iterable of record identifiers. The output follows this order,
             and a repeated identifier repeats its sentences.
         data: Iterable of mappings, each with an ``'id'`` key and, for the
             records that match, a ``'sentence'`` string.

     Returns:
         The matching sentences separated by newlines, with leading and
         trailing whitespace stripped; ``''`` when nothing matches.
     """
     ids = list(ids)
     if not ids:
         return ''
     # Index the records once so each lookup avoids rescanning ``data``.
     # Records sharing an id keep their original relative order.
     entries_by_id = {}
     for entry in data:
         entries_by_id.setdefault(entry['id'], []).append(entry)
     sentences = [
         entry['sentence']
         for wanted_id in ids
         for entry in entries_by_id.get(wanted_id, ())
     ]
     return "\n".join(sentences).strip()
  
 def load_file(sentence_file_path):
     """Read the sentence records stored at ``sentence_file_path``.

     The on-disk layout is not visible from this script, so both a single JSON
     document and JSON Lines (one object per line) are accepted -- TODO confirm
     against the step that writes the file.

     Returns:
         A list of the decoded records.
     """
     with open(sentence_file_path, "r", encoding="utf-8") as sentence_file:
         raw_text = sentence_file.read()
     try:
         records = json.loads(raw_text)
     except json.JSONDecodeError:
         # Not one JSON document: decode every non-blank line on its own.
         return [json.loads(line) for line in raw_text.splitlines() if line.strip()]
     # A lone object is wrapped so callers can always iterate over records.
     return records if isinstance(records, list) else [records]

 data = load_file(sentence_file_path)

 # Other sample questions to try:
 #query=["How many days of unpaid leave in an year"]
 #query=["Allowed cost of online course"]
 #query=["process for applying sick leave"]
 query = ["process for applying personal leave"]
 qry_emb = generate_text_embeddings(query)

 # NOTE(review): index_name is also passed as the endpoint name above; confirm
 # that the deployed index ID really is the same value.
 response = lakeside_index_ep.find_neighbors(
     deployed_index_id=index_name,
     queries=[qry_emb[0]],
     num_neighbors=10,
 )

 # Flatten the per-query neighbor lists into a single list of record ids.
 matching_ids = [neighbor.id for sublist in response for neighbor in sublist]

 context = generate_context(matching_ids, data)
 # Interpolate the question text itself; formatting the list would embed its
 # Python repr (brackets and quotes) in the prompt.
 prompt = f"Based on the context delimited in backticks, answer the query. ```{context}``` {query[0]}"

 chat = model.start_chat(history=[])
 response = chat.send_message(prompt)
 print(response.text)
	# The previous part of this tutorial is at https://gist.github.com/janakiramm/55d2d8ec5d14dd45c7e9127d81cdafcd

	from vertexai.language_models import TextEmbeddingModel
	from google.cloud import aiplatform
	import vertexai
	from vertexai.preview.generative_models import GenerativeModel, Part
	import json
	import os

	# Project-specific settings; replace the placeholders before running.
	# Plain ASCII quotes are required: typographic quotes are a SyntaxError.
	project = "YOUR_GCP_PROJECT"
	location = "us-central1"
	sentence_file_path = "lakeside_sentences.json"
	index_name = "INDEX_EP_ID"  # Get this from the console or the previous step

	# Configure both SDK entry points with the same project and region.
	aiplatform.init(project=project, location=location)
	vertexai.init(project=project, location=location)
	model = GenerativeModel("gemini-pro")
	lakeside_index_ep = aiplatform.MatchingEngineIndexEndpoint(index_endpoint_name=index_name)

	def generate_text_embeddings(sentences) -> list:
	    """Return one embedding vector per entry of ``sentences``.

	    Loads the "textembedding-gecko@001" model, requests the embeddings for
	    all sentences in a single call, and collects the ``values`` attribute
	    of every result in the order the results are returned.
	    """
	    # The body must be indented under the ``def``; flattened it does not parse.
	    model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")
	    embeddings = model.get_embeddings(sentences)
	    vectors = [embedding.values for embedding in embeddings]
	    return vectors

	def generate_context(ids, data):
	    """Join the sentences of the records whose ``'id'`` appears in ``ids``.

	    Args:
	        ids: Iterable of record identifiers. The output follows this order,
	            and a repeated identifier repeats its sentences.
	        data: Iterable of mappings, each with an ``'id'`` key and, for the
	            records that match, a ``'sentence'`` string.

	    Returns:
	        The matching sentences separated by newlines, with leading and
	        trailing whitespace stripped; ``''`` when nothing matches.
	    """
	    ids = list(ids)
	    if not ids:
	        return ''
	    # Index the records once so each lookup avoids rescanning ``data``.
	    # Records sharing an id keep their original relative order.
	    entries_by_id = {}
	    for entry in data:
	        entries_by_id.setdefault(entry['id'], []).append(entry)
	    sentences = [
	        entry['sentence']
	        for wanted_id in ids
	        for entry in entries_by_id.get(wanted_id, ())
	    ]
	    return "\n".join(sentences).strip()

	def load_file(sentence_file_path):
	    """Read the sentence records stored at ``sentence_file_path``.

	    The on-disk layout is not visible from this script, so both a single
	    JSON document and JSON Lines (one object per line) are accepted --
	    TODO confirm against the step that writes the file.

	    Returns:
	        A list of the decoded records.
	    """
	    with open(sentence_file_path, "r", encoding="utf-8") as sentence_file:
	        raw_text = sentence_file.read()
	    try:
	        records = json.loads(raw_text)
	    except json.JSONDecodeError:
	        # Not one JSON document: decode every non-blank line on its own.
	        return [json.loads(line) for line in raw_text.splitlines() if line.strip()]
	    # A lone object is wrapped so callers can always iterate over records.
	    return records if isinstance(records, list) else [records]

	data = load_file(sentence_file_path)

	# Other sample questions to try:
	#query=["How many days of unpaid leave in an year"]
	#query=["Allowed cost of online course"]
	#query=["process for applying sick leave"]
	query = ["process for applying personal leave"]
	qry_emb = generate_text_embeddings(query)

	# NOTE(review): index_name is also passed as the endpoint name above; confirm
	# that the deployed index ID really is the same value.
	response = lakeside_index_ep.find_neighbors(
	    deployed_index_id=index_name,
	    queries=[qry_emb[0]],
	    num_neighbors=10,
	)

	# Flatten the per-query neighbor lists into a single list of record ids.
	matching_ids = [neighbor.id for sublist in response for neighbor in sublist]

	context = generate_context(matching_ids, data)
	# Interpolate the question text itself; formatting the list would embed its
	# Python repr (brackets and quotes) in the prompt.
	prompt = f"Based on the context delimited in backticks, answer the query. ```{context}``` {query[0]}"

	chat = model.start_chat(history=[])
	response = chat.send_message(prompt)
	print(response.text)