izikeros · April 11, 2024 08:05
diff --git a/ragas_azureopenai.py b/ragas_azureopenai.py
 """
 Requires the following environment variables (you can use it as .env_template):

 COMPLETION_DEPLOYMENT_NAME=
 EMBEDDING_DEPLOYMENT_NAME=
 AZURE_OPENAI_API_KEY=
 AZURE_OPENAI_ENDPOINT=
 AZURE_OPENAI_API_VERSION=
 """

 import os
 from pprint import pprint

 import langchain
 import langchain_community
 import openai
 import ragas
 from datasets import Dataset
 from dotenv import find_dotenv, load_dotenv
 from langchain_openai.chat_models import AzureChatOpenAI
 from langchain_openai.embeddings import AzureOpenAIEmbeddings
 from ragas import evaluate
 from ragas.evaluation import Result
 from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
 )

 # Report the versions of the key packages; handy when reproducing a run.
 print("\n===========\nKey packages versions:\n===========\n")
 for label, package in (
     ("ragas version: ", ragas),
     ("openai version: ", openai),
     ("langchain version: ", langchain),
     ("langchain-community version: ", langchain_community),
 ):
     print(label, package.__version__)


 # A dataset from the Hugging Face hub could be loaded here instead, e.g.:
 #     amnesty_qa = load_dataset(
 #         "explodinggradients/amnesty_qa", "english_v2", trust_remote_code=True
 #     )

 # Tiny in-memory evaluation set, written one record per sample: each sample
 # has a question, an answer, a list of contexts and a ground truth.
 sample_records = [
     {
         "question": "When was the first super bowl?",
         "answer": "The first superbowl was held on January 15, 1967",
         "contexts": [
             "The Super Bowl....season since 1966,",
             "replacing the NFL...in February.",
         ],
         "ground_truth": "The first superbowl was held on January 15, 1967",
     },
     {
         "question": "Who won the most super bowls?",
         "answer": "The most super bowls have been won by The New England Patriots",
         "contexts": [
             "The Green Bay Packers...Green Bay, Wisconsin.",
             "The Packers compete...Football Conference",
         ],
         "ground_truth": (
             "The New England Patriots have won the Super Bowl a record six times"
         ),
     },
 ]
 # Transpose the records into the column-oriented mapping (one list per
 # column) that Dataset.from_dict receives.
 data_samples = {
     column: [record[column] for record in sample_records]
     for column in ("question", "answer", "contexts", "ground_truth")
 }
 dataset = Dataset.from_dict(data_samples)


 # Metrics passed to the evaluation run below.
 metrics = [faithfulness, answer_relevancy, context_recall, context_precision]

 print("\n===========\nEnvironment variables:\n===========\n")
 # find_dotenv() returns an empty string when no .env file can be located;
 # report that case explicitly instead of claiming to have loaded from "".
 env = find_dotenv()
 if env:
     load_dotenv(env)
     print(f"Loaded environment variables from {env}")
 else:
     print("No .env file found; using variables already set in the environment")

 # Azure OpenAI settings read from the environment. os.getenv yields None for
 # any variable that is not set.
 azure_config = {
     "api_version": os.getenv("AZURE_OPENAI_API_VERSION"),
     "azure_completion_deployment": os.getenv("COMPLETION_DEPLOYMENT_NAME"),
     "azure_embedding_deployment": os.getenv("EMBEDDING_DEPLOYMENT_NAME"),
 }

 # Only the API version and deployment names are shown; the API key is not
 # part of this dict.
 pprint(azure_config)

 # ===== Models =====
 # Chat model, passed to evaluate() as the LLM.
 azure_model = AzureChatOpenAI(
     azure_deployment=azure_config["azure_completion_deployment"],
     api_version=azure_config["api_version"],
 )

 # Embeddings model, passed to evaluate() for the metrics that rely on
 # embeddings (presumably answer_relevancy among the metrics used here).
 azure_embeddings = AzureOpenAIEmbeddings(
     azure_deployment=azure_config["azure_embedding_deployment"],
     api_version=azure_config["api_version"],
 )

 # To evaluate a single random example of the amnesty_qa dataset instead:
 #     dataset = amnesty_qa["eval"].shuffle(seed=42).select(range(1))

 # Score the dataset with every metric. is_async=False / raise_exceptions=True
 # presumably request a synchronous run that surfaces errors instead of hiding
 # them (confirm against the ragas documentation for the installed version).
 result: Result = evaluate(
     dataset=dataset,
     metrics=metrics,
     embeddings=azure_embeddings,
     llm=azure_model,
     raise_exceptions=True,
     is_async=False,
 )

 # Printed form of the result, followed by the raw attributes of the object.
 print("\n===========\nEvaluation result:\n===========\n", result, sep="\n")
 print("\n-----------\nResult object\n-----------\n")
 pprint(vars(result))

 # The same result as a pandas dataframe: preview the first rows, then list
 # the available columns.
 print("\n===========\nResult as pandas dataframe:\n===========\n")
 df = result.to_pandas()
 print(df.head())
 print("\n---------Columns---------\n", df.columns, sep="\n")

 # ======== save the dataframe to json files ========
 # Create the output directory if it does not exist yet.
 os.makedirs("results", exist_ok=True)
 # orient="records": list of dictionaries, all data of one question together.
 # orient="columns": dictionary of dictionaries, all data of one metric together.
 for json_path, orient in (
     ("results/result_df_records.json", "records"),
     ("results/result_df_columns.json", "columns"),
 ):
     df.to_json(json_path, orient=orient, indent=2)
	"""
	Requires the following environment variables (you can use it as .env_template):

	COMPLETION_DEPLOYMENT_NAME=
	EMBEDDING_DEPLOYMENT_NAME=
	AZURE_OPENAI_API_KEY=
	AZURE_OPENAI_ENDPOINT=
	AZURE_OPENAI_API_VERSION=
	"""

	import os
	from pprint import pprint

	import langchain
	import langchain_community
	import openai
	import ragas
	from datasets import Dataset
	from dotenv import find_dotenv, load_dotenv
	from langchain_openai.chat_models import AzureChatOpenAI
	from langchain_openai.embeddings import AzureOpenAIEmbeddings
	from ragas import evaluate
	from ragas.evaluation import Result
	from ragas.metrics import (
	answer_relevancy,
	context_precision,
	context_recall,
	faithfulness,
	)

	# Report the versions of the key packages; handy when reproducing a run.
	print("\n===========\nKey packages versions:\n===========\n")
	for label, package in (
	    ("ragas version: ", ragas),
	    ("openai version: ", openai),
	    ("langchain version: ", langchain),
	    ("langchain-community version: ", langchain_community),
	):
	    print(label, package.__version__)


	# A dataset from the Hugging Face hub could be loaded here instead, e.g.:
	#     amnesty_qa = load_dataset(
	#         "explodinggradients/amnesty_qa", "english_v2", trust_remote_code=True
	#     )

	# Tiny in-memory evaluation set, written one record per sample: each sample
	# has a question, an answer, a list of contexts and a ground truth.
	sample_records = [
	    {
	        "question": "When was the first super bowl?",
	        "answer": "The first superbowl was held on January 15, 1967",
	        "contexts": [
	            "The Super Bowl....season since 1966,",
	            "replacing the NFL...in February.",
	        ],
	        "ground_truth": "The first superbowl was held on January 15, 1967",
	    },
	    {
	        "question": "Who won the most super bowls?",
	        "answer": "The most super bowls have been won by The New England Patriots",
	        "contexts": [
	            "The Green Bay Packers...Green Bay, Wisconsin.",
	            "The Packers compete...Football Conference",
	        ],
	        "ground_truth": (
	            "The New England Patriots have won the Super Bowl a record six times"
	        ),
	    },
	]
	# Transpose the records into the column-oriented mapping (one list per
	# column) that Dataset.from_dict receives.
	data_samples = {
	    column: [record[column] for record in sample_records]
	    for column in ("question", "answer", "contexts", "ground_truth")
	}
	dataset = Dataset.from_dict(data_samples)


	# Metrics passed to the evaluation run below.
	metrics = [faithfulness, answer_relevancy, context_recall, context_precision]

	print("\n===========\nEnvironment variables:\n===========\n")
	# find_dotenv() returns an empty string when no .env file can be located;
	# report that case explicitly instead of claiming to have loaded from "".
	env = find_dotenv()
	if env:
	    load_dotenv(env)
	    print(f"Loaded environment variables from {env}")
	else:
	    print("No .env file found; using variables already set in the environment")

	# Azure OpenAI settings read from the environment. os.getenv yields None for
	# any variable that is not set.
	azure_config = {
	    "api_version": os.getenv("AZURE_OPENAI_API_VERSION"),
	    "azure_completion_deployment": os.getenv("COMPLETION_DEPLOYMENT_NAME"),
	    "azure_embedding_deployment": os.getenv("EMBEDDING_DEPLOYMENT_NAME"),
	}

	# Only the API version and deployment names are shown; the API key is not
	# part of this dict.
	pprint(azure_config)

	# ===== Models =====
	# Chat model, passed to evaluate() as the LLM.
	azure_model = AzureChatOpenAI(
	    azure_deployment=azure_config["azure_completion_deployment"],
	    api_version=azure_config["api_version"],
	)

	# Embeddings model, passed to evaluate() for the metrics that rely on
	# embeddings (presumably answer_relevancy among the metrics used here).
	azure_embeddings = AzureOpenAIEmbeddings(
	    azure_deployment=azure_config["azure_embedding_deployment"],
	    api_version=azure_config["api_version"],
	)

	# To evaluate a single random example of the amnesty_qa dataset instead:
	#     dataset = amnesty_qa["eval"].shuffle(seed=42).select(range(1))

	# Score the dataset with every metric. is_async=False / raise_exceptions=True
	# presumably request a synchronous run that surfaces errors instead of hiding
	# them (confirm against the ragas documentation for the installed version).
	result: Result = evaluate(
	    dataset=dataset,
	    metrics=metrics,
	    embeddings=azure_embeddings,
	    llm=azure_model,
	    raise_exceptions=True,
	    is_async=False,
	)

	# Printed form of the result, followed by the raw attributes of the object.
	print("\n===========\nEvaluation result:\n===========\n", result, sep="\n")
	print("\n-----------\nResult object\n-----------\n")
	pprint(vars(result))

	# The same result as a pandas dataframe: preview the first rows, then list
	# the available columns.
	print("\n===========\nResult as pandas dataframe:\n===========\n")
	df = result.to_pandas()
	print(df.head())
	print("\n---------Columns---------\n", df.columns, sep="\n")

	# ======== save the dataframe to json files ========
	# Create the output directory if it does not exist yet.
	os.makedirs("results", exist_ok=True)
	# orient="records": list of dictionaries, all data of one question together.
	# orient="columns": dictionary of dictionaries, all data of one metric together.
	for json_path, orient in (
	    ("results/result_df_records.json", "records"),
	    ("results/result_df_columns.json", "columns"),
	):
	    df.to_json(json_path, orient=orient, indent=2)