#!/usr/bin/env python
# coding: utf-8
# In this notebook we automatically answer a set of synthetically generated evaluation questions about the wandb docs and grade each answer with a Cohere model
import os
import random
import re
import time

import openai
import wandb
from getpass import getpass
from tqdm.auto import tqdm
from langchain.docstore.document import Document
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import MarkdownTextSplitter

PROJECT = "wandbot_synth"
ENTITY = "wandbot"
def get_openai_key():
    if os.getenv("OPENAI_API_KEY") is None:
        if any(['VSCODE' in x for x in os.environ.keys()]):
            print('Please enter password in the VS Code prompt at the top of your VS Code window!')
        os.environ["OPENAI_API_KEY"] = getpass("Paste your OpenAI key from: https://platform.openai.com/account/api-keys\n")
    openai.api_key = os.getenv("OPENAI_API_KEY")
    assert os.getenv("OPENAI_API_KEY", "").startswith("sk-"), "This doesn't look like a valid OpenAI API key"
    print("OpenAI API key configured")
cohere_api_key = ""  # set your Cohere API key here before running the evaluation step below
get_openai_key()
# # Answer Questions with WandBot
from typing import Any, Dict, List
import json

from wandb.sdk.lib.runid import generate_id
from wandb.integration.langchain import WandbTracer
from langchain import LLMChain
from langchain.chains import HypotheticalDocumentEmbedder, RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain.vectorstores.base import VectorStoreRetriever
from langchain.callbacks import get_openai_callback
class VectorStoreRetrieverWithScore(VectorStoreRetriever):
    """Retriever that also surfaces the similarity score of each retrieved document."""
    def get_relevant_documents(self, query: str) -> List[Document]:
        if self.search_type == "similarity":
            docs_and_scores = self.vectorstore.similarity_search_with_score(
                query, **self.search_kwargs
            )
            docs = []
            for doc, score in docs_and_scores:
                # stash the similarity score on the document so it can be logged downstream
                doc.metadata["score"] = score
                docs.append(doc)
        elif self.search_type == "mmr":
            docs = self.vectorstore.max_marginal_relevance_search(
                query, **self.search_kwargs
            )
        else:
            raise ValueError(f"search_type of {self.search_type} not allowed.")
        return docs
class FAISSWithScore(FAISS):
    def as_retriever(self) -> VectorStoreRetrieverWithScore:
        return VectorStoreRetrieverWithScore(
            vectorstore=self,
            search_type="similarity",
            search_kwargs={"k": 10},
        )
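# A minimal sketch of how the score-aware retriever is used, assuming the FAISS index has been
# loaded into `vector_store` (done further below) and that indexed docs carry a "source" field;
# each returned Document exposes its similarity score via metadata (commented out, illustrative only):
# retriever = vector_store.as_retriever()
# docs = retriever.get_relevant_documents("How do I log a metric?")
# for doc in docs:
#     print(doc.metadata["source"], doc.metadata["score"])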
class RetrievalQAWithSourcesChainWithScore(RetrievalQAWithSourcesChain):
    reduce_k_below_max_tokens: bool = True
    max_tokens_limit: int = 2816
    def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]:
        question = inputs[self.question_key]
        docs = self.retriever.get_relevant_documents(question)
        return self._reduce_tokens_below_limit(docs)
def load_artifacts(config):
    faiss_artifact = wandb.use_artifact(config.faiss_artifact, type="search_index")
    faiss_artifact_dir = faiss_artifact.download()
    hyde_prompt_artifact = wandb.use_artifact(
        config.hyde_prompt_artifact, type="prompt"
    )
    hyde_artifact_dir = hyde_prompt_artifact.download()
    hyde_prompt_file = f"{hyde_artifact_dir}/hyde_prompt.txt"
    chat_prompt_artifact = wandb.use_artifact(
        config.chat_prompt_artifact, type="prompt"
    )
    chat_artifact_dir = chat_prompt_artifact.download()
    chat_prompt_file = f"{chat_artifact_dir}/chat_prompt.txt"
    return {
        "faiss": faiss_artifact_dir,
        "hyde_prompt": hyde_prompt_file,
        "chat_prompt": chat_prompt_file,
    }
# In[10]:
def parse_source_documents(source_documents):
    source_docs_dict = {}
    for i, source_doc in enumerate(source_documents):
        source_docs_dict[f"source_doc_{i}"] = {
            "page_content": source_doc.page_content,
            "metadata": source_doc.metadata["source"],
            "lookup_index": source_doc.lookup_index,
            "lookup_str": source_doc.lookup_str,
        }
    return json.dumps(source_docs_dict)
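# For example, a single retrieved document would serialize to something like the following
# (values here are hypothetical, shown only to illustrate the JSON shape fed to the grader prompt):
# '{"source_doc_0": {"page_content": "Use W&B to track experiments...",
#   "metadata": "https://docs.wandb.ai/guide", "lookup_index": 0, "lookup_str": ""}}'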
# In[11]:
# qa_chain.json
# In[12]:
from types import SimpleNamespace
# login to openai with your api key
get_openai_key()
wandbot_config = SimpleNamespace(
    faiss_artifact="parambharat/wandb_docs_bot/faiss_store:latest",
    hyde_prompt_artifact="parambharat/wandb_docs_bot/hyde_prompt:latest",
    chat_prompt_artifact="parambharat/wandb_docs_bot/system_prompt:latest",
    model_name="gpt-3.5-turbo",
    eval_model="command-nightly",
    temperature=0,
    hyde_llm_temperature=0.3,
    command_llm_temperature=0.0,
    cohere_generate_cost_usd=0.0000025,  # cost per character (not token): $0.0025 per generation unit (1000 chars)
)
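# Worked example of the per-character pricing above (illustrative numbers): a grading call with a
# 3,600-character prompt and a 10-character completion covers 3,610 characters, so it costs roughly
# 3610 * 0.0000025 = $0.009 per evaluation.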
wandb.init(
    name="synth_answer_generation_test",
    project=PROJECT,
    entity=ENTITY,
    config=wandbot_config,
)
artifacts = load_artifacts(wandb.config)
# ### Prompts
# Load wandbot v1 prompts
# In[13]:
# LOAD DATA AND PROMPTS FROM ARTIFACTS
faiss_dir = artifacts["faiss"]
hyde_prompt_template = open(artifacts["hyde_prompt"]).read()
wandbot_v1_system_prompt_template = open(artifacts["chat_prompt"]).read()
human_message_prompt_template = "{question}"
# SETUP Hypothetical Document Embedder (HyDE)
hyde_messages = [
    SystemMessagePromptTemplate.from_template(hyde_prompt_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
hyde_prompt = ChatPromptTemplate.from_messages(hyde_messages)
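# HyDE in one line: instead of embedding the raw question, an LLM first writes a hypothetical
# answer document, and that document's embedding is what gets searched against the index.
# A minimal sketch of the equivalent two-step flow (commented out and illustrative; the real
# chain is assembled with HypotheticalDocumentEmbedder further below):
# hypothetical_doc = LLMChain(llm=ChatOpenAI(temperature=0.3), prompt=hyde_prompt).run(question="How do I log a metric?")
# query_vector = OpenAIEmbeddings().embed_query(hypothetical_doc)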
# ### Alternative System Prompts
#
# Create alternate wandbot prompts to test
# In[14]:
system_prompts = {}
system_prompts["wandbot_v1_few_shot"] = wandbot_v1_system_prompt_template
system_prompts["wandbot_v1_zero_shot"] = """
As an AI assistant for the open source library wandb, your task is to answer questions based on
the given extracted parts of a long document and the question. You can provide a conversational
answer with a hyperlink to the documentation only if it is explicitly listed as a source in the context.
Provide a code block directly from the documentation wherever possible. If you do not know the answer,
you can say "Hmm, I'm not sure." If the question is not related to wandb or Weights & Biases, politely
inform the user that you can only answer questions related to wandb. The documentation for wandb can be
found at https://docs.wandb.ai.
Begin:
================
Question: {question}
================
{summaries}
================
Final Answer in Markdown:
"""
system_prompts["default_langchain_qa"]= """ | |
Use the following pieces of context to answer the question at the end. If you don't know the answer, | |
just say that you don't know, don't try to make up an answer. | |
{summaries} | |
Question: {question} | |
Helpful Answer: | |
""" | |
# Get prompt token counts
# In[15]:
import tiktoken
enc = tiktoken.encoding_for_model("gpt-4")
print(len(enc.encode("hello world")))  # quick sanity check of the tokenizer
for k in system_prompts.keys():
    print(f"{k} token count: {len(enc.encode(system_prompts[k]))}")
# ### Evaluation Prompt
# Cohere Command Grader Prompt
# In[16]:
human_prompt = "Human:"
assistant_prompt = "GRADER_RESPONSE:"
grade_command = """Grade the following WANDBOT_RESPONSE given the USER_QUESTION and SUPPORTING_DOCUMENTATION.
Grade the WANDBOT_RESPONSE based ONLY on its factual accuracy. It is OK if the WANDBOT_RESPONSE contains more information than in SUPPORTING_DOCUMENTATION, as long as it does not contain any conflicting statements.
Your GRADE should only be POSITIVE or NEGATIVE to indicate whether the WANDBOT_RESPONSE is accurate or not given the SUPPORTING_DOCUMENTATION, no other information is required.
If the WANDBOT_RESPONSE answers that there is no specific information provided in the context or that it doesn't know, then the GRADE is NEGATIVE.
Only respond with POSITIVE or NEGATIVE for GRADE."""
def command_eval_prompt_constructor(question, source_documents, answer, grade_command=grade_command):
    evaluation_prompts_template = f"""As an experienced software quality assurance tester, you are evaluating the quality of the response (WANDBOT_RESPONSE) from a
Weights & Biases (aka wandb, W&B) support bot called `wandbot`. Weights & Biases is a machine learning ops (MLOps) python library and app.
Supporting documentation (SUPPORTING_DOCUMENTATION) is provided to help you assess the quality of the response. Your job is to grade (GRADE) the response.
This is the example format of the input and a grade given to the `wandbot` support bot response:
=====================
USER_QUESTION: user question here
WANDBOT_RESPONSE: the response from the `wandbot` support bot here
SUPPORTING_DOCUMENTATION: retrieved documentation from the wandb docs here
{assistant_prompt} GRADE: POSITIVE or NEGATIVE here
=====================
This is a real example:
=====================
USER_QUESTION: How do I create a wandb sweep?
WANDBOT_RESPONSE: To create a W&B Artifact, you can use the wandb.Artifact class like so ```artifact = wandb.Artifact(name='bicycle-dataset', type='dataset')```
SUPPORTING_DOCUMENTATION: '{{"source_doc_0": {{"page_content": "Use Weights & Biases Sweeps to automate hyperparameter search and explore the space of possible models. Create a sweep with a few lines of code.", "metadata": "https://docs.wandb.ai/guide", "lookup_index": 0, "lookup_str": ""}}}}'
{assistant_prompt} GRADE: NEGATIVE
=====================
{grade_command}
USER_QUESTION: {question}
WANDBOT_RESPONSE: {answer}
SUPPORTING_DOCUMENTATION: {source_documents}
{assistant_prompt} GRADE:"""
    return evaluation_prompts_template
question = "what is wandb?" | |
answer = "Weights & Biases is a machine learning platform for teams." | |
source_documents = "[hey, ho]" | |
# print(command_eval_prompt_constructor(question, source_documents, answer, grade_command)) | |
# Cohere Command prompt template
# In[17]:
from tokenizers import Tokenizer
eval_grader_prompt_template = command_eval_prompt_constructor("", "", "")
command_nightly_tokenizer = Tokenizer.from_pretrained("Cohere/command-nightly")
prompt_enc = command_nightly_tokenizer.encode(eval_grader_prompt_template)
print(f"Command prompt template token count: {len(prompt_enc.ids)}")
# Claude Grader Prompt
# In[18]:
# def claude_eval_prompt_constructor(question, source_documents, answer):
#     evaluation_prompts_template = f"""As an experienced software quality assurance tester, you are evaluating the quality of the response from a
# Weights & Biases (aka wandb, W&B) support bot called wandbot. Weights & Biases is a machine learning ops (MLOps) python library and app.
# Supporting documentation is provided to help you assess the quality of the response.
# Your feedback should only be "POSITIVE" or "NEGATIVE" to indicate whether the response is accurate or not,
# no other information is required. For example:
# {anthropic.HUMAN_PROMPT}
# =====================
# USER_QUESTION: What is wandb?
# SUPPORTING_DOCUMENTATION: '{{"source_doc_0": {{"page_content": "Weights & Biases is the machine learning platform for developers to build better models faster", "metadata": "https://docs.wandb.ai/guide", "lookup_index": 0, "lookup_str": ""}}}}'
# WANDBOT_RESPONSE: Weights & Biases is a machine learning platform for teams.
# {anthropic.AI_PROMPT}
# POSITIVE
# {anthropic.HUMAN_PROMPT}
# =====================
# USER_QUESTION: How do I create a wandb sweep?
# SUPPORTING_DOCUMENTATION: '{{"source_doc_0": {{"page_content": "Use Weights & Biases Sweeps to automate hyperparameter search and explore the space of possible models. Create a sweep with a few lines of code.", "metadata": "https://docs.wandb.ai/guide", "lookup_index": 0, "lookup_str": ""}}}}'
# WANDBOT_RESPONSE: To create a W&B Artifact, you can use the wandb.Artifact class like so ```artifact = wandb.Artifact(name='bicycle-dataset', type='dataset')```
# {anthropic.AI_PROMPT}
# NEGATIVE
# {anthropic.HUMAN_PROMPT}
# =====================
# USER_QUESTION: {question}
# SUPPORTING_DOCUMENTATION: {source_documents}
# WANDBOT_RESPONSE: {answer}
# {anthropic.AI_PROMPT}"""
#     return evaluation_prompts_template
# question = "what is wandb?"
# answer = "Weights & Biases is a machine learning platform for teams."
# print(claude_eval_prompt_constructor(question, source_documents, answer))
# ### Load Embeddings and Vector Store
# In[19]:
base_embeddings = OpenAIEmbeddings()
embeddings = HypotheticalDocumentEmbedder(
    llm_chain=LLMChain(
        llm=ChatOpenAI(temperature=wandb.config.hyde_llm_temperature),
        prompt=hyde_prompt,
    ),
    base_embeddings=base_embeddings,
    verbose=True,
)
# LOAD FAISS VECTOR STORE
vector_store = FAISSWithScore.load_local(faiss_dir, embeddings)
# In[20]:
# LOAD QA CHAINS FOR EACH SYSTEM PROMPT
def load_qa_chain(system_prompt_template, vector_store=vector_store, chain_type="stuff"):
    qa_messages = [
        SystemMessagePromptTemplate.from_template(system_prompt_template, input_variables=["context", "question"]),
        HumanMessagePromptTemplate.from_template(human_message_prompt_template),
    ]
    qa_prompt = ChatPromptTemplate.from_messages(qa_messages)
    llm = ChatOpenAI(
        model_name=wandb.config.model_name,
        temperature=wandb.config.temperature,
        request_timeout=20,
    )
    qa_chain = RetrievalQAWithSourcesChainWithScore.from_chain_type(
        llm=llm,
        chain_type=chain_type,
        retriever=vector_store.as_retriever(),
        chain_type_kwargs={"prompt": qa_prompt},
        return_source_documents=True,
        verbose=True,
    )
    return qa_chain
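# A quick smoke test of a single chain, commented out to keep the batch run cheap
# (the question is illustrative; usage mirrors the main loop below):
# test_chain = load_qa_chain(system_prompts["wandbot_v1_zero_shot"])
# result = test_chain({"question": "How do I log images to wandb?"}, return_only_outputs=False)
# print(result["answer"])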
# Create timestamps
# In[21]:
import pandas as pd
import numpy as np
def generate_timestamps(n=10000, start_date='2023-03-01', end_date='2023-05-31'):
    # Range of datetimes with 1-second intervals
    rng = pd.date_range(start_date, end_date, freq='S')
    # Create weights for all datetimes
    weights = pd.Series(1, index=rng)
    # Decrease weights for weekends
    weights[rng.to_series().dt.dayofweek > 4] *= 0.5
    # Decrease weight for Easter Sunday (2023-04-09)
    easter = pd.to_datetime('2023-04-09')
    weights[rng.to_series().between(easter, easter + pd.DateOffset(days=1))] *= 0.5
    # Decrease weight for Easter Monday (2023-04-10)
    easter_monday = pd.to_datetime('2023-04-10')
    weights[rng.to_series().between(easter_monday, easter_monday + pd.DateOffset(days=1))] *= 0.5
    # Increase weights for 8am-6pm on weekdays
    mask = ((rng.to_series().dt.hour >= 8) & (rng.to_series().dt.hour <= 18) & (rng.to_series().dt.dayofweek <= 4))
    weights[mask] *= 1.2
    # Increase weights for Tuesday, Wednesday, Thursday
    mask = ((rng.to_series().dt.dayofweek >= 1) & (rng.to_series().dt.dayofweek <= 3))
    weights[mask] *= 1.5
    # Normalize weights so they form a probability distribution
    weights /= weights.sum()
    # Sample n datetimes using the weights
    sampled_datetimes = np.random.choice(rng, size=n, p=weights)
    # Sort the datetimes
    sampled_datetimes.sort()
    return sampled_datetimes
timestamps = generate_timestamps()
print(len(timestamps))
# In[22]:
import matplotlib.pyplot as plt
def plot_timestamps(timestamps):
    plt.figure(figsize=(10, 6))
    plt.hist(timestamps, bins=100, alpha=0.5, color='blue')
    plt.xlabel('Datetime')
    plt.ylabel('Frequency')
    plt.title('Distribution of Timestamps')
    plt.show()
# plot_timestamps(timestamps)
# In[23]:
# WANDB LOGGING CONFIG
wandb_config = {"project": PROJECT, "entity": ENTITY}  # config for OpenAI autologger
table_cols = [
    "request_timestamp", "query_id", "query", "wandbot_answer", "retrieved_source_documents",
    "synth_user_feedback_signal", "elapsed_time_s",
    "prompt_tokens", "completion_tokens", "total_tokens",
    "answer_cost_usd", "successful_requests",
    "system_prompt_version", "system_prompt_template", "human_message_prompt_template", "hyde_prompt_template", "eval_prompt_template",
    "wandb_run_id", "wandbot_model", "wandbot_temperature", "hyde_llm_temperature",
    "eval_model", "eval_elapsed_time_s",
    "eval_total_chars", "eval_cost_usd", "eval_total_tokens", "eval_prompt_tokens", "eval_completion_tokens"
]
# ### Load Questions
# In[35]:
# import pandas as pd
# df = pd.read_csv('sythetic-user-questions_2023-05-14.csv')
# questions = df["question"].values
# questions = questions[:5]
# questions
artifact = wandb.use_artifact('wandbot/wandbot_synth/run-2cv1ao9n-generated_questions_table:v0', type='run_table')
# artifact_dir = artifact.download("data")
df = artifact.get("generated_questions_table").get_dataframe()
# with open('data/generated_questions_table.table.json') as f:
#     js = json.load(f)
# columns = js['columns']
# data = js['data']
# df = pd.DataFrame(data, columns=columns)
questions = df["question"].values
# shuffle the questions
np.random.shuffle(questions)
print(len(questions))
print(df.head())
# Setup Evaluation Model
# In[28]:
import cohere
def calculate_eval_tokens(eval_grader_prompt, eval_completion):
    prompt_enc = command_nightly_tokenizer.encode(eval_grader_prompt)
    prompt_tokens_count = len(prompt_enc.ids)
    completion_enc = command_nightly_tokenizer.encode(eval_completion)
    completion_token_count = len(completion_enc.ids)
    completion_total_tokens = prompt_tokens_count + completion_token_count
    return completion_total_tokens, prompt_tokens_count, completion_token_count
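# Quick sanity check of the token accounting above (hypothetical strings, commented out):
# total, n_prompt, n_completion = calculate_eval_tokens("grade this answer", "POSITIVE")
# assert total == n_prompt + n_completion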
co = cohere.Client(cohere_api_key)
# ## Run Synth WandBot
# Setup chain variants
# In[27]:
chains = {}
for system_prompt in system_prompts.keys():
    chains[system_prompt] = load_qa_chain(system_prompts[system_prompt], vector_store, chain_type="stuff")
# chains.keys()
# In[29]:
import traceback
import langchain
langchain.debug = False
for i_q, question in enumerate(questions):
    # USER QUERY
    query_id = generate_id(length=16)
    tstamp = timestamps[i_q]
    # RUN CHAIN: pick a system prompt variant at random for each question
    system_prompt = random.choice(list(chains.keys()))
    qa_chain = chains[system_prompt]
    try:
        start_time = time.time()
        with get_openai_callback() as openai_cb:
            response = qa_chain(
                {"question": question},
                callbacks=[WandbTracer(wandb_config)],
                return_only_outputs=False,
            )
        end_time = time.time()
        elapsed_time = end_time - start_time
        answer = response["answer"]
        # RETRIEVED DOCUMENTS
        source_docs = response["source_documents"]
        source_documents = parse_source_documents(source_docs)
        # TOKEN METRICS
        prompt_tokens = openai_cb.prompt_tokens
        completion_tokens = openai_cb.completion_tokens
        total_tokens = openai_cb.total_tokens
        total_cost = openai_cb.total_cost
        successful_requests = openai_cb.successful_requests
        # GENERATE SYNTHETIC USER FEEDBACK
        eval_grader_prompt = command_eval_prompt_constructor(question, source_documents, answer)
        eval_start_time = time.time()
        response = co.generate(
            model=wandb.config.eval_model,
            prompt=eval_grader_prompt,
            max_tokens=50,
            temperature=wandb.config.command_llm_temperature,
            stop_sequences=["====================="],
            truncate="end",
        )
        eval_end_time = time.time()
        eval_elapsed_time = eval_end_time - eval_start_time
        eval_completion = response.generations[0].text
        # Get eval token counts
        eval_total_tokens, eval_prompt_tokens, eval_completion_tokens = calculate_eval_tokens(eval_grader_prompt, eval_completion)
        eval_total_chars = len(eval_grader_prompt) + len(eval_completion)
        eval_cost = eval_total_chars * wandb.config.cohere_generate_cost_usd
        synth_user_feedback = "POSITIVE" if "positive" in eval_completion.lower() else "NEGATIVE"
        # synth_user_feedback = "POSITIVE"
        # LOG TO WANDB
        wandb_table = wandb.Table(columns=table_cols)
        wandb_table.add_data(tstamp, query_id, question, answer, source_documents,
                             synth_user_feedback, elapsed_time,
                             prompt_tokens, completion_tokens, total_tokens,
                             total_cost, successful_requests,
                             system_prompt, system_prompts[system_prompt], human_message_prompt_template, hyde_prompt_template, eval_grader_prompt_template,
                             wandb.run.id, wandb.config.model_name, wandb.config.temperature, wandb.config.hyde_llm_temperature,
                             wandb.config.eval_model, eval_elapsed_time,
                             eval_total_chars, eval_cost, eval_total_tokens, eval_prompt_tokens, eval_completion_tokens)
        wandb.log({"logs/qa_with_eval": wandb_table})
    except Exception as e:
        print(f"Question {i_q}, Error occurred: {e}")
        traceback.print_exc()
        # break
    if i_q % 20 == 0:
        print(i_q)
# In[27]:
print("DONE!")
# In[ ]:
# config_table_cols = ["query_id", "run_id", "system_prompt_template",
#                      "human_message_prompt_template", "hyde_prompt_template", "eval_grader_prompt_template"]
# config_table = wandb.Table(config_table_cols)
# config_table.add_data(query_id, wandb.run.id, system_prompts[system_prompt],
#                       human_message_prompt_template, hyde_prompt_template, eval_grader_prompt_template)
# wandb.log({"logs/config_table_test": config_table})
# In[ ]:
# # ANTHROPIC EVALUATION
# eval_model = "claude-v1.3-100k"  # "claude-v1",
# # anthropic_api = "XXX"
# # client = anthropic.Client(api_key=anthropic_api)
# # max_tokens_to_sample = 100000
# eval_prompt_template = claude_eval_prompt_constructor("", "", "")  # Just to log the eval prompt template
# # eval_prompt = claude_eval_prompt_constructor(question, source_documents, answer)
# eval_start_time = time.time()
# # resp = client.completion(
# #     prompt=eval_prompt,
# #     stop_sequences=[anthropic.HUMAN_PROMPT],
# #     model=eval_model,
# #     max_tokens_to_sample=max_tokens_to_sample,
# # )