morganmcg1 · January 23, 2024 16:50
diff --git a/moe_non_determinism.py b/moe_non_determinism.py
 import os
 import json
 import tqdm

 import wandb
 from openai import OpenAI

 from time import sleep
 from pathlib import Path

 from dotenv import load_dotenv
 load_dotenv()

 # Chat models to benchmark. Each model is listed exactly once: the per-model
 # wandb summary keys are derived from the model name, so a repeated entry
 # would be benchmarked twice with the second run overwriting the first.
 chat_models = ["gpt-4-0613", "gpt-4-1106-preview", "gpt-3.5-turbo"]

 # Models requested through the Together endpoint instead of OpenAI's.
 together_models = ["mistralai/Mixtral-8x7B-Instruct-v0.1"]
 # together_models = []
 chat_models.extend(together_models)

 # Conversation sent unchanged to every chat model: a system turn followed by
 # the single user request.
 _system_turn = {"role": "system", "content": "You are a helpful assistant."}
 _user_turn = {
     "role": "user",
     "content": "Write a unique, surprising, extremely randomized story with highly unpredictable changes of events.",
 }
 message_history = [_system_turn, _user_turn]

 # Completion-style models and the flattened prompt used by the (currently
 # commented-out) completion-model loop.
 completion_models = [
     "text-davinci-003",
     "text-davinci-001",
     "davinci-instruct-beta",
     "davinci",
 ]
 prompt = "[System: You are a helpful assistant]\n\nUser: Write a unique, surprising, extremely randomized story with highly unpredictable changes of events.\n\nAI:"

 # (unique_sequence_count, model_name) pairs, one appended per benchmarked model.
 results = []

 import time
 class TimeIt:
     """Context manager that prints how long its ``with`` body took.

     On exit it prints ``"<name> took <elapsed> seconds"``. Exceptions raised
     inside the body are not suppressed.
     """

     def __init__(self, name):
         # Label used as the prefix of the printed timing line.
         self.name = name

     def __enter__(self):
         # perf_counter() is monotonic, so the measured interval cannot be
         # skewed by wall-clock adjustments the way time.time() can.
         self.start = time.perf_counter()
         # Return the instance so ``with TimeIt(...) as t`` binds the timer
         # rather than None.
         return self

     def __exit__(self, *args):
         # Implicitly returns None, so an in-flight exception keeps propagating.
         print(f"{self.name} took {time.perf_counter() - self.start} seconds")


 C = 30  # number of completions to make per model
 N = 128 # max_tokens

 # Fail before any request is made: OpenAI(api_key=None) falls back to the
 # OPENAI_API_KEY environment variable, so a missing Together key would send
 # the OpenAI credential to the Together endpoint.
 if any(m in together_models for m in chat_models) and not os.environ.get("TOGETHER_API_KEY"):
     raise RuntimeError(
         "TOGETHER_API_KEY is not set; set it or empty together_models before running"
     )

 # One wandb run for the whole experiment; every completion becomes a table row.
 wandb.init(project="moe-non-determinism", entity="morgan", config={"n_completions": C, "max_tokens": N})
 tbl = wandb.Table(columns=["model", "sequence"])

 # Testing chat models: send the same messages C times per model at
 # temperature 0 and count how many distinct completions come back.
 for model in chat_models:
     if model in together_models:
         api_key = os.environ.get("TOGETHER_API_KEY")
         base_url="https://api.together.xyz/"
         logit_bias = None
     else:
         api_key = os.environ.get("OPENAI_API_KEY")
         base_url = None
         # Presumably the <|endoftext|> token id, per the comment on the request below.
         logit_bias = {"100257": -100.0}
     client = OpenAI(api_key=api_key, base_url=base_url)
     sequences = set()  # distinct completion texts returned for this model
     errors = 0 # although I track errors, at no point were any errors ever emitted
     # NOTE: the reported time includes the 5-second pause after every request.
     with TimeIt(model):
         for _ in range(C):
             try:
                 completion = client.chat.completions.create(
                     model=model,
                     messages=message_history,
                     max_tokens=N,
                     temperature=0,
                     logit_bias=logit_bias, # this doesn't really do anything, because chat models don't do <|endoftext|> much
                 )
                 # print(completion)
                 text = completion.choices[0].message.content
                 sequences.add(text)
                 tbl.add_data(model, text)
             except Exception as e:
                 print('something went wrong for', model, e)
                 errors += 1
             # Pause after failures as well as successes, so an error (e.g. a
             # rate-limit response) is not followed by an immediate retry.
             sleep(5) # cheaply avoid rate limiting
     print(f"\nModel {model} created {len(sequences)} ({errors=}) unique sequences:")
     wandb.summary[f"{model}_unique_sequences"] = len(sequences)
     wandb.summary[f"{model}_errors"] = errors
     print(json.dumps(list(sequences)))
     results.append((len(sequences), model))

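 # Testing completion models (disabled).
 # NOTE: the code below calls `openai.Completion.create`, but this file only
 # imports the `OpenAI` client class, so the bare `openai` name is not in scope.
 # Port it to a client call (e.g. `client.completions.create`) before re-enabling.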
 # for model in completion_models:
 #     sequences = set()
 #     errors = 0
 #     with TimeIt(model):
 #         for _ in range(C):
 #             try:
 #                 completion = openai.Completion.create(
 #                     model=model,
 #                     prompt=prompt,
 #                     max_tokens=N,
 #                     temperature=0,
 #                     logit_bias = {"50256": -100.0}, # prevent EOS
 #                 )
 #                 sequences.add(completion.choices[0].text)
 #                 sleep(1)
 #             except Exception as e:
 #                 print('something went wrong for', model, e)
 #                 errors += 1
 #     print(f"\nModel {model} created {len(sequences)} ({errors=}) unique sequences:")
 #     print(json.dumps(list(sequences)))
 #     results.append((len(sequences), model))

 # Upload the per-completion table to wandb, then echo a tab-separated
 # summary (unique-sequence count, model name) to stdout.
 wandb.log({"results": tbl})
 print("\nTable of Results:")
 print("Num_Sequences\tModel_Name")
 for row in results:
     num_sequences, model_name = row
     print(f"{num_sequences}\t{model_name}")
	import os
	import json
	import tqdm

	import wandb
	from openai import OpenAI

	from time import sleep
	from pathlib import Path

	from dotenv import load_dotenv
	load_dotenv()

	# Chat models to benchmark. Each model is listed exactly once: the per-model
	# wandb summary keys are derived from the model name, so a repeated entry
	# would be benchmarked twice with the second run overwriting the first.
	chat_models = ["gpt-4-0613", "gpt-4-1106-preview", "gpt-3.5-turbo"]

	# Models requested through the Together endpoint instead of OpenAI's.
	together_models = ["mistralai/Mixtral-8x7B-Instruct-v0.1"]
	# together_models = []
	chat_models.extend(together_models)

	# Conversation sent unchanged to every chat model: a system turn followed by
	# the single user request.
	_system_turn = {"role": "system", "content": "You are a helpful assistant."}
	_user_turn = {
	    "role": "user",
	    "content": "Write a unique, surprising, extremely randomized story with highly unpredictable changes of events.",
	}
	message_history = [_system_turn, _user_turn]

	# Completion-style models and the flattened prompt used by the (currently
	# commented-out) completion-model loop.
	completion_models = [
	    "text-davinci-003",
	    "text-davinci-001",
	    "davinci-instruct-beta",
	    "davinci",
	]
	prompt = "[System: You are a helpful assistant]\n\nUser: Write a unique, surprising, extremely randomized story with highly unpredictable changes of events.\n\nAI:"

	# (unique_sequence_count, model_name) pairs, one appended per benchmarked model.
	results = []

	import time
	class TimeIt:
	    """Context manager that prints how long its ``with`` body took.

	    On exit it prints ``"<name> took <elapsed> seconds"``. Exceptions raised
	    inside the body are not suppressed.
	    """

	    def __init__(self, name):
	        # Label used as the prefix of the printed timing line.
	        self.name = name

	    def __enter__(self):
	        # perf_counter() is monotonic, so the measured interval cannot be
	        # skewed by wall-clock adjustments the way time.time() can.
	        self.start = time.perf_counter()
	        # Return the instance so ``with TimeIt(...) as t`` binds the timer
	        # rather than None.
	        return self

	    def __exit__(self, *args):
	        # Implicitly returns None, so an in-flight exception keeps propagating.
	        print(f"{self.name} took {time.perf_counter() - self.start} seconds")


	C = 30 # number of completions to make per model
	N = 128 # max_tokens

	# Fail before any request is made: OpenAI(api_key=None) falls back to the
	# OPENAI_API_KEY environment variable, so a missing Together key would send
	# the OpenAI credential to the Together endpoint.
	if any(m in together_models for m in chat_models) and not os.environ.get("TOGETHER_API_KEY"):
	    raise RuntimeError(
	        "TOGETHER_API_KEY is not set; set it or empty together_models before running"
	    )

	# One wandb run for the whole experiment; every completion becomes a table row.
	wandb.init(project="moe-non-determinism", entity="morgan", config={"n_completions": C, "max_tokens": N})
	tbl = wandb.Table(columns=["model", "sequence"])

	# Testing chat models: send the same messages C times per model at
	# temperature 0 and count how many distinct completions come back.
	for model in chat_models:
	    if model in together_models:
	        api_key = os.environ.get("TOGETHER_API_KEY")
	        base_url="https://api.together.xyz/"
	        logit_bias = None
	    else:
	        api_key = os.environ.get("OPENAI_API_KEY")
	        base_url = None
	        # Presumably the <|endoftext|> token id, per the comment on the request below.
	        logit_bias = {"100257": -100.0}
	    client = OpenAI(api_key=api_key, base_url=base_url)
	    sequences = set()  # distinct completion texts returned for this model
	    errors = 0 # although I track errors, at no point were any errors ever emitted
	    # NOTE: the reported time includes the 5-second pause after every request.
	    with TimeIt(model):
	        for _ in range(C):
	            try:
	                completion = client.chat.completions.create(
	                    model=model,
	                    messages=message_history,
	                    max_tokens=N,
	                    temperature=0,
	                    logit_bias=logit_bias, # this doesn't really do anything, because chat models don't do <|endoftext|> much
	                )
	                # print(completion)
	                text = completion.choices[0].message.content
	                sequences.add(text)
	                tbl.add_data(model, text)
	            except Exception as e:
	                print('something went wrong for', model, e)
	                errors += 1
	            # Pause after failures as well as successes, so an error (e.g. a
	            # rate-limit response) is not followed by an immediate retry.
	            sleep(5) # cheaply avoid rate limiting
	    print(f"\nModel {model} created {len(sequences)} ({errors=}) unique sequences:")
	    wandb.summary[f"{model}_unique_sequences"] = len(sequences)
	    wandb.summary[f"{model}_errors"] = errors
	    print(json.dumps(list(sequences)))
	    results.append((len(sequences), model))

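	# Testing completion models (disabled).
	# NOTE: the code below calls `openai.Completion.create`, but this file only
	# imports the `OpenAI` client class, so the bare `openai` name is not in scope.
	# Port it to a client call (e.g. `client.completions.create`) before re-enabling.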
	# for model in completion_models:
	# sequences = set()
	# errors = 0
	# with TimeIt(model):
	# for _ in range(C):
	# try:
	# completion = openai.Completion.create(
	# model=model,
	# prompt=prompt,
	# max_tokens=N,
	# temperature=0,
	# logit_bias = {"50256": -100.0}, # prevent EOS
	# )
	# sequences.add(completion.choices[0].text)
	# sleep(1)
	# except Exception as e:
	# print('something went wrong for', model, e)
	# errors += 1
	# print(f"\nModel {model} created {len(sequences)} ({errors=}) unique sequences:")
	# print(json.dumps(list(sequences)))
	# results.append((len(sequences), model))

	# Upload the per-completion table to wandb, then echo a tab-separated
	# summary (unique-sequence count, model name) to stdout.
	wandb.log({"results": tbl})
	print("\nTable of Results:")
	print("Num_Sequences\tModel_Name")
	for num_sequences, model_name in results:
	    # The loop body must be indented under the `for`.
	    print(f"{num_sequences}\t{model_name}")