Skip to content

Instantly share code, notes, and snippets.

@morganmcg1
Created January 23, 2024 16:50
Show Gist options
  • Save morganmcg1/40d3a801613921371c02338efa70c3dd to your computer and use it in GitHub Desktop.
Save morganmcg1/40d3a801613921371c02338efa70c3dd to your computer and use it in GitHub Desktop.
Non-determinism in GPT-3.5, GPT-4 and Mixtral
import os
import json
import tqdm
import wandb
from openai import OpenAI
from time import sleep
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
# OpenAI-hosted chat models to probe. Each entry is listed exactly once so
# that every model is queried the same number of times and its per-model
# summary values are written only once.
chat_models = ["gpt-4-0613", "gpt-4-1106-preview", "gpt-3.5-turbo"]

# Models reached through Together's endpoint rather than OpenAI's.
together_models = ["mistralai/Mixtral-8x7B-Instruct-v0.1"]
# together_models = []  # uncomment to skip the Together-hosted models
chat_models.extend(together_models)

# Conversation sent verbatim to every chat model.
message_history = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": "Write a unique, surprising, extremely randomized story with highly unpredictable changes of events.",
    },
]

# Inputs for the (currently commented-out) completion-model experiment.
completion_models = ["text-davinci-003", "text-davinci-001", "davinci-instruct-beta", "davinci"]
prompt = "[System: You are a helpful assistant]\n\nUser: Write a unique, surprising, extremely randomized story with highly unpredictable changes of events.\n\nAI:"

# Accumulates one (unique_sequence_count, model_name) tuple per tested model.
results = []
import time
class TimeIt:
    """Context manager that prints how long its ``with`` body took.

    On exit it prints ``"<name> took <seconds> seconds"``. The message is
    printed whether or not the body raised, and exceptions are never
    suppressed.

    Attributes:
        name: Label used in the printed message.
        start: ``time.perf_counter()`` reading taken on entry.
        elapsed: Seconds spent inside the ``with`` body, set on exit.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        self.start = 0.0
        self.elapsed = 0.0

    def __enter__(self) -> "TimeIt":
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments during a long run.
        self.start = time.perf_counter()
        # Return self so ``with TimeIt(...) as t`` gives access to the timer.
        return self

    def __exit__(self, *args) -> None:
        self.elapsed = time.perf_counter() - self.start
        print(f"{self.name} took {self.elapsed} seconds")
# Experiment size: how many times each model is sampled, and the cap on the
# length of each sampled completion.
C = 30  # number of completions to make per model
N = 128  # max_tokens

# Start the Weights & Biases run, recording both knobs in the run config.
wandb.init(
    project="moe-non-determinism",
    entity="morgan",
    config=dict(n_completions=C, max_tokens=N),
)

# One row per completion: the model that produced it and the text itself.
tbl = wandb.Table(columns=["model", "sequence"])
# Testing chat models
# For every model: request the same prompt C times at temperature 0, collect
# the distinct completions, and record the counts in wandb and in `results`.
for model in chat_models:
    # Together-hosted models go through the same OpenAI client class, just
    # with a different API key and base URL.
    if model in together_models:
        key_var = "TOGETHER_API_KEY"
        base_url = "https://api.together.xyz/"
        logit_bias = None
    else:
        key_var = "OPENAI_API_KEY"
        base_url = None
        logit_bias = {"100257": -100.0}

    api_key = os.environ.get(key_var)
    if not api_key:
        # OpenAI(api_key=None) falls back to the OPENAI_API_KEY environment
        # variable, which would send the OpenAI key to whichever base_url is
        # configured. Fail loudly instead of leaking a credential.
        raise RuntimeError(f"{key_var} is not set; cannot query {model}")
    client = OpenAI(api_key=api_key, base_url=base_url)

    sequences = set()
    errors = 0  # although I track errors, at no point were any errors ever emitted
    with TimeIt(model):
        for _ in range(C):
            try:
                completion = client.chat.completions.create(
                    model=model,
                    messages=message_history,
                    max_tokens=N,
                    temperature=0,
                    logit_bias=logit_bias,  # this doesn't really do anything, because chat models don't do <|endoftext|> much
                )
                # print(completion)
                text = completion.choices[0].message.content
                sequences.add(text)
                tbl.add_data(model, text)
            except Exception as e:
                # Best effort: count the failure and keep sampling.
                print('something went wrong for', model, e)
                errors += 1
            # cheaply avoid rate limiting; pausing after failures too keeps a
            # rate-limit error from triggering an immediate retry
            sleep(5)

    print(f"\nModel {model} created {len(sequences)} ({errors=}) unique sequences:")
    wandb.summary[f"{model}_unique_sequences"] = len(sequences)
    wandb.summary[f"{model}_errors"] = errors
    print(json.dumps(list(sequences)))
    results.append((len(sequences), model))
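# Testing completion models (disabled: the code below calls the legacy `openai.Completion` interface, and this file only imports `OpenAI`)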
# for model in completion_models:
# sequences = set()
# errors = 0
# with TimeIt(model):
# for _ in range(C):
# try:
# completion = openai.Completion.create(
# model=model,
# prompt=prompt,
# max_tokens=N,
# temperature=0,
# logit_bias = {"50256": -100.0}, # prevent EOS
# )
# sequences.add(completion.choices[0].text)
# sleep(1)
# except Exception as e:
# print('something went wrong for', model, e)
# errors += 1
# print(f"\nModel {model} created {len(sequences)} ({errors=}) unique sequences:")
# print(json.dumps(list(sequences)))
# results.append((len(sequences), model))
# Printing table of results
# Upload the per-completion table to the wandb run, then print a
# tab-separated summary with one line per tested model.
wandb.log({"results": tbl})
print("\nTable of Results:")
print("Num_Sequences\tModel_Name")
for num_sequences, model_name in results:
    print(f"{num_sequences}\t{model_name}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment