Created
November 22, 2024 04:41
-
-
Save thesephist/0958566a0123871a176c8ebd7514bf76 to your computer and use it in GitHub Desktop.
Synthetic naturalistic dialogue data from base models.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { formatPromptLines, polymer, Step } from "@thesephist/polymer"; | |
import { z } from "zod"; | |
import { zodToJsonSchema } from "zod-to-json-schema"; | |
import { partition, PartitionBatch, unpartition } from "./partition"; | |
import { ChatMessage, isAssistantMessage } from "./types"; | |
export function mergeConsecutiveSpeakerMessages<T extends ChatMessage>( | |
messages: T[] | |
): T[] { | |
const mergedMessages: T[] = []; | |
for (const message of messages) { | |
const lastMessage = mergedMessages[mergedMessages.length - 1]; | |
if (lastMessage?.role === message.role) { | |
lastMessage.content += "\n\n" + message.content; | |
} else { | |
mergedMessages.push(message); | |
} | |
} | |
return mergedMessages; | |
} | |
function containsUrl(text: string): boolean { | |
const urlRegex = /https?:\/\//i; | |
return urlRegex.test(text); | |
} | |
// Raw input row: one full conversation transcript as a single string of
// "me: ..." / "you: ..." lines (see parseSessionTr for the format).
export type InputFinetuningSample = {
  session_tr: string;
};
// Output row in chat-message finetuning format: a system prompt followed by
// alternating user/assistant messages.
export type OutputFinetuningSample = {
  messages: ChatMessage[];
};
function zip<T, U>( | |
arr1: T[], | |
arr2: U[], | |
options: { mode: "min" | "max" } | |
): [T, U][] { | |
const { mode } = options; | |
const length = | |
mode === "min" | |
? Math.min(arr1.length, arr2.length) | |
: Math.max(arr1.length, arr2.length); | |
const zipped: [T, U][] = []; | |
for (let i = 0; i < length; i++) { | |
zipped.push([arr1[i], arr2[i]]); | |
} | |
return zipped; | |
} | |
function ssiScoreToWeight(score: number): 0 | 1 { | |
if (score >= 4) { | |
return 1; | |
} | |
return 0; | |
} | |
// A chat message annotated by the SSI ("sensible, specific, interesting")
// scoring pass: the model's reasoning plus its numeric score (expected 1-5,
// per the prompt in ssiAnnotateMessages).
type ScoredMessage = ChatMessage & {
  reasoning: string;
  score: number;
};
// Structured-output schema for the SSI annotation completion: the model must
// echo each message back (role + content verbatim) together with its reasoning
// and a score. Converted to JSON Schema via zodToJsonSchema below.
const scoredMessagesSchema = z.object({
  messages: z
    .array(
      z.object({
        role: z
          .enum(["user", "assistant"])
          .describe("'user' or 'assistant', from the given messages."),
        content: z.string().describe("Content of the original message."),
        reasoning: z
          .string()
          .describe("The reasoning or thought process behind the assessment."),
        score: z
          .number()
          .describe("The score assigned based on the assessment criteria."),
      })
    )
    .describe("A list of annotated messages."),
});
function parseSessionTr(sessionTr: string): ChatMessage[] { | |
const messages: ChatMessage[] = []; | |
for (const line of sessionTr.split("\n")) { | |
if (line.trim() === "") { | |
continue; | |
} | |
const [meOrYou, ...contentChunks] = line.split(": "); | |
const content = contentChunks.join(": "); | |
let role: ChatMessage["role"]; | |
if (meOrYou === "me") { | |
role = "user"; | |
} else if (meOrYou === "you") { | |
role = "assistant"; | |
} else { | |
throw new Error( | |
`Unrecognized speaker in input file: ${meOrYou} in "${line}"` | |
); | |
} | |
messages.push({ role, content }); | |
} | |
return messages; | |
} | |
function formatMessageContent(messageContent: string): string { | |
return messageContent.replace(/\s+/g, " "); | |
} | |
function formatMessage(messages: ChatMessage[]): string { | |
return messages | |
.map( | |
(message) => `${message.role}: ${formatMessageContent(message.content)}` | |
) | |
.join("\n") | |
.trim(); | |
} | |
export function renderFridaySystemPrompt(args: { | |
premise: string; | |
userBio: string; | |
assistantBehaviorGuidelines: string; | |
}): string { | |
const { premise, userBio, assistantBehaviorGuidelines } = args; | |
return formatPromptLines( | |
`You are an eloquent, charming, and engaging friend and conversation partner.`, | |
``, | |
`Your goal is mainly to keep the conversation flowing in an engaging but casual way.`, | |
`To facilitate this, you may choose to take on specific false but realistic memories, personalities, and opinions.`, | |
`However, when you engage with the user, you must speak as if those memories, personalities, and opinions are obviously and matter-of-factly true.`, | |
`Your message complexity should match the user's. If they are sending short messages, you should too. If they put a lot of effort into their message, you should too`, | |
``, | |
`Here's the premise for this conversation:`, | |
`${premise.trim()}`, | |
``, | |
`Here's some information about the user:`, | |
`${userBio.trim()}`, | |
``, | |
`Here is how you should behave:`, | |
`${assistantBehaviorGuidelines.trim()}`, | |
``, | |
`Now, the conversation begins.` | |
); | |
} | |
/** | |
* Returns true if the input string is "yes" or "y" (case insensitive). | |
*/ | |
const trueIfYes: Step<string, boolean> = polymer.map( | |
"true_if_yes", | |
(response: string) => | |
response.toLowerCase() === "y" || response.toLowerCase() === "yes" | |
); | |
/**
 * Annotates each message in a conversation with a score for how sensible,
 * specific, and interesting it is.
 *
 * For long transcripts, this takes care of batching the messages into chunks of
 * N turns each using scatter -> concat.
 */
const ssiAnnotateMessages: Step<ChatMessage[], ScoredMessage[]> = polymer
  // Chunk the transcript into batches of up to 20 turns so each LLM call's
  // echoed output fits within the 4096-token budget set below.
  .map("partition", (input: ChatMessage[]) => partition(input, 20))
  .scatter()
  .extend({
    // Score one batch: gpt-4o echoes every message back with reasoning and a
    // 1-5 score, constrained to the zod-derived JSON schema.
    batch: polymer
      .completion("ssi_annotate_messages", {
        model: "gpt-4o",
        renderPrompt: (input: PartitionBatch<ChatMessage>) => [
          {
            role: "system",
            content: `You are to take the role of a helpful screenwriter and character designer for a conversation between two people.`,
          },
          {
            role: "user",
            content: formatPromptLines(
              `Your task is to annotate each turn in a conversation between two people according to how sensible, specific, and interesting it is.`,
              `Sensible: The message makes sense in the context of the conversation. Note that the conversation may start in the middle; if so, assume the first messages are continuing on topic.`,
              `Specific: The message specifically addresses something in the conversation rather than being a generic response that could apply anywhere.`,
              `Interesting: The message is engaging and keeps the attention of both people. Interesting messages often inject personality and wit, and can signal shifts to new topics or introduce new events. Engaging questions are often interesting. A message that only refuses to answer a question for no reason is never interesting.`,
              ``,
              `For each line representing a turn in the conversation below, determine a "sensible, specific, and interesting score" between 1 and 5, 1 being generic and uninteresting, and 5 being highly sensible, specific, and interesting in the context of the dialogue.`,
              ``,
              `Here is the conversation:`,
              `<conversation>`,
              `${formatMessage(input.batch)}`,
              `</conversation>`,
              ``,
              `Your response should be formatted as a JSON array, with each element representing a message in the conversation.`,
              `For each message object, include these properties in order: role, content, reasoning, then a score.`,
              `Role and content should be repeated exactly as given.`,
              `Score should be an integer in the set {1, 2, 3, 4, 5}. Be consistent and discerning with your scoring.`,
              `Your output should include every single message given, including all user and assistant messages.`
            ),
          },
        ],
        jsonSchema: {
          name: "ssi_annotated_messages",
          schema: zodToJsonSchema(scoredMessagesSchema),
          strict: true,
        },
        maxTokens: 4096,
      })
      .collect()
      .parseJSON<{ messages: ScoredMessage[] }>()
      .map("get_messages", ({ messages }) => messages),
  })
  .assert<PartitionBatch<ScoredMessage>>()
  // Gather the scored batches and stitch them back into a single ordered
  // transcript, then re-emit one record per conversation.
  .coalesce((input) => input)
  .map("unpartition", unpartition)
  .scatter();
/** | |
* Prepares a finetuning file for Friday models. | |
* | |
* 1. Filters out harmful or toxic examples. | |
* 2. Filters out incoherent examples. | |
* 3. Merges consecutive messages from the same speaker. | |
* 4. Adds a prefix to the conversation with a more detailed premise/system prompt. | |
*/ | |
export const prepareFinetuningFile: Step< | |
InputFinetuningSample, | |
OutputFinetuningSample | |
> = polymer | |
.assert<InputFinetuningSample>() | |
.map("parse_session_tr", (input: InputFinetuningSample) => ({ | |
...input, | |
messages: parseSessionTr(input.session_tr), | |
})) | |
.assert<InputFinetuningSample & { messages: ChatMessage[] }>() | |
.extend({ | |
messages: polymer | |
.map( | |
"get_chat_messages", | |
(input: { messages: ChatMessage[] }) => input.messages | |
) | |
.map("remove_system_messages", (messages: ChatMessage[]) => | |
messages.filter((message) => message.role !== "system") | |
) | |
.map("trim_messages", (messages: ChatMessage[]) => | |
messages.map((message) => ({ | |
...message, | |
content: message.content.trim(), | |
})) | |
) | |
.map("ensure_no_quoted_utterances", (messages: ChatMessage[]) => | |
messages.map((message) => ({ | |
...message, | |
content: message.content.replace(/^"(.+)"$/, "$1"), | |
})) | |
) | |
.filter( | |
"exclude_if_long_repeated_utterances", | |
(messages: ChatMessage[]) => { | |
const maxRepeatedUtteranceLength = 12; // Repeated utterances longer than this are excluded | |
const repeatedUtteranceGrace = 1; // Up to this many repeats are OK | |
const utterances = messages | |
.map((message) => message.content) | |
.filter((content) => content.length >= maxRepeatedUtteranceLength); | |
if ( | |
new Set(utterances).size < | |
utterances.length - repeatedUtteranceGrace | |
) { | |
return false; | |
} | |
return true; | |
} | |
) | |
.filter("exclude_if_super_long_utterances", (messages: ChatMessage[]) => { | |
// NOTE: This is different from the limit used in sampling. This is | |
// because this is the limit effective for catching runaway repetitions | |
// during base LLM sampling, while the sampling limit is lower to | |
// conservatively ensure natural dialogue over monologue. | |
const maxUtteranceLength = 1000; | |
const superLongUtterances = messages | |
.map((message) => message.content) | |
.filter((content) => content.length > maxUtteranceLength); | |
return superLongUtterances.length === 0; | |
}) | |
.map("remove_empty_messages", (messages: ChatMessage[]) => | |
messages.filter((message) => message.content !== "") | |
) | |
.map("merge_consecutive_speaker_messages", (messages: ChatMessage[]) => { | |
return mergeConsecutiveSpeakerMessages(messages); | |
}) | |
.filter( | |
// If either user or assistant has too many consecutive messages, | |
// exclude the conversation since it's either (1) a bad parse of invalid | |
// conversation transcript or (2) a one-sided conversation that we don't | |
// want to learn from. | |
"exclude_if_too_many_consecutive_messages", | |
(messages: ChatMessage[]) => { | |
let userConsecutiveMessageCount = 0; | |
let assistantConsecutiveMessageCount = 0; | |
for (const message of messages) { | |
if (message.role === "user") { | |
userConsecutiveMessageCount++; | |
assistantConsecutiveMessageCount = 0; | |
} else if (message.role === "assistant") { | |
assistantConsecutiveMessageCount++; | |
userConsecutiveMessageCount = 0; | |
} | |
const maxConsecutiveMessages = 3; | |
if ( | |
userConsecutiveMessageCount > maxConsecutiveMessages || | |
assistantConsecutiveMessageCount > maxConsecutiveMessages | |
) { | |
return false; | |
} | |
} | |
return true; | |
} | |
) | |
.map( | |
"ensure_begins_with_user_ends_with_assistant", | |
(messages: ChatMessage[]) => { | |
if (messages[0].role !== "user") { | |
messages.unshift({ role: "user", content: "" }); | |
} | |
if (messages[messages.length - 1].role !== "assistant") { | |
// Remove the last message if it's not an assistant message. | |
messages.pop(); | |
} | |
return messages; | |
} | |
), | |
}) | |
.extend({ | |
isToxicOrHarmful: polymer | |
.completion("is_toxic_or_harmful", { | |
model: "gpt-4o", | |
renderPrompt: (input: { messages: ChatMessage[] }) => [ | |
{ | |
role: "system", | |
content: | |
"You are a responsible and tactful assistant. While you are thoughtful with attention to detail, you also know how to have fun.", | |
}, | |
{ | |
role: "user", | |
content: formatPromptLines( | |
`Given the following conversation snippet, determine whether it is toxic, harmful, suggestive, or offensive.`, | |
`Here is the conversation snippet:`, | |
`---`, | |
`${formatMessage(input.messages)}`, | |
`---`, | |
``, | |
`First, think about your response carefully.`, | |
`Any topic that is derogatory to a minority, LGBTQ+, or disabled community is automatically toxic.`, | |
`Any topic that is sexual in nature is automatically suggestive.`, | |
`Now, respond with "yes" if the conversation fits into one of the above categories, and "no" otherwise.`, | |
`Enclose your response in <answer> tags, e.g. <answer>yes</answer> or <answer>no</answer>.` | |
), | |
}, | |
], | |
}) | |
.capture("<answer>", "</answer>") | |
.collect() | |
.then(trueIfYes), | |
isContinuousAndCoherent: polymer | |
.completion("is_continuous_and_coherent", { | |
model: "gpt-4o", | |
renderPrompt: (input: { messages: ChatMessage[] }) => [ | |
{ | |
role: "system", | |
content: | |
"You are an imaginative fiction writer and proofreader with impeccable taste and attention to detail.", | |
}, | |
{ | |
role: "user", | |
content: formatPromptLines( | |
`Given the following conversation snippet, determine whether it is continuous and coherent.`, | |
`Continuous: The snippet represents a single conversation, with no unexplained gaps or breaks.`, | |
`Coherent: The conversation makes logical sense, and while it may begin and end abruptly, the speakers are engaging in a natural way. The dialogue avoids excessive repetition or endless loops.`, | |
``, | |
`Here is the conversation snippet:`, | |
`---`, | |
`${formatMessage(input.messages)}`, | |
`---`, | |
``, | |
`Now, respond with "yes" if the conversation is continuous and coherent, and "no" otherwise.`, | |
`First, think about your response carefully.`, | |
`Then, enclose your response in <answer> tags, e.g. <answer>yes</answer> if the conversation is continuous and coherent, or <answer>no</answer> otherwise.` | |
), | |
}, | |
], | |
}) | |
.capture("<answer>", "</answer>") | |
.collect() | |
.then(trueIfYes), | |
premise: polymer | |
.completion("write_premise", { | |
model: "gpt-4o", | |
renderPrompt: (input: { messages: ChatMessage[] }) => [ | |
{ role: "system", content: "You are a helpful assistant." }, | |
{ | |
role: "user", | |
content: formatPromptLines( | |
`Given the following conversation snippet, write a concise premise that captures the essence of the conversation.`, | |
`The premise should be a short, 100-word summary written in the present tense describing the topic and setting of the conversation, as well as what each speaker wants or describes during it.`, | |
`The premise should be a kind of logline for the conversation.`, | |
`When referring to the speakers, refer to them as the "user" and "assistant".`, | |
`You may speculate about each of the speakers' inner monologues and motivations.`, | |
`Even if you speculate, write your premise as if you are certain of the assumptions, like you're telling a story.`, | |
``, | |
`Here is the conversation snippet:`, | |
`---`, | |
`${formatMessage(input.messages)}`, | |
`---`, | |
``, | |
`Now, write your premise.`, | |
`Enclose your response in <premise> tags, e.g. <premise></premise>.` | |
), | |
}, | |
], | |
}) | |
.capture("<premise>", "</premise>") | |
.collect(), | |
userBio: polymer | |
.completion("write_assistant_behavior_guidelines", { | |
model: "gpt-4o", | |
renderPrompt: (input: { messages: ChatMessage[] }) => [ | |
{ role: "system", content: "You are a helpful assistant." }, | |
{ | |
role: "user", | |
content: formatPromptLines( | |
`Given the following conversation snippet, write a concise bio for the USER.`, | |
`The bio should be a short, 100-word summary describing the USER from contextual details available in the conversation.`, | |
`Refer to the USER as "user", in the third person.`, | |
`Even if you speculate, write your bio as if you are certain of the assumptions, like you're telling a story.`, | |
``, | |
`Here is the conversation snippet:`, | |
`---`, | |
`${formatMessage(input.messages)}`, | |
`---`, | |
``, | |
`Now, write your bio.`, | |
`Enclose your response in <bio> tags, e.g. <bio></bio>.` | |
), | |
}, | |
], | |
}) | |
.capture("<bio>", "</bio>") | |
.collect(), | |
assistantBehaviorGuidelines: polymer | |
.completion("write_assistant_behavior_guidelines", { | |
model: "gpt-4o", | |
renderPrompt: (input: { messages: ChatMessage[] }) => [ | |
{ | |
role: "system", | |
content: | |
"You are a discerning student of human behavior and psychology.", | |
}, | |
{ | |
role: "user", | |
content: formatPromptLines( | |
`Given the following conversation snippet, write a concise set of behavior guidelines for the ASSISTANT.`, | |
`The behavior guidelines should be a short, 100-word summary written in the present tense describing the behavior of the ASSISTANT.`, | |
`The behavior guidelines should be a kind of character description for the ASSISTANT, including any personality, opinions, preferences, and goals.`, | |
`Do not refer to the ASSISTANT as "assistant". Instead, write the character profile in second person.`, | |
`Even if you speculate, write your behavior guidelines as if you are certain of the assumptions, like you're telling a story.`, | |
``, | |
`Here is the conversation snippet:`, | |
`---`, | |
`${formatMessage(input.messages)}`, | |
`---`, | |
``, | |
`Now, write your behavior guidelines.`, | |
`Remember, write the behavior guidelines in second person, as if you were instructing the assistant.`, | |
`Enclose your response in <behavior> tags, e.g. <behavior></behavior>.` | |
), | |
}, | |
], | |
}) | |
.capture("<behavior>", "</behavior>") | |
.collect(), | |
}) | |
.filter("not_harmful", ({ isToxicOrHarmful }) => !isToxicOrHarmful) | |
.filter("coherent", ({ isContinuousAndCoherent }) => isContinuousAndCoherent) | |
.extend({ | |
ssiAnnotatedMessages: polymer | |
.map( | |
"get_messages", | |
(input: { messages: ChatMessage[] }) => input.messages | |
) | |
.then(ssiAnnotateMessages), | |
}) | |
.map("apply_ssi_score_as_weights", (input) => { | |
const { messages, ssiAnnotatedMessages } = input; | |
return { | |
...input, | |
messages: zip(messages, ssiAnnotatedMessages, { mode: "min" }).map( | |
([message, annotatedMessage]): ChatMessage => { | |
if (isAssistantMessage(message)) { | |
let weight: 0 | 1; | |
if ( | |
isAssistantMessage(annotatedMessage) && | |
formatMessageContent(message.content) === | |
formatMessageContent(annotatedMessage.content) && | |
!containsUrl(message.content) | |
) { | |
weight = ssiScoreToWeight(annotatedMessage.score); | |
} else { | |
weight = 0; | |
} | |
return { | |
...message, | |
weight, | |
}; | |
} | |
return message; | |
} | |
), | |
}; | |
}) | |
.filter("at_leasts_one_weighted_assistant_message", ({ messages }) => { | |
for (const message of messages) { | |
if (isAssistantMessage(message) && message.weight && message.weight > 0) { | |
return true; | |
} | |
} | |
return false; | |
}) | |
.map( | |
"prefix_premise", | |
({ | |
messages, | |
premise, | |
userBio, | |
assistantBehaviorGuidelines, | |
}: { | |
messages: ChatMessage[]; | |
premise: string; | |
userBio: string; | |
assistantBehaviorGuidelines: string; | |
}) => [ | |
{ | |
role: "system" as const, | |
content: renderFridaySystemPrompt({ | |
premise, | |
userBio, | |
assistantBehaviorGuidelines, | |
}), | |
}, | |
...messages, | |
] | |
) | |
.map("format_for_finetuning", (messages: ChatMessage[]) => ({ | |
messages, | |
})); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment