Skip to content

Instantly share code, notes, and snippets.

@vinnymac
Created January 15, 2025 07:52
Show Gist options
  • Save vinnymac/26bf1bba81ea9225c8802dae18d12e35 to your computer and use it in GitHub Desktop.
Save vinnymac/26bf1bba81ea9225c8802dae18d12e35 to your computer and use it in GitHub Desktop.
Example of using evalite with Azure OpenAI, a custom Azure-backed factuality scorer, and the autoevals Levenshtein scorer
import { createAzure } from '@ai-sdk/azure';
import { streamText } from 'ai';
import { Levenshtein, type Scorer } from 'autoevals';
import { evalite } from 'evalite';
import { traceAISDKModel } from 'evalite/ai-sdk';
// Mirror AZURE_OPENAI_API_KEY into AZURE_API_KEY (presumably so that code relying on
// the provider's default key lookup also works — confirm against the @ai-sdk/azure docs).
// The guard matters: assigning `undefined` to a process.env property stores the
// literal string "undefined", which would hide a missing key behind a bogus value.
if (process.env.AZURE_OPENAI_API_KEY !== undefined) {
  process.env.AZURE_API_KEY = process.env.AZURE_OPENAI_API_KEY;
}

// NOTE: the previous self-assignment of AZURE_OPENAI_RESOURCE_NAME was removed. It was a
// no-op when the variable was set, and turned a missing variable into the string
// "undefined" (and therefore a bogus resource name below) when it was not.

// Azure OpenAI provider shared by the task under test and the factuality scorer.
// Resource name, key and API version all come from the environment.
const azure = createAzure({
  resourceName: process.env.AZURE_OPENAI_RESOURCE_NAME,
  apiKey: process.env.AZURE_OPENAI_API_KEY,
  apiVersion: process.env.OPENAI_API_VERSION,
});
/**
 * Drains an async stream of string chunks and returns them concatenated,
 * in arrival order.
 *
 * @param stream - Async iterable that yields string chunks.
 * @returns The full text; an empty string when the stream yields nothing.
 */
async function streamToText(stream: AsyncIterable<string>): Promise<string> {
  const pieces: string[] = [];
  for await (const piece of stream) {
    pieces.push(piece);
  }
  return pieces.join('');
}
/**
 * Custom Azure-based factuality scorer that implements the autoevals `Scorer` interface.
 *
 * Asks the Azure deployment to grade how closely `output` matches `expected` and
 * turns the model's textual reply into a numeric score.
 *
 * @param output - The answer produced by the task under evaluation.
 * @param expected - The reference answer for the same input.
 * @returns A score object named `AzureFactuality`. The score is always a finite
 *   number in [0, 1]: replies outside that range are clamped, and a reply that
 *   does not start with a number scores 0 with a reason that quotes the raw reply.
 */
const AzureFactuality: Scorer<string, string> = async ({ output, expected }) => {
  const result = await streamText({
    model: azure(deploymentId),
    system: `You are an AI assistant that evaluates the factual accuracy of responses.
Compare the following output with the expected answer and determine if they convey the same factual information.
Respond with a score between 0 and 1, where:
1 = Completely factually accurate and equivalent
0 = Completely factually inaccurate or different
Only respond with the numerical score, nothing else.`,
    prompt: `Output: "${output}"
Expected: "${expected}"`,
  });
  const text = await streamToText(result.textStream);
  const parsed = Number.parseFloat(text);

  // The grader is a language model, so its reply is not guaranteed to be a bare
  // number. Never let NaN/Infinity leak out as a score.
  if (!Number.isFinite(parsed)) {
    return {
      name: 'AzureFactuality',
      score: 0,
      reason: `Could not parse a numeric score from the model reply: ${JSON.stringify(text)}`,
    };
  }

  // Keep the score inside the [0, 1] range the prompt asks for, even if the model strays.
  const score = Math.min(1, Math.max(0, parsed));
  return {
    name: 'AzureFactuality',
    score,
    reason: `Factual similarity score: ${score}`,
  };
};
// Azure OpenAI deployment name, read from the environment; undefined when the variable is unset.
const { AZURE_OPENAI_DEPLOYMENT_ID: deploymentId } = process.env;
// "Test Capitals" eval: asks the Azure deployment for capital cities and scores each
// streamed answer with the custom factuality scorer and Levenshtein distance.
evalite('Test Capitals', {
  // Question / expected-answer pairs; a fresh array is built on every call.
  data: async () => {
    const cases = [
      { input: `What's the capital of France?`, expected: `Paris` },
      { input: `What's the capital of Germany?`, expected: `Berlin` },
    ];
    return cases;
  },
  // Sends one question to the traced model and hands back the text stream as-is.
  task: async (question) => {
    const tracedModel = traceAISDKModel(azure(deploymentId));
    const { textStream } = await streamText({
      model: tracedModel,
      system: `
Answer the question concisely. Answer in as few words as possible.
Remove full stops from the end of the output.
If the country has no capital, return '<country> has no capital'.
If the country does not exist, return 'Unknown'.
`,
      prompt: question,
    });
    return textStream;
  },
  scorers: [AzureFactuality, Levenshtein],
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment