Skip to content

Instantly share code, notes, and snippets.

@arcatdmz
Created October 22, 2024 05:33
Show Gist options
  • Save arcatdmz/fd24927551f123432d3b9eebb88a5b0e to your computer and use it in GitHub Desktop.
Save arcatdmz/fd24927551f123432d3b9eebb88a5b0e to your computer and use it in GitHub Desktop.
A code snippet to look up the affiliations of >100 HCI researchers with the help of Tavily and the OpenAI Platform
import OpenAI from "openai";
import { tavily } from "@tavily/core";
import { readFileSync, writeFileSync, statSync } from "fs";
// API credentials are read from the environment.
const tavilyApiKey = process.env.TAVILY_API_KEY;
const openaiApiKey = process.env.OPENAI_API_KEY;
// Chat model to query; it is also used in the names of the output files.
const model = "gpt-4o"; // "gpt-4o-mini"
// One person per line. Entries are trimmed and blank lines (including the
// empty string produced by a trailing newline) are dropped, so no request is
// ever issued for an empty name.
const names = readFileSync("list.txt", { encoding: "utf8" })
  .split(/\r?\n/)
  .map((line) => line.trim())
  .filter((line) => line.length > 0);
// Role inserted into the search question.
const title = "HCI researcher";
// Pause (in milliseconds) applied after each lookup.
const wait = 1000;
const tavilyClient = tavily({ apiKey: tavilyApiKey });
const openai = new OpenAI({ apiKey: openaiApiKey });
// Look the names up strictly one after another, collecting one
// "name: affiliation(s)" line per person.
const answers = [];
for (const name of names) {
  answers.push(await search(name, wait));
}

// Write all collected lines to a single, model-specific result file.
const report = answers.join("\n");
writeFileSync(`result-${model}.txt`, report, { encoding: "utf8" });
/**
 * Search for the current affiliation(s) of one person.
 *
 * The prompt is loaded from `search/<name>.txt` when that file exists and is
 * built with `buildFromWeb` otherwise. The prompt is sent to the chat
 * completions API, and the question/answer pair is written to
 * `<model>/<name>.txt`.
 *
 * @param {string} name - Person to look up; also used in the cache and result file names.
 * @param {number} wait - Milliseconds to pause after the request before resolving.
 * @returns {Promise<string>} A line of the form "name: affiliation(s)".
 */
async function search(name, wait) {
  // conduct a web search or load content from cache
  // By default statSync throws ENOENT for a missing file, which would make the
  // web-search branch unreachable; with throwIfNoEntry: false it returns
  // undefined instead, so an uncached name falls through to buildFromWeb.
  const stat = statSync(`search/${name}.txt`, { throwIfNoEntry: false });
  const content = await (stat?.isFile()
    ? loadFromCache(name)
    : buildFromWeb(name));

  // extract affiliation(s)
  const completion = await openai.chat.completions.create({
    model,
    messages: [
      {
        role: "system",
        content:
          "You are a human resource database that knows who belongs to which organization. When you return the affiliation information, rely on the supplemental information the user provides.",
      },
      {
        role: "user",
        content,
      },
    ],
  });

  // write results
  const affiliation = completion.choices[0].message.content;
  const answer = `${name}: ${affiliation}`;
  const text = `Q. ${content}\n\nA. ${answer}`;
  writeFileSync(`${model}/${name}.txt`, text, { encoding: "utf8" });

  // wait for specified time
  await new Promise((r) => setTimeout(r, wait));

  // return "name: affiliation(s)"
  return answer;
}
/**
 * Read the cached prompt stored for a person.
 *
 * @param {string} name - Person whose cache file `search/<name>.txt` is read.
 * @returns {Promise<string>} The file contents decoded as UTF-8.
 */
async function loadFromCache(name) {
  const cachePath = `search/${name}.txt`;
  const cachedPrompt = readFileSync(cachePath, "utf8");
  return cachedPrompt;
}
/**
 * Build the prompt for a person from a fresh web search and cache it.
 *
 * The question is sent to the Tavily client; each returned result is rendered
 * as a numbered entry with its score, url, title and content. The resulting
 * prompt is written to `search/<name>.txt` and returned.
 *
 * @param {string} name - Person to search for; also used in the cache file name.
 * @returns {Promise<string>} The prompt text (question, answer-format instructions, search results).
 */
async function buildFromWeb(name) {
  const question = `What are the current affiliation(s) of the ${title} named ${name}?`;
  const { results } = await tavilyClient.search(question);

  // Render every search hit as a numbered, human-readable entry.
  const entries = [];
  results.forEach((hit, index) => {
    const header = `[Result ${index + 1} (confidence: ${hit.score})]`;
    entries.push(
      `${header}\n- url: ${hit.url}\n- title: ${hit.title}\n- content: ${hit.content}`
    );
  });

  // Question + answer-format instructions, an introduction, then the hits,
  // each section separated by a blank line.
  const prompt = [
    `${question} Return only the affiliations (organization names) as plain text. When there are multiple organizations, split them by commas. When the organization names have shorthands, postpend them to the full names in braces.`,
    "For your information, the latest web search returns the following information:",
    entries.join("\n\n"),
  ].join("\n\n");

  // Cache the prompt on disk under the person's name.
  writeFileSync(`search/${name}.txt`, prompt, { encoding: "utf8" });
  return prompt;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment