Skip to content

Instantly share code, notes, and snippets.

@skeptrunedev
Created March 17, 2025 19:46
Show Gist options
  • Save skeptrunedev/175dd4bde4a1c2babd3dc739aee39040 to your computer and use it in GitHub Desktop.
Save skeptrunedev/175dd4bde4a1c2babd3dc739aee39040 to your computer and use it in GitHub Desktop.
ingest-amplify-voice-to-trieve.ts
import { Window } from "happy-dom";
import { TrieveSDK, type ChunkReqPayload } from "trieve-ts-sdk";
// Trieve client shared by the whole script. Credentials come from the
// environment: the dataset id is passed through as-is, and a missing API key
// falls back to an empty string.
const { TRIEVE_DATASET_ID, TRIEVE_API_KEY } = process.env;
const trieve = new TrieveSDK({
  datasetId: TRIEVE_DATASET_ID,
  apiKey: TRIEVE_API_KEY ?? "",
});
/**
 * Fetches one page of the Amplify Voice AI Skool feed, turns every post found
 * in the page's embedded `__NEXT_DATA__` JSON into a Trieve chunk payload, and
 * queues the batch through `trieve.createChunk`.
 *
 * @param page - Page number, sent as the `p` query parameter.
 * @returns `true` when at least one post was found on the page (the caller
 *   should request the next page), `false` when the page had no posts.
 * @throws Error when the HTTP response is not 2xx, or when the page does not
 *   contain a usable `__NEXT_DATA__` payload with `props.pageProps`.
 */
const fetchPageOfPosts = async ({
  page,
}: {
  page: number;
}): Promise<boolean> => {
  const htmlPageResp = await fetch(
    `https://www.skool.com/amplify-voice-ai?p=${page}`,
    {
      headers: {
        accept:
          "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "accept-language": "en-US,en;q=0.7",
        "cache-control": "no-cache",
        pragma: "no-cache",
        priority: "u=0, i",
        "sec-ch-ua":
          '"Not(A:Brand";v="99", "Brave";v="133", "Chromium";v="133"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Linux"',
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "?1",
        "sec-gpc": "1",
        "upgrade-insecure-requests": "1",
      },
      method: "GET",
    }
  );
  // Fail loudly on 4xx/5xx instead of trying to parse an error page.
  if (!htmlPageResp.ok) {
    throw new Error(
      `Failed to fetch page ${page}: ${htmlPageResp.status} ${htmlPageResp.statusText}`
    );
  }
  const html = await htmlPageResp.text();

  // Parse the HTML with scripts disabled; we only need the JSON blob that is
  // embedded in the #__NEXT_DATA__ element.
  const window = new Window({
    settings: {
      disableJavaScriptEvaluation: true,
      disableJavaScriptFileLoading: true,
    },
  });
  let nextDataJson: string | null | undefined;
  try {
    window.document.body.innerHTML = html;
    // textContent yields the raw script text; innerHTML is a serialization and
    // is not guaranteed to round-trip characters such as `&` unchanged.
    nextDataJson =
      window.document.querySelector("#__NEXT_DATA__")?.textContent;
  } finally {
    // One Window is created per page, so release it before moving on.
    await window.happyDOM.close();
  }

  if (!nextDataJson) {
    throw new Error(`Page ${page} does not contain a #__NEXT_DATA__ payload`);
  }
  const pageProps = JSON.parse(nextDataJson)?.props?.pageProps;
  if (!pageProps) {
    throw new Error(`Page ${page} has no props.pageProps in #__NEXT_DATA__`);
  }
  // A missing or non-array postTrees is treated as "no posts on this page".
  const postTrees = Array.isArray(pageProps.postTrees)
    ? pageProps.postTrees
    : [];

  const chunkReqPayloads: ChunkReqPayload[] = [];
  for (const postObj of postTrees) {
    const curPost = postObj.post;
    const image_urls: string[] = [];
    if (curPost.metadata.imagePreview) {
      image_urls.push(curPost.metadata.imagePreview);
    }
    const reqPayload: ChunkReqPayload = {
      tracking_id: curPost.id,
      link: `https://www.skool.com/amplify-voice-ai/${curPost.name}`,
      num_value: Number(curPost.metadata.upvotes),
      time_stamp: curPost.createdAt,
      // NOTE(review): title/content/author are interpolated without HTML
      // escaping — confirm whether Skool content is plain text or markup.
      chunk_html: `<div>
<h1>${curPost.metadata.title}</h1>
<p>${curPost.metadata.content}</p>
<p>By ${curPost.user.firstName} ${curPost.user.lastName}</p>
</div>
`,
      image_urls,
      // Re-running the script updates existing chunks instead of duplicating.
      upsert_by_tracking_id: true,
      metadata: curPost,
    };
    chunkReqPayloads.push(reqPayload);
  }

  // Skip the API round-trip when there is nothing to send.
  if (chunkReqPayloads.length > 0) {
    await trieve.createChunk(chunkReqPayloads);
  }
  console.log(`Queued ${chunkReqPayloads.length} chunks for page ${page}`);
  return chunkReqPayloads.length > 0;
};
// Walk the feed page by page, starting at 1, until a page yields no posts.
let page = 1;
while (await fetchPageOfPosts({ page })) {
  page++;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment