Skip to content

Instantly share code, notes, and snippets.

@danielrosehill
Last active September 27, 2025 21:05
Show Gist options
  • Save danielrosehill/c6bf4392f3d8deaa42fdacf859d20d27 to your computer and use it in GitHub Desktop.
Save danielrosehill/c6bf4392f3d8deaa42fdacf859d20d27 to your computer and use it in GitHub Desktop.
Scrape your custom GPTs
(async () => {
// ---------- helpers ----------
// Resolve (with no value) once `ms` milliseconds have elapsed.
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// Turn a possibly-relative URL into an absolute one, using the current page as base.
const ABS = (u) => {
  const resolved = new URL(u, location.href);
  return resolved.href;
};
// Collapse every run of whitespace (newlines included) to one space and trim the ends.
// Falsy input yields ''.
const norm = (s) => {
  const raw = s || '';
  return raw.replace(/\s+/g, ' ').trim();
};
// Remove zero-width space characters (U+200B) from a string.
const clean = (s) => s.replace(/\u200b/g, '');
// Normalized text of an element: a node with no element children is read via
// textContent; otherwise innerText is preferred, with textContent as the fallback.
// A missing element gives ''.
const textOf = (el) => {
  if (!el) {
    return '';
  }
  let raw;
  if (el.childElementCount === 0) {
    raw = el.textContent;
  } else {
    raw = el.innerText || el.textContent;
  }
  return norm(clean(raw || ''));
};
// Selector alternatives for a "title-ish" node, tried as one comma-joined selector list.
const TITLE_SELECTOR = [
  'h1',
  'h2',
  'h3',
  '[role="heading"]',
  '[data-testid*="title" i]',
  '.title',
  '[class*="title" i]',
].join(',');
// Selector alternatives for a "description-ish" node; a plain <p> is the last resort.
const DESC_SELECTOR = [
  '[data-testid*="description" i]',
  '.description',
  '[class*="description" i]',
  'p',
].join(',');
// First title-like descendant of a card/anchor (whatever querySelector returns).
const findTitleEl = (root) => {
  return root.querySelector(TITLE_SELECTOR);
};
// First description-like descendant of a card/anchor (whatever querySelector returns).
const findDescEl = (root) => {
  return root.querySelector(DESC_SELECTOR);
};
// Walk document + shadow DOMs, collecting every <a href> element.
// Elements go into a Set so a node seen on several passes is kept only once.
const found = new Set();
const visit = (root) => {
  root.querySelectorAll?.('a[href]').forEach(a => found.add(a));
  // Recurse into any open shadow roots hanging off elements under this root.
  root.querySelectorAll?.('*').forEach(el => { if (el.shadowRoot) visit(el.shadowRoot); });
};
// First pass: whatever is in the DOM before any scrolling.
visit(document);
// Jiggle scroll a bit (helps virtualized lists) and re-collect after every step.
// Collecting only before the scroll would miss links that appear while scrolling;
// collecting on each step also keeps links that are dropped from the DOM later.
for (let i = 0; i < 8; i++) {
  window.scrollTo(0, document.body.scrollHeight);
  await sleep(250);
  visit(document);
}
// Stable array of unique anchor elements for the extraction loop that follows.
const anchors = [...found];
// Match GPT links: URLs containing /g/g-<alphanumeric id>
const isGPT = href => /\/g\/g-[a-z0-9]+/i.test(href);
// Split an element's text into cleaned, non-empty lines.
// The split must happen BEFORE norm(): norm() folds '\n' into a single space, so
// normalizing first would always leave exactly one line.
const linesOf = (el) =>
  (el.innerText || el.textContent || '')
    .split('\n')
    .map(s => norm(clean(s)))
    .filter(Boolean);
// One [title, description, url, image] row per unique GPT link.
const rows = [];
const seen = new Set();
for (const a of anchors) {
  let href;
  try {
    href = ABS(a.getAttribute('href') || '');
  } catch {
    // new URL() rejects malformed hrefs; one bad link should not abort the export.
    continue;
  }
  if (!isGPT(href)) continue;
  if (seen.has(href)) continue;
  const lines = linesOf(a);
  // Title: heading-ish > aria-label/title > first non-empty line of anchor text
  let title = '';
  const titleEl = findTitleEl(a);
  if (titleEl) title = textOf(titleEl);
  if (!title) title = norm(a.getAttribute('aria-label') || a.getAttribute('title') || '');
  if (!title) title = lines[0] || '';
  // Description: prefer an explicit desc node, unless it is the title node itself
  // or merely repeats the title.
  let desc = '';
  const descEl = findDescEl(a);
  if (descEl && descEl !== titleEl) desc = textOf(descEl);
  if (desc === title) desc = '';
  // If still empty, derive it from the remaining text lines.
  if (!desc) {
    let rest = lines;
    if (title && rest.length > 0) {
      if (rest[0] === title) {
        // Leading line is exactly the title: drop it once.
        rest = rest.slice(1);
      } else if (rest[0].startsWith(title)) {
        // Title and description jammed onto one line: strip the title prefix.
        rest = [rest[0].slice(title.length).trim(), ...rest.slice(1)].filter(Boolean);
      }
    }
    // Take the first meaningful line (longer than 20 chars), else the first line.
    desc = rest.find(s => s.length > 20 && s !== title) || rest[0] || '';
    // If desc still equals title (bad split), blank it.
    if (desc === title) desc = '';
  }
  // Image: thumbnail inside anchor
  const img = a.querySelector('img');
  const image = img?.currentSrc || img?.src || '';
  rows.push([title, desc, href, image]);
  seen.add(href);
}
// CSV download
// Quote every cell and double any embedded quotes; null/undefined become an empty
// cell and non-string values are stringified rather than throwing on .replace.
const esc = s => `"${String(s ?? '').replace(/"/g,'""')}"`;
const csv = ['Title,Description,URL,Image', ...rows.map(r => r.map(esc).join(','))].join('\n');
const blob = new Blob([csv], { type:'text/csv;charset=utf-8' });
const url = URL.createObjectURL(blob);
const link = document.createElement('a');
link.href = url;
link.download = 'my-gpts-detailed.csv';
// Attach the link before clicking (a detached anchor may not trigger a download
// in every browser), then remove it again so the page is left unchanged.
link.style.display = 'none';
document.body.appendChild(link);
link.click();
link.remove();
// Release the blob URL shortly afterwards; the delay gives the download time to start.
setTimeout(() => URL.revokeObjectURL(url), 1000);
// Console summary
console.log(`Exported ${rows.length} GPTs`);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment