Last active
September 27, 2025 21:05
-
-
Save danielrosehill/c6bf4392f3d8deaa42fdacf859d20d27 to your computer and use it in GitHub Desktop.
Scrape your custom GPTs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(async () => {
  // Scrapes every custom-GPT link (URLs containing /g/g-<id>) from the current
  // page, including links inside open shadow roots, and downloads the result
  // as a CSV with Title, Description, URL and Image columns.
  // Intended to be pasted into the browser console on the page listing the GPTs.

  // Outside a browser there is nothing to scrape; bail out instead of throwing.
  if (typeof document === 'undefined' || typeof window === 'undefined') {
    console.warn('No DOM available: run this snippet in a browser console.');
    return;
  }

  // ---------- helpers ----------
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Resolve a (possibly relative) href against the current page.
  // A malformed href yields '' so that one bad link cannot abort the export.
  const ABS = (u) => {
    try {
      return new URL(u, location.href).href;
    } catch {
      return '';
    }
  };

  // Collapse every whitespace run (newlines included) into a single space.
  const norm = (s) => (s || '').replace(/\s+/g, ' ').trim();

  // Strip zero-width spaces (U+200B).
  const clean = (s) => s.replace(/\u200b/g, '');

  // Single-line text of a node: textContent for leaf nodes, otherwise innerText.
  const textOf = (el) => {
    if (!el) return '';
    const raw = el.childElementCount === 0 ? el.textContent : el.innerText || el.textContent;
    return norm(clean(raw || ''));
  };

  // Non-empty lines of a node's text. The split happens BEFORE normalizing,
  // because norm() turns newlines into spaces and would leave nothing to split.
  const linesOf = (el) =>
    clean(el.innerText || el.textContent || '')
      .split('\n')
      .map((line) => norm(line))
      .filter(Boolean);

  // Try to find a "title-ish" element inside a card/anchor
  const findTitleEl = (root) =>
    root.querySelector('h1,h2,h3,[role="heading"],[data-testid*="title" i],.title,[class*="title" i]');

  // Try to find a "description-ish" element
  const findDescEl = (root) =>
    root.querySelector('[data-testid*="description" i],.description,[class*="description" i],p');

  // Collect anchors from a root and, recursively, from every open shadow root below it.
  const collectAnchors = (root, found = []) => {
    root.querySelectorAll?.('a[href]').forEach((a) => found.push(a));
    root.querySelectorAll?.('*').forEach((el) => {
      if (el.shadowRoot) collectAnchors(el.shadowRoot, found);
    });
    return found;
  };

  // Match GPT links
  const isGPT = (href) => /\/g\/g-[a-z0-9]+/i.test(href);

  // ---------- load, then collect ----------
  // Scroll to the bottom a few times FIRST so lazily loaded cards are present
  // in the DOM before the anchors are gathered.
  for (let i = 0; i < 8; i++) {
    window.scrollTo(0, document.body.scrollHeight);
    await sleep(250);
  }
  const anchors = collectAnchors(document);

  // ---------- extract one row per unique GPT URL ----------
  const rows = [];
  const seen = new Set();
  for (const a of anchors) {
    const href = ABS(a.getAttribute('href') || '');
    if (!isGPT(href) || seen.has(href)) continue;
    seen.add(href);

    const lines = linesOf(a);

    // Title: heading-ish > aria-label/title > first non-empty line
    let title = textOf(findTitleEl(a));
    if (!title) title = norm(a.getAttribute('aria-label') || a.getAttribute('title') || '');
    if (!title) title = lines[0] || '';

    // Description: prefer an explicit description node
    let desc = textOf(findDescEl(a));
    if (!desc) {
      // Drop one leading occurrence of the title from the card text.
      let rest = lines;
      if (title && rest.length > 0 && rest[0].startsWith(title)) {
        const remainder = rest[0].slice(title.length).trim();
        rest = remainder ? [remainder, ...rest.slice(1)] : rest.slice(1);
      }
      // Prefer the first line long enough to look like a sentence (> 20 chars).
      desc = rest.find((s) => s.length > 20 && s !== title) || rest[0] || '';
      // If the description still equals the title (bad split), blank it.
      if (desc === title) desc = '';
    }

    // Image: thumbnail inside the anchor
    const img = a.querySelector('img');
    const image = img?.currentSrc || img?.src || '';

    rows.push([title, desc, href, image]);
  }

  // ---------- CSV download ----------
  // Quote every field and double embedded quotes.
  const esc = (s) => `"${String(s ?? '').replace(/"/g, '""')}"`;
  const csv = ['Title,Description,URL,Image', ...rows.map((r) => r.map(esc).join(','))].join('\n');
  const blob = new Blob([csv], { type: 'text/csv;charset=utf-8' });
  const url = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = url;
  link.download = 'my-gpts-detailed.csv';
  link.click();
  // Release the blob once the download has had time to start.
  setTimeout(() => URL.revokeObjectURL(url), 1000);

  // Console summary
  console.log(`Exported ${rows.length} GPTs`);
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment