danielrosehill · September 27, 2025 21:05
diff --git a/custom-gpt-scraper.js b/custom-gpt-scraper.js
 (async () => {
  // Browser-console snippet: finds links to custom GPTs (URLs containing
  // /g/g-<id>) in the current page, including open shadow roots, extracts a
  // title, description, URL and thumbnail for each one, and downloads the
  // result as a CSV file named my-gpts-detailed.csv.

  // ---------- helpers ----------
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Resolve an href against the current page. Returns '' instead of throwing
  // when the URL parser rejects the value, so one bad link cannot abort the run.
  const toAbsolute = (u) => {
    try {
      return new URL(u, location.href).href;
    } catch {
      return '';
    }
  };

  // Collapse every run of whitespace (newlines included) to a single space.
  const norm = (s) => (s || '').replace(/\s+/g, ' ').trim();
  const clean = (s) => s.replace(/\u200b/g, ''); // strip zero-widths

  // Prefer clean text from a node without kids (or use innerText)
  const textOf = (el) => {
    if (!el) return '';
    const raw = el.childElementCount === 0 ? el.textContent : el.innerText || el.textContent;
    return norm(clean(raw || ''));
  };

  // Non-empty text lines of a node. The split happens BEFORE normalising,
  // because norm() turns newlines into spaces and would leave a single line.
  const linesOf = (el) =>
    clean(el.innerText || el.textContent || '')
      .split(/\r?\n/)
      .map(norm)
      .filter(Boolean);

  // Try to find a "title-ish" element inside a card/anchor
  const findTitleEl = (root) =>
    root.querySelector('h1,h2,h3,[role="heading"],[data-testid*="title" i],.title,[class*="title" i]');

  // Try to find a "description-ish" element
  const findDescEl = (root) =>
    root.querySelector('[data-testid*="description" i],.description,[class*="description" i],p');

  // Walk document + open shadow DOMs and return every <a href> found.
  const collectAnchors = (root, out = []) => {
    if (typeof root.querySelectorAll !== 'function') return out;
    for (const a of root.querySelectorAll('a[href]')) out.push(a);
    for (const el of root.querySelectorAll('*')) {
      if (el.shadowRoot) collectAnchors(el.shadowRoot, out);
    }
    return out;
  };

  // Match GPT links
  const isGPT = (href) => /\/g\/g-[a-z0-9]+/i.test(href);

  // Build one [title, description, url, image] row from a GPT anchor.
  const extractRow = (a, href) => {
    const lines = linesOf(a);

    // Title: heading-ish element > aria-label/title attribute > first text line
    const title =
      textOf(findTitleEl(a)) ||
      norm(a.getAttribute('aria-label') || a.getAttribute('title') || '') ||
      lines[0] ||
      '';

    // Description: prefer an explicit node, but not one that merely repeats
    // the title (the generic `p` selector can land on the title element).
    let desc = textOf(findDescEl(a));
    if (desc === title) desc = '';

    // Otherwise take it from the remaining anchor text.
    if (!desc) {
      const rest = lines
        // title and description may be jammed onto the first line: cut the title off
        .map((line, i) =>
          i === 0 && title && line.startsWith(title) ? norm(line.slice(title.length)) : line)
        .filter((line) => line && line !== title);
      // first meaningful line (longer than 20 chars), else whatever comes first
      desc = rest.find((line) => line.length > 20) || rest[0] || '';
    }

    // Image: thumbnail inside anchor
    const img = a.querySelector('img');
    const image = img?.currentSrc || img?.src || '';

    return [title, desc, href, image];
  };

  // ---------- scrape ----------
  // Keyed by absolute URL; a Map keeps first-seen order and de-duplicates.
  const rowsByHref = new Map();

  const harvest = () => {
    for (const a of collectAnchors(document)) {
      const href = toAbsolute(a.getAttribute('href') || '');
      if (!isGPT(href)) continue;

      const known = rowsByHref.get(href);
      if (known && known.every(Boolean)) continue; // nothing left to fill in

      const found = extractRow(a, href);
      // Keep what an earlier pass captured; only fill fields that were empty
      // (e.g. a thumbnail that had not loaded yet).
      rowsByHref.set(href, known ? known.map((value, i) => value || found[i]) : found);
    }
  };

  // Harvest before and after every scroll step: items that only appear after
  // scrolling are picked up, and items a virtualized list removes again are
  // not lost.
  harvest();
  for (let i = 0; i < 8; i++) {
    window.scrollTo(0, document.body.scrollHeight);
    await sleep(250);
    harvest();
  }

  const rows = [...rowsByHref.values()];

  // ---------- CSV download ----------
  // Quote every field and double embedded quotes.
  const esc = (s) => `"${(s || '').replace(/"/g, '""')}"`;
  const csv = ['Title,Description,URL,Image', ...rows.map((r) => r.map(esc).join(','))].join('\n');
  const blob = new Blob([csv], { type: 'text/csv;charset=utf-8' });
  const url = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = url;
  link.download = 'my-gpts-detailed.csv';
  link.click();
  // Release the blob once the download has had time to start.
  setTimeout(() => URL.revokeObjectURL(url), 1000);

  // Console summary
  console.log(`Exported ${rows.length} GPTs`);
 })();
	(async () => {
	  // Browser-console snippet: finds links to custom GPTs (URLs containing
	  // /g/g-<id>) in the current page, including open shadow roots, extracts a
	  // title, description, URL and thumbnail for each one, and downloads the
	  // result as a CSV file named my-gpts-detailed.csv.

	  // ---------- helpers ----------
	  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

	  // Resolve an href against the current page. Returns '' instead of throwing
	  // when the URL parser rejects the value, so one bad link cannot abort the run.
	  const toAbsolute = (u) => {
	    try {
	      return new URL(u, location.href).href;
	    } catch {
	      return '';
	    }
	  };

	  // Collapse every run of whitespace (newlines included) to a single space.
	  const norm = (s) => (s || '').replace(/\s+/g, ' ').trim();
	  const clean = (s) => s.replace(/\u200b/g, ''); // strip zero-widths

	  // Prefer clean text from a node without kids (or use innerText)
	  const textOf = (el) => {
	    if (!el) return '';
	    const raw = el.childElementCount === 0 ? el.textContent : el.innerText || el.textContent;
	    return norm(clean(raw || ''));
	  };

	  // Non-empty text lines of a node. The split happens BEFORE normalising,
	  // because norm() turns newlines into spaces and would leave a single line.
	  const linesOf = (el) =>
	    clean(el.innerText || el.textContent || '')
	      .split(/\r?\n/)
	      .map(norm)
	      .filter(Boolean);

	  // Try to find a "title-ish" element inside a card/anchor
	  // (`*=` is a substring match; the trailing `i` makes it case-insensitive)
	  const findTitleEl = (root) =>
	    root.querySelector('h1,h2,h3,[role="heading"],[data-testid*="title" i],.title,[class*="title" i]');

	  // Try to find a "description-ish" element
	  const findDescEl = (root) =>
	    root.querySelector('[data-testid*="description" i],.description,[class*="description" i],p');

	  // Walk document + open shadow DOMs and return every <a href> found.
	  const collectAnchors = (root, out = []) => {
	    if (typeof root.querySelectorAll !== 'function') return out;
	    for (const a of root.querySelectorAll('a[href]')) out.push(a);
	    for (const el of root.querySelectorAll('*')) {
	      if (el.shadowRoot) collectAnchors(el.shadowRoot, out);
	    }
	    return out;
	  };

	  // Match GPT links
	  const isGPT = (href) => /\/g\/g-[a-z0-9]+/i.test(href);

	  // Build one [title, description, url, image] row from a GPT anchor.
	  const extractRow = (a, href) => {
	    const lines = linesOf(a);

	    // Title: heading-ish element > aria-label/title attribute > first text line
	    const title =
	      textOf(findTitleEl(a)) ||
	      norm(a.getAttribute('aria-label') || a.getAttribute('title') || '') ||
	      lines[0] ||
	      '';

	    // Description: prefer an explicit node, but not one that merely repeats
	    // the title (the generic `p` selector can land on the title element).
	    let desc = textOf(findDescEl(a));
	    if (desc === title) desc = '';

	    // Otherwise take it from the remaining anchor text.
	    if (!desc) {
	      const rest = lines
	        // title and description may be jammed onto the first line: cut the title off
	        .map((line, i) =>
	          i === 0 && title && line.startsWith(title) ? norm(line.slice(title.length)) : line)
	        .filter((line) => line && line !== title);
	      // first meaningful line (longer than 20 chars), else whatever comes first
	      desc = rest.find((line) => line.length > 20) || rest[0] || '';
	    }

	    // Image: thumbnail inside anchor
	    const img = a.querySelector('img');
	    const image = img?.currentSrc || img?.src || '';

	    return [title, desc, href, image];
	  };

	  // ---------- scrape ----------
	  // Keyed by absolute URL; a Map keeps first-seen order and de-duplicates.
	  const rowsByHref = new Map();

	  const harvest = () => {
	    for (const a of collectAnchors(document)) {
	      const href = toAbsolute(a.getAttribute('href') || '');
	      if (!isGPT(href)) continue;

	      const known = rowsByHref.get(href);
	      if (known && known.every(Boolean)) continue; // nothing left to fill in

	      const found = extractRow(a, href);
	      // Keep what an earlier pass captured; only fill fields that were empty
	      // (e.g. a thumbnail that had not loaded yet).
	      rowsByHref.set(href, known ? known.map((value, i) => value || found[i]) : found);
	    }
	  };

	  // Harvest before and after every scroll step: items that only appear after
	  // scrolling are picked up, and items a virtualized list removes again are
	  // not lost.
	  harvest();
	  for (let i = 0; i < 8; i++) {
	    window.scrollTo(0, document.body.scrollHeight);
	    await sleep(250);
	    harvest();
	  }

	  const rows = [...rowsByHref.values()];

	  // ---------- CSV download ----------
	  // Quote every field and double embedded quotes.
	  const esc = (s) => `"${(s || '').replace(/"/g, '""')}"`;
	  const csv = ['Title,Description,URL,Image', ...rows.map((r) => r.map(esc).join(','))].join('\n');
	  const blob = new Blob([csv], { type: 'text/csv;charset=utf-8' });
	  const url = URL.createObjectURL(blob);
	  const link = document.createElement('a');
	  link.href = url;
	  link.download = 'my-gpts-detailed.csv';
	  link.click();
	  // Release the blob once the download has had time to start.
	  setTimeout(() => URL.revokeObjectURL(url), 1000);

	  // Console summary
	  console.log(`Exported ${rows.length} GPTs`);
	})();