ThinhPhan · March 8, 2024 03:56
diff --git a/extract_urls.js b/extract_urls.js
 // How to use
 // Paste this script into the browser's Developer Console on the page to scan.
 // It logs one tab-separated row per absolute link: the URL, the anchor text,
 // and whether the link's host differs from the current page's host.
 // Ref: https://www.datablist.com/learn/scraping/extract-urls-from-webpage

 /**
  * Build the result table (header row first) from a list of anchor elements.
  *
  * @param {Iterable<{href: *, text: *, host: *}>} anchors - Anchor-like objects.
  * @param {string} pageHost - Host to compare each anchor's host against.
  * @returns {Array<Array<string|boolean>>} Rows of [url, anchor text, isExternal].
  */
 function collectLinkRows(anchors, pageHost) {
   const rows = [['Url', 'Anchor Text', 'External']];
   // for...of visits only the elements; for...in on an HTMLCollection would
   // also walk `length`, `item` and `namedItem`.
   for (const anchor of anchors) {
     const { href } = anchor;
     // SVG <a> elements expose `href` as an object rather than a string; skip
     // them instead of throwing on a missing string method.
     if (typeof href !== 'string' || !href.includes('://')) continue;
     rows.push([href, anchor.text, anchor.host !== pageHost]);
   }
   return rows;
 }

 /**
  * Render one table cell.
  *
  * @param {*} cell - Booleans become TRUE/FALSE; null, undefined and '' become
  *   an empty field; anything else is stringified, whitespace-normalized and
  *   wrapped in double quotes.
  * @returns {string} The formatted field.
  */
 function formatCell(cell) {
   if (typeof cell === 'boolean') return cell ? 'TRUE' : 'FALSE';
   if (cell == null || cell === '') return '';
   const value = String(cell)
     // Collapse each line break (plus surrounding blank lines and the next
     // line's indentation) into a single newline.
     .replace(/[\f\n\v]*\n\s*/g, '\n')
     // Collapse runs of tabs/spaces so a cell can never contain the delimiter.
     .replace(/[\t\f ]+/g, ' ')
     .trim()
     // Double embedded quotes so the quoted field stays well-formed.
     .replace(/"/g, '""');
   return `"${value}"`;
 }

 /**
  * Serialize rows as tab-separated text, one row per line.
  *
  * @param {Array<Array<*>>} rows - Table rows.
  * @returns {string} Tab-delimited text.
  */
 function rowsToTsv(rows) {
   return rows.map((row) => row.map(formatCell).join('\t')).join('\n');
 }

 // Fall back to an empty link list when no DOM is available so the helpers
 // above can still be loaded outside a browser.
 const urls = typeof document === 'undefined' ? [] : document.getElementsByTagName('a');
 const pageHost = typeof window === 'undefined' ? '' : window.location.host;
 const results = collectLinkRows(urls, pageHost);
 const csvContent = rowsToTsv(results);
 console.log(csvContent);
	// How to use
	// Paste this script into the browser's Developer Console on the page to scan.
	// It logs one tab-separated row per absolute link: the URL, the anchor text,
	// and whether the link's host differs from the current page's host.
	// Ref: https://www.datablist.com/learn/scraping/extract-urls-from-webpage

	/**
	 * Build the result table (header row first) from a list of anchor elements.
	 *
	 * @param {Iterable<{href: *, text: *, host: *}>} anchors - Anchor-like objects.
	 * @param {string} pageHost - Host to compare each anchor's host against.
	 * @returns {Array<Array<string|boolean>>} Rows of [url, anchor text, isExternal].
	 */
	function collectLinkRows(anchors, pageHost) {
		const rows = [['Url', 'Anchor Text', 'External']];
		// for...of visits only the elements; for...in on an HTMLCollection would
		// also walk `length`, `item` and `namedItem`.
		for (const anchor of anchors) {
			const { href } = anchor;
			// SVG <a> elements expose `href` as an object rather than a string; skip
			// them instead of throwing on a missing string method.
			if (typeof href !== 'string' || !href.includes('://')) continue;
			rows.push([href, anchor.text, anchor.host !== pageHost]);
		}
		return rows;
	}

	/**
	 * Render one table cell.
	 *
	 * @param {*} cell - Booleans become TRUE/FALSE; null, undefined and '' become
	 *   an empty field; anything else is stringified, whitespace-normalized and
	 *   wrapped in double quotes.
	 * @returns {string} The formatted field.
	 */
	function formatCell(cell) {
		if (typeof cell === 'boolean') return cell ? 'TRUE' : 'FALSE';
		if (cell == null || cell === '') return '';
		const value = String(cell)
			// Collapse each line break (plus surrounding blank lines and the next
			// line's indentation) into a single newline. The `*` quantifiers are
			// required: without them a lone "\n" followed by indentation never matches.
			.replace(/[\f\n\v]*\n\s*/g, '\n')
			// Collapse runs of tabs/spaces so a cell can never contain the delimiter.
			.replace(/[\t\f ]+/g, ' ')
			.trim()
			// Double embedded quotes so the quoted field stays well-formed.
			.replace(/"/g, '""');
		return `"${value}"`;
	}

	/**
	 * Serialize rows as tab-separated text, one row per line.
	 *
	 * @param {Array<Array<*>>} rows - Table rows.
	 * @returns {string} Tab-delimited text.
	 */
	function rowsToTsv(rows) {
		return rows.map((row) => row.map(formatCell).join('\t')).join('\n');
	}

	// Fall back to an empty link list when no DOM is available so the helpers
	// above can still be loaded outside a browser.
	const urls = typeof document === 'undefined' ? [] : document.getElementsByTagName('a');
	const pageHost = typeof window === 'undefined' ? '' : window.location.host;
	const results = collectLinkRows(urls, pageHost);
	const csvContent = rowsToTsv(results);
	console.log(csvContent);