Created
August 27, 2024 13:15
-
-
Save twfahey1/0fdd9ba73b16a104ff01b1510ac24a38 to your computer and use it in GitHub Desktop.
Scrape links in the console
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Read the previously captured links from local storage.
// Returns an empty array when storage cannot be read, when the stored value
// is not valid JSON, or when it parses to something other than an array, so
// a corrupted entry cannot abort the script while it is being loaded.
function loadCapturedLinks() {
  try {
    const parsed = JSON.parse(localStorage.getItem('capturedLinks') || '[]');
    return Array.isArray(parsed) ? parsed : [];
  } catch (err) {
    console.warn('Could not read captured links from local storage; starting with an empty list.', err);
    return [];
  }
}

// Retrieve or initialize the captured data in local storage.
// Declared with `let` because other functions in this script may replace the list.
let capturedLinks = loadCapturedLinks();
// Function to capture all links and titles on the current page.
// Collects every anchor matching '.gs-title a.gs-title', unwraps Google
// redirect URLs, appends entries that are not already stored (same url AND
// title), and writes the merged list back to local storage.
function captureAllLinks() {
  // Reload the list from storage first: the in-memory copy can be stale
  // (for example after clearCapturedData() removed the stored key), and
  // saving a stale copy would bring cleared entries back.
  capturedLinks = JSON.parse(localStorage.getItem('capturedLinks') || '[]');

  const links = document.querySelectorAll('.gs-title a.gs-title');
  links.forEach((link) => {
    const title = link.textContent.trim();
    let url = link.getAttribute('href') || ''; // Fallback to empty string if URL is null

    // Clean the URL if it contains a Google redirect
    if (url.includes('https://www.google.com/url?')) {
      // Keep everything after the FIRST '?'. Splitting on every '?' would
      // cut off a target URL that carries its own query string.
      const query = url.slice(url.indexOf('?') + 1);
      url = new URLSearchParams(query).get('q') || url; // Use 'q' parameter if it exists
    }

    // Avoid duplicate entries
    const alreadyCaptured = capturedLinks.some(
      (entry) => entry.url === url && entry.title === title
    );
    if (!alreadyCaptured) {
      capturedLinks.push({ title, url });
    }
  });

  // Save the captured links to local storage
  localStorage.setItem('capturedLinks', JSON.stringify(capturedLinks));
  // Note: the count is the number of matching anchors on the page, including
  // ones that were skipped as duplicates.
  console.log(`${links.length} links captured and cleaned on this page.`);
}
// Function to clear the captured data from local storage.
// Removes the 'capturedLinks' key and empties the module-level list.
function clearCapturedData() {
  localStorage.removeItem('capturedLinks');
  // Keep the in-memory list in sync with storage so that entries which were
  // just cleared cannot be written back by a later capture.
  capturedLinks = [];
  console.log('Captured data cleared.');
}
// Function to display the currently captured data.
// Reads the list from local storage and prints it with console.table, or
// prints a short notice when there is nothing to show.
function showCapturedData() {
  const stored = JSON.parse(localStorage.getItem('capturedLinks') || '[]');

  // A stored value such as `null` is valid JSON but not a list; treat it as
  // "nothing captured" instead of failing on `.length`.
  if (!Array.isArray(stored) || stored.length === 0) {
    console.log('No data captured yet.');
    return;
  }

  console.table(stored);
}
// Function to convert JSON data to CSV format.
// `arr` is an array of flat objects. The keys of the first object become the
// header row and also fix the column order for every following row.
// Returns the CSV text with rows separated by '\n', or '' for empty input.
function convertToCSV(arr) {
  // Nothing to convert: there is no first object to take the header from.
  if (!Array.isArray(arr) || arr.length === 0) {
    return '';
  }

  const headers = Object.keys(arr[0]);

  // Quote a field when it contains a comma, a double quote or a line break,
  // doubling any embedded quotes; otherwise such values would shift columns
  // or split rows. null/undefined become an empty field.
  const escapeField = (value) => {
    const text = value == null ? '' : String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const headerRow = headers.map(escapeField).join(',');
  // Look values up by header key so each cell lands under its own column even
  // if an object lists its keys in a different order or lacks one.
  const dataRows = arr.map((item) => headers.map((key) => escapeField(item[key])).join(','));

  return [headerRow].concat(dataRows).join('\n');
}
// Function to trigger CSV download.
// Wraps `csvContent` in a Blob, points a temporary hidden <a download> element
// at it, and clicks that element so the browser saves it as `fileName`.
function downloadCSV(csvContent, fileName) {
  const link = document.createElement('a');

  // Without support for the `download` attribute the click would navigate
  // instead of saving, so say so rather than doing nothing silently.
  if (link.download === undefined) {
    console.warn('This browser does not support the download attribute; the CSV file was not saved.');
    return;
  }

  const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
  const url = URL.createObjectURL(blob);

  link.setAttribute('href', url);
  link.setAttribute('download', fileName);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);

  try {
    link.click();
  } finally {
    document.body.removeChild(link);
    // Release the object URL so the Blob can be garbage-collected. Deferred
    // to the next task so the click has been handled before the URL goes away.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
// Function to export the captured data as a CSV file.
// Reads the list from local storage, converts it with convertToCSV and hands
// the result to downloadCSV under the name 'captured_links.csv'.
function exportCapturedData() {
  const stored = JSON.parse(localStorage.getItem('capturedLinks') || '[]');

  // A stored value that is not a list (e.g. `null`) cannot be turned into
  // rows; treat it like an empty list instead of failing during conversion.
  if (!Array.isArray(stored) || stored.length === 0) {
    console.log('No data to export.');
    return;
  }

  const csv = convertToCSV(stored);
  downloadCSV(csv, 'captured_links.csv');
  console.log('Captured data exported as CSV.');
}
// Usage example:
// 1. Capture data: captureAllLinks()
// 2. Show data: showCapturedData()
// 3. Clear data: clearCapturedData()
// 4. Export data: exportCapturedData()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment