Skip to content

Instantly share code, notes, and snippets.

@twfahey1
Created August 27, 2024 13:15
Show Gist options
  • Save twfahey1/0fdd9ba73b16a104ff01b1510ac24a38 to your computer and use it in GitHub Desktop.
Save twfahey1/0fdd9ba73b16a104ff01b1510ac24a38 to your computer and use it in GitHub Desktop.
Scrape links in the console
// Retrieve or initialize the captured data in local storage.
// Falls back to an empty list when storage cannot be read or the stored value
// is not a JSON array, so the rest of the script still loads and runs.
let capturedLinks = [];
try {
  const storedLinks = JSON.parse(localStorage.getItem('capturedLinks') || '[]');
  if (Array.isArray(storedLinks)) {
    capturedLinks = storedLinks;
  }
} catch (err) {
  // Covers malformed JSON as well as environments where reading storage throws.
  console.warn('Could not read capturedLinks from local storage; starting with an empty list.', err);
}
/**
 * Captures the title and URL of every element matching `.gs-title a.gs-title`
 * on the current page.
 *
 * Each new { title, url } pair is appended to the shared `capturedLinks` list
 * (pairs already present are skipped), the list is written to local storage
 * under the `capturedLinks` key, and the number of matched elements is logged.
 */
function captureAllLinks() {
  const GOOGLE_REDIRECT_PREFIX = 'https://www.google.com/url?';
  const anchors = document.querySelectorAll('.gs-title a.gs-title');

  for (const anchor of anchors) {
    const title = anchor.textContent.trim();
    // A missing href attribute is recorded as an empty string.
    const href = anchor.getAttribute('href') || '';

    let url = href;
    if (href.includes(GOOGLE_REDIRECT_PREFIX)) {
      // Google redirect: the real destination is carried in the `q` parameter.
      const query = new URLSearchParams(href.split('?')[1]);
      url = query.get('q') || href;
    }

    const isAlreadyCaptured = capturedLinks.some(
      (entry) => entry.url === url && entry.title === title,
    );
    if (isAlreadyCaptured) {
      continue;
    }
    capturedLinks.push({ title, url });
  }

  // Persist the accumulated list so it survives page navigation.
  localStorage.setItem('capturedLinks', JSON.stringify(capturedLinks));
  console.log(`${anchors.length} links captured and cleaned on this page.`);
}
/**
 * Clears all captured data, both the `capturedLinks` key in local storage and
 * the in-memory `capturedLinks` list.
 *
 * The in-memory list must be emptied as well: captureAllLinks() appends to it
 * and then writes the whole list back to storage, so leaving it populated
 * would silently restore the "cleared" entries on the next capture.
 */
function clearCapturedData() {
  localStorage.removeItem('capturedLinks');
  // Truncate in place so every reference to the shared list sees it empty.
  capturedLinks.length = 0;
  console.log('Captured data cleared.');
}
/**
 * Prints the entries stored under the `capturedLinks` local-storage key.
 *
 * Logs a short notice when nothing is stored; otherwise renders the entries
 * with console.table.
 */
function showCapturedData() {
  const storedJson = localStorage.getItem('capturedLinks') || '[]';
  const entries = JSON.parse(storedJson);

  if (entries.length !== 0) {
    console.table(entries);
    return;
  }
  console.log('No data captured yet.');
}
/**
 * Converts an array of flat objects to CSV text.
 *
 * The header row is taken from the keys of the first object; every following
 * row holds the values of one object. Fields containing a comma, a double
 * quote, or a line break are wrapped in double quotes with embedded quotes
 * doubled, so such values do not spill into neighbouring columns.
 *
 * @param {Array<Object>} arr - Rows to serialize.
 * @returns {string} CSV text with rows joined by '\n'; '' when `arr` is empty
 *   or null/undefined.
 */
function convertToCSV(arr) {
  if (arr == null || arr.length === 0) {
    return '';
  }

  const escapeField = (value) => {
    // null/undefined become an empty field, as Array#toString rendered them.
    const text = value == null ? '' : String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  const toLine = (fields) => fields.map(escapeField).join(',');

  const headerLine = toLine(Object.keys(arr[0]));
  const dataLines = arr.map((row) => toLine(Object.values(row)));
  return [headerLine, ...dataLines].join('\n');
}
/**
 * Triggers a browser download of the given CSV text.
 *
 * Wraps the text in a Blob, points a temporary hidden <a download> element at
 * an object URL for it, and clicks the element. The element is removed again
 * afterwards and the object URL is released so the Blob can be freed.
 *
 * @param {string} csvContent - CSV text to save.
 * @param {string} fileName - Suggested name for the downloaded file.
 */
function downloadCSV(csvContent, fileName) {
  const REVOKE_DELAY_MS = 1000;
  const link = document.createElement('a');

  // Feature-detect the `download` attribute; without it nothing can be saved.
  if (link.download === undefined) {
    console.warn('CSV download is not supported in this browser.');
    return;
  }

  const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', fileName);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    document.body.removeChild(link);
    // Release the object URL; deferred as a precaution so the browser has
    // time to start the download before the URL becomes invalid.
    setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
  }
}
/**
 * Exports the entries stored under the `capturedLinks` local-storage key as a
 * CSV file named `captured_links.csv`.
 *
 * Logs a notice and does nothing else when no entries are stored.
 */
function exportCapturedData() {
  const storedJson = localStorage.getItem('capturedLinks') || '[]';
  const entries = JSON.parse(storedJson);

  if (entries.length !== 0) {
    const csvText = convertToCSV(entries);
    downloadCSV(csvText, 'captured_links.csv');
    console.log('Captured data exported as CSV.');
  } else {
    console.log('No data to export.');
  }
}
// Usage example:
// 1. Capture data: captureAllLinks()
// 2. Show data: showCapturedData()
// 3. Clear data: clearCapturedData()
// 4. Export data: exportCapturedData()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment