Skip to content

Instantly share code, notes, and snippets.

@RohanAwhad
Last active October 29, 2024 15:51
Show Gist options
  • Save RohanAwhad/41f42cec8acb0f21016f5b7adc519a21 to your computer and use it in GitHub Desktop.
Save RohanAwhad/41f42cec8acb0f21016f5b7adc519a21 to your computer and use it in GitHub Desktop.
Extract details from the search response in Apollo.io
/**
 * Builds a plain record from one result row by querying fixed selectors inside it.
 *
 * Every single-value field is taken from the FIRST element matching its selector;
 * a field is `null` when nothing matches. `skills` is the comma-joined (untrimmed)
 * text of every `span.zp_CEZf9` in the row, or '' when there are none.
 *
 * NOTE(review): `location` uses the same selector as `jobTitle`, and `industry`
 * uses the same class as `skills`, so each pair reads from the same first match.
 * Confirm the intended selectors against the live markup.
 *
 * @param {Element} rowElement - Row container to search within.
 * @returns {Object} Record with the eleven fields listed in the return statement.
 */
function extractRowData(rowElement) {
  // Trimmed text of the first match, or null when the selector matches nothing.
  const textOf = (selector) => {
    const node = rowElement.querySelector(selector);
    return node ? node.textContent.trim() : null;
  };

  // Raw attribute value of the first match, or null when the selector matches nothing.
  const attrOf = (selector, attributeName) => {
    const node = rowElement.querySelector(selector);
    return node ? node.getAttribute(attributeName) : null;
  };

  // Text of every tag-like span in the row, joined without trimming.
  const joinedTagText = Array.from(
    rowElement.querySelectorAll('span.zp_CEZf9'),
    (node) => node.textContent,
  ).join(', ');

  return {
    name: textOf('a[data-to^="/people"]'),
    jobTitle: textOf('span[class*="zp_xvo3G"]'),
    company: textOf('a[data-to^="/organizations"] span.zp_xvo3G'),
    companyLogoURL: attrOf('img', 'src'),
    linkedInProfileURL: attrOf('a[href*="linkedin.com"]', 'href'),
    location: textOf('span[class*="zp_xvo3G"]'),
    industry: textOf('span.zp_CEZf9'),
    engagementCount: textOf('span[data-count-size]'),
    skills: joinedTagText,
    profileURL: attrOf('a[data-to^="/people"]', 'href'),
    companyProfileURL: attrOf('a[data-to^="/organizations"]', 'href'),
  };
}
/**
 * Collects one record per row from every page of the results table.
 *
 * For each page it reads the rows inside the second `div[role="rowgroup"]` of the
 * document (index 1 — presumably the table body, with the header at index 0; confirm
 * against the live markup), converts each row with `extractRowData`, then clicks the
 * enabled "Next" button and waits `waitMs` before reading again. The loop ends when
 * no enabled "Next" button is found.
 *
 * @param {number} [waitMs=10000] - Milliseconds to wait after clicking "Next"
 *   before the following page is read.
 * @returns {Promise<Array<Object>>} Records from all visited pages, in visit order.
 */
async function extractDataAndPaginate(waitMs = 10000) {
  const results = [];

  // Reads the rows of whatever page is currently rendered.
  function extractDataFromPage() {
    const bodyGroup = document.querySelectorAll('div[role="rowgroup"]')[1];
    if (!bodyGroup) {
      // Without this guard a missing row group throws a TypeError and every
      // record gathered from earlier pages is lost.
      console.warn('extractDataAndPaginate: no second div[role="rowgroup"] found; skipping page');
      return [];
    }
    return Array.from(
      bodyGroup.querySelectorAll('div[role="row"]'),
      (row) => extractRowData(row),
    );
  }

  // Resolves to 'done' when there is no enabled "Next" button, otherwise clicks it
  // and resolves to 'loaded' once the wait has elapsed.
  async function clickNextAndWaitForLoad() {
    const nextPageButton = document.querySelector('button[aria-label="Next"]:not([disabled])');
    if (!nextPageButton) {
      return 'done';
    }
    nextPageButton.click();
    // Purely time-based: nothing here verifies that the next page actually rendered.
    await new Promise((resolve) => {
      setTimeout(resolve, waitMs);
    });
    console.log('loaded');
    return 'loaded';
  }

  let status = '';
  do {
    results.push(...extractDataFromPage());
    status = await clickNextAndWaitForLoad();
  } while (status !== 'done');
  return results;
}
/**
 * Serializes `data` to JSON and triggers a browser download of the result.
 *
 * A temporary `<a download>` element pointing at a blob URL is appended to
 * `document.body`, clicked, and removed again.
 *
 * @param {*} data - Value passed to `JSON.stringify`.
 * @param {string} [filename='results.json'] - Name assigned to the anchor's `download` property.
 * @returns {void}
 */
function downloadResults(data, filename = 'results.json') {
  const jsonString = JSON.stringify(data);
  const blob = new Blob([jsonString], { type: 'application/json' });
  const objectUrl = URL.createObjectURL(blob);

  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = filename;
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    // Always detach the temporary anchor, even if click() throws.
    document.body.removeChild(link);
    // Release the blob URL so the blob can be freed. This is deferred
    // so the revoke happens after the click handler has returned.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 0);
  }
}
/*
// Usage (run in the browser console of the page that shows the results table):
extractDataAndPaginate().then((results) => {
console.log(results);
downloadResults(results);
});
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment