RohanAwhad · October 29, 2024 15:51
diff --git a/extractAndDownloadApolloSearchResults.js b/extractAndDownloadApolloSearchResults.js
 /**
  * Builds a plain record describing one result row.
  *
  * Every field except `skills` is read from the first descendant of
  * `rowElement` that matches a fixed selector, and is `null` when nothing
  * matches. Text fields are trimmed; URL fields are raw attribute values.
  * `skills` joins the untrimmed text of every `span.zp_CEZf9` descendant with
  * ", " and is an empty string when there are none.
  *
  * @param {Element} rowElement - Row container whose descendants are queried.
  * @returns {{name: ?string, jobTitle: ?string, company: ?string,
  *   companyLogoURL: ?string, linkedInProfileURL: ?string, location: ?string,
  *   industry: ?string, engagementCount: ?string, skills: string,
  *   profileURL: ?string, companyProfileURL: ?string}} The extracted record.
  */
 function extractRowData(rowElement) {
   // Trimmed text of the first match, or null when the selector finds nothing.
   const textOf = (selector) => {
     const node = rowElement.querySelector(selector);
     return node ? node.textContent.trim() : null;
   };

   // Raw attribute value of the first match, or null when the selector finds nothing.
   const attrOf = (selector, attributeName) => {
     const node = rowElement.querySelector(selector);
     return node ? node.getAttribute(attributeName) : null;
   };

   // Selectors that are used for more than one field.
   const PERSON_LINK = 'a[data-to^="/people"]';
   const ORGANIZATION_LINK = 'a[data-to^="/organizations"]';
   const LABEL_SPAN = 'span[class*="zp_xvo3G"]';
   const TAG_SPAN = 'span.zp_CEZf9';

   return {
     name: textOf(PERSON_LINK),
     jobTitle: textOf(LABEL_SPAN),
     company: textOf('a[data-to^="/organizations"] span.zp_xvo3G'),
     companyLogoURL: attrOf('img', 'src'),
     linkedInProfileURL: attrOf('a[href*="linkedin.com"]', 'href'),
     // NOTE(review): same selector as jobTitle, so both fields always hold the
     // same value; confirm which element actually carries the location.
     location: textOf(LABEL_SPAN),
     // Industry is the first tag span; skills below is all of them joined.
     industry: textOf(TAG_SPAN),
     engagementCount: textOf('span[data-count-size]'),
     skills: Array.from(
       rowElement.querySelectorAll(TAG_SPAN),
       (node) => node.textContent,
     ).join(', '),
     profileURL: attrOf(PERSON_LINK, 'href'),
     companyProfileURL: attrOf(ORGANIZATION_LINK, 'href'),
   };
 }

 /**
  * Collects row records from the current results page and every following
  * page, advancing by clicking the enabled "Next" button until none is found.
  *
  * @param {Object} [options]
  * @param {number} [options.waitMs=10000] - Milliseconds to wait after each
  *   "Next" click before reading the table again. This is a fixed delay, not
  *   a load detector, so it must be long enough for the next page to render.
  * @returns {Promise<Array>} Records from all visited pages, in page order.
  */
 async function extractDataAndPaginate({ waitMs = 10000 } = {}) {
   const results = [];

   // Reads every row inside the second `div[role="rowgroup"]` on the page.
   // NOTE(review): presumably the first rowgroup is the header; confirm.
   function extractDataFromPage() {
     const tableBody = document.querySelectorAll('div[role="rowgroup"]')[1];
     // Table not rendered (yet): contribute nothing instead of throwing a TypeError.
     if (!tableBody) return [];
     return Array.from(
       tableBody.querySelectorAll('div[role="row"]'),
       (row) => extractRowData(row),
     );
   }

   // Resolves to 'done' when no enabled "Next" button exists; otherwise clicks
   // it and resolves to 'loaded' once `waitMs` has elapsed.
   function clickNextAndWaitForLoad() {
     const nextPageButton = document.querySelector('button[aria-label="Next"]:not([disabled])');
     if (!nextPageButton) return Promise.resolve('done');

     nextPageButton.click();
     return new Promise((resolve) => {
       setTimeout(() => {
         console.log('loaded');
         resolve('loaded');
       }, waitMs);
     });
   }

   let status = '';
   do {
     // Scrape first, then advance, so the last page is read before the loop ends.
     results.push(...extractDataFromPage());
     status = await clickNextAndWaitForLoad();
   } while (status !== 'done');

   return results;
 }

 /**
  * Serializes `data` to JSON and triggers a browser download of it by clicking
  * a temporary anchor that points at a blob object URL.
  *
  * @param {*} data - Value passed to `JSON.stringify`.
  * @param {string} [filename='results.json'] - Suggested name for the download.
  * @returns {void}
  */
 function downloadResults(data, filename = 'results.json') {
   const jsonString = JSON.stringify(data);
   const blob = new Blob([jsonString], { type: 'application/json' });
   const objectUrl = URL.createObjectURL(blob);

   const link = document.createElement('a');
   link.href = objectUrl;
   link.download = filename;
   document.body.appendChild(link);
   try {
     link.click();
   } finally {
     // Always detach the temporary anchor, even if click() throws.
     document.body.removeChild(link);
     // Release the blob; without this the object URL stays alive until the
     // page unloads. Revocation is deferred rather than immediate so the
     // click-triggered download is not cut off.
     // NOTE(review): the 1 s delay is a conservative guess; tune if needed.
     setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
   }
 }

 /*

 // Usage
 extractDataAndPaginate().then((results) => {
  console.log(results);
  downloadResults(results)
 });

 */
	/**
	 * Builds a plain record describing one result row.
	 *
	 * Every field except `skills` is read from the first descendant of
	 * `rowElement` that matches a fixed selector, and is `null` when nothing
	 * matches. Text fields are trimmed; URL fields are raw attribute values.
	 * `skills` joins the untrimmed text of every `span.zp_CEZf9` descendant with
	 * ", " and is an empty string when there are none.
	 *
	 * @param {Element} rowElement - Row container whose descendants are queried.
	 * @returns {{name: ?string, jobTitle: ?string, company: ?string,
	 *   companyLogoURL: ?string, linkedInProfileURL: ?string, location: ?string,
	 *   industry: ?string, engagementCount: ?string, skills: string,
	 *   profileURL: ?string, companyProfileURL: ?string}} The extracted record.
	 */
	function extractRowData(rowElement) {
		// Trimmed text of the first match, or null when the selector finds nothing.
		const textOf = (selector) => {
			const node = rowElement.querySelector(selector);
			return node ? node.textContent.trim() : null;
		};

		// Raw attribute value of the first match, or null when the selector finds nothing.
		const attrOf = (selector, attributeName) => {
			const node = rowElement.querySelector(selector);
			return node ? node.getAttribute(attributeName) : null;
		};

		// Selectors that are used for more than one field.
		const PERSON_LINK = 'a[data-to^="/people"]';
		const ORGANIZATION_LINK = 'a[data-to^="/organizations"]';
		const LABEL_SPAN = 'span[class*="zp_xvo3G"]';
		const TAG_SPAN = 'span.zp_CEZf9';

		return {
			name: textOf(PERSON_LINK),
			jobTitle: textOf(LABEL_SPAN),
			company: textOf('a[data-to^="/organizations"] span.zp_xvo3G'),
			companyLogoURL: attrOf('img', 'src'),
			linkedInProfileURL: attrOf('a[href*="linkedin.com"]', 'href'),
			// NOTE(review): same selector as jobTitle, so both fields always hold the
			// same value; confirm which element actually carries the location.
			location: textOf(LABEL_SPAN),
			// Industry is the first tag span; skills below is all of them joined.
			industry: textOf(TAG_SPAN),
			engagementCount: textOf('span[data-count-size]'),
			skills: Array.from(
				rowElement.querySelectorAll(TAG_SPAN),
				(node) => node.textContent,
			).join(', '),
			profileURL: attrOf(PERSON_LINK, 'href'),
			companyProfileURL: attrOf(ORGANIZATION_LINK, 'href'),
		};
	}

	/**
	 * Collects row records from the current results page and every following
	 * page, advancing by clicking the enabled "Next" button until none is found.
	 *
	 * @param {Object} [options]
	 * @param {number} [options.waitMs=10000] - Milliseconds to wait after each
	 *   "Next" click before reading the table again. This is a fixed delay, not
	 *   a load detector, so it must be long enough for the next page to render.
	 * @returns {Promise<Array>} Records from all visited pages, in page order.
	 */
	async function extractDataAndPaginate({ waitMs = 10000 } = {}) {
		const results = [];

		// Reads every row inside the second `div[role="rowgroup"]` on the page.
		// NOTE(review): presumably the first rowgroup is the header; confirm.
		function extractDataFromPage() {
			const tableBody = document.querySelectorAll('div[role="rowgroup"]')[1];
			// Table not rendered (yet): contribute nothing instead of throwing a TypeError.
			if (!tableBody) return [];
			return Array.from(
				tableBody.querySelectorAll('div[role="row"]'),
				(row) => extractRowData(row),
			);
		}

		// Resolves to 'done' when no enabled "Next" button exists; otherwise clicks
		// it and resolves to 'loaded' once `waitMs` has elapsed.
		function clickNextAndWaitForLoad() {
			const nextPageButton = document.querySelector('button[aria-label="Next"]:not([disabled])');
			if (!nextPageButton) return Promise.resolve('done');

			nextPageButton.click();
			return new Promise((resolve) => {
				setTimeout(() => {
					console.log('loaded');
					resolve('loaded');
				}, waitMs);
			});
		}

		let status = '';
		do {
			// Scrape first, then advance, so the last page is read before the loop ends.
			results.push(...extractDataFromPage());
			status = await clickNextAndWaitForLoad();
		} while (status !== 'done');

		return results;
	}

	/**
	 * Serializes `data` to JSON and triggers a browser download of it by clicking
	 * a temporary anchor that points at a blob object URL.
	 *
	 * @param {*} data - Value passed to `JSON.stringify`.
	 * @param {string} [filename='results.json'] - Suggested name for the download.
	 * @returns {void}
	 */
	function downloadResults(data, filename = 'results.json') {
		const jsonString = JSON.stringify(data);
		const blob = new Blob([jsonString], { type: 'application/json' });
		const objectUrl = URL.createObjectURL(blob);

		const link = document.createElement('a');
		link.href = objectUrl;
		link.download = filename;
		document.body.appendChild(link);
		try {
			link.click();
		} finally {
			// Always detach the temporary anchor, even if click() throws.
			document.body.removeChild(link);
			// Release the blob; without this the object URL stays alive until the
			// page unloads. Revocation is deferred rather than immediate so the
			// click-triggered download is not cut off.
			// NOTE(review): the 1 s delay is a conservative guess; tune if needed.
			setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
		}
	}

	/*

	// Usage
	extractDataAndPaginate().then((results) => {
	console.log(results);
	downloadResults(results)
	});

	*/