Skip to content

Instantly share code, notes, and snippets.

@mals14
Forked from adrianhorning08/scrapeLinkedinSearch.js
Created January 13, 2024 23:39
Show Gist options
  • Save mals14/7359f7d696a1c3429a6e221886cd1ea5 to your computer and use it in GitHub Desktop.
Save mals14/7359f7d696a1c3429a6e221886cd1ea5 to your computer and use it in GitHub Desktop.
Scrape LinkedIn Search
/**
 * Scrapes every page of a LinkedIn people-search result, dedupes the profiles
 * by linkedinId, and triggers a CSV download of the combined list.
 * Intended to be run in the browser console on a LinkedIn search results page.
 * @returns {Promise<void>}
 */
async function scrapeLinkedinSearch() {
  let allProfiles = [];
  let page = 1;
  // Scroll to the bottom so the pagination controls are rendered.
  window.scrollTo(0, document.body.scrollHeight);
  await new Promise((resolve) => setTimeout(resolve, 500));
  // Find the pagination button with aria-label="Next" (may be null on
  // single-page results — guard below instead of crashing).
  let nextButton = document.querySelector('button[aria-label="Next"]');
  const peeps = getProfiles();
  allProfiles.push(...peeps);
  // FIX: original dereferenced nextButton.disabled unconditionally and threw
  // a TypeError whenever the results fit on one page (no Next button).
  let isNextButtonDisabled = nextButton === null || nextButton.disabled;
  while (!isNextButtonDisabled) {
    console.log("page", page);
    // Randomized 1000-3999ms delay to look less like a bot.
    let randomDelay = Math.floor(Math.random() * 3000) + 1000;
    console.log(`Waiting ${randomDelay}ms so you don't get flagged 🤪`);
    console.log(
      `If you need anything else scraped, email me: [email protected]`
    );
    await new Promise((resolve) => setTimeout(resolve, randomDelay));
    window.scrollTo(0, document.body.scrollHeight);
    await new Promise((resolve) => setTimeout(resolve, 900));
    const people = getProfiles();
    allProfiles.push(...people);
    // Re-query: LinkedIn re-renders the pagination controls on navigation.
    nextButton = document.querySelector('button[aria-label="Next"]');
    isNextButtonDisabled = nextButton === null || nextButton.disabled;
    // FIX: only advance when the button is present and still enabled; the
    // original clicked even after determining the button was disabled.
    if (!isNextButtonDisabled) {
      nextButton.click();
    }
    page++;
  }
  // Dedupe the profiles: keep only the first occurrence of each linkedinId.
  allProfiles = allProfiles.filter(
    (v, i, a) => a.findIndex((t) => t.linkedinId === v.linkedinId) === i
  );
  console.log(
    `Congrats! 🎉 You just scraped ${allProfiles.length} profiles! If you want more leads, or want anything else scraped, email me: [email protected]`
  );
  // FIX: ISO timestamps contain ":" which is illegal in Windows filenames —
  // replace with "-" so the download succeeds on every OS.
  const ts = new Date().toISOString().replace(/:/g, "-");
  const fileName = "linkedin-profiles-" + ts + ".csv";
  convertJsonToCsvAndDownload(allProfiles, fileName);
}
/**
 * Escapes a single CSV field per RFC 4180: coerces non-strings (the original
 * crashed calling .includes on numbers/undefined), and wraps fields that
 * contain commas, quotes, or newlines in double quotes with `""` doubling.
 * @param {*} value - Raw field value; null/undefined become the empty string.
 * @returns {string} A safely-quoted CSV field.
 */
function escapeCsvField(value) {
  const str = value == null ? "" : String(value);
  if (/[",\r\n]/.test(str)) {
    return `"${str.replace(/"/g, '""')}"`;
  }
  return str;
}

/**
 * Converts an array of flat objects into an RFC 4180 CSV string. Columns are
 * taken from the first object's keys; every row is read through that same
 * header list (the original's per-item `for...in` could misalign columns
 * when objects differed in key order or key set).
 * @param {Object[]} jsonData - Array of records; may be empty.
 * @returns {string} CSV text (header + rows), or "" for empty input.
 */
function jsonToCsv(jsonData) {
  if (!Array.isArray(jsonData) || jsonData.length === 0) {
    return "";
  }
  const headers = Object.keys(jsonData[0]);
  const csvData = [headers.join(",")];
  jsonData.forEach((item) => {
    const row = headers.map((key) => escapeCsvField(item[key]));
    csvData.push(row.join(","));
  });
  return csvData.join("\n");
}

/**
 * Converts the records to CSV and triggers a browser download via a
 * temporary Blob URL and a synthetic <a download> click.
 * @param {Object[]} jsonData - Records to export; empty input is a no-op
 *   (the original threw on `jsonData[0]`).
 * @param {string} fileName - Suggested filename for the download.
 * @returns {void}
 */
function convertJsonToCsvAndDownload(jsonData, fileName) {
  const csv = jsonToCsv(jsonData);
  if (csv === "") {
    console.log("No data to export — skipping download.");
    return;
  }
  // Create a Blob containing the CSV data
  const csvBlob = new Blob([csv], {
    type: "text/csv;charset=utf-8",
  });
  // Create a URL for the Blob
  const csvUrl = URL.createObjectURL(csvBlob);
  // Create a link element and click it to start the download
  const link = document.createElement("a");
  link.href = csvUrl;
  link.target = "_blank";
  link.download = fileName;
  document.body.appendChild(link);
  link.click();
  // Remove the link and revoke the Blob URL to avoid leaking the object URL
  document.body.removeChild(link);
  URL.revokeObjectURL(csvUrl);
}
/**
 * Scrapes every visible search-result card (".entity-result") on the current
 * page and returns one record per card.
 * FIX: the original dereferenced querySelector results unconditionally and
 * threw a TypeError whenever a card lacked a title, subtitle, or profile
 * link (e.g. "LinkedIn Member" placeholder cards); missing pieces now fall
 * back to "" / undefined instead of aborting the whole scrape.
 * @returns {{linkedinId: string|undefined, name: string, title: string,
 *   location: string, url: string|undefined}[]}
 */
function getProfiles() {
  const allPeeps = [];
  const listOfProfiles = document.querySelectorAll(".entity-result");
  for (let i = 0; i < listOfProfiles.length; i++) {
    const el = listOfProfiles[i];
    const spanElement = el.querySelector(".entity-result__title-text");
    // linkedinId is the 4th segment of the URN, e.g. "urn:li:member:12345".
    const linkedinId = el
      .getAttribute("data-chameleon-result-urn")
      ?.split(":")?.[3];
    // Extract the person's name; the aria-hidden span holds the display text.
    let name =
      spanElement
        ?.querySelector('span[aria-hidden="true"]')
        ?.textContent.trim() ?? "";
    // Strip everything but ASCII letters, digits, and whitespace (drops
    // emoji/badge characters LinkedIn appends to display names).
    const textRegex = /[A-Za-z0-9\s]+/g;
    const textMatches = name.match(textRegex);
    if (textMatches) {
      // Join the matches to get the extracted text
      name = textMatches.join("").trim();
    }
    const title =
      el.querySelector(".entity-result__primary-subtitle")?.textContent.trim() ??
      "";
    const location =
      el
        .querySelector(".entity-result__secondary-subtitle")
        ?.textContent.trim() ?? "";
    // Extract the LinkedIn profile URL (undefined when the card has no link).
    const linkedinProfileUrl = spanElement
      ?.querySelector("a.app-aware-link")
      ?.getAttribute("href");
    allPeeps.push({
      linkedinId,
      name,
      title,
      location,
      // Drop the tracking query string, keep only the canonical profile path.
      url: linkedinProfileUrl?.split("?")?.[0],
    });
  }
  console.log(`Found ${allPeeps.length} profiles!`);
  return allPeeps;
}
// Entry point: kick off the scrape. Top-level await requires an ES-module
// context (e.g. pasting into the browser DevTools console).
await scrapeLinkedinSearch();
// if you need anything scraped, email me: [email protected] 🤘
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment