Created
March 12, 2022 11:47
-
-
Save rigwild/e432ed3cec859b77324ee78309ff6368 to your computer and use it in GitHub Desktop.
LinkedIn scraper Tampermonkey script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name Scrape Linkedin people search
// @namespace http://tampermonkey.net/
// @version 0.1
// @description try to take over the world!
// @author You
// @match https://www.linkedin.com/search/results/people*
// @icon https://www.google.com/s2/favicons?sz=64&domain=linkedin.com
// @grant none
// ==/UserScript==
// @ts-check
/**
 * Wait for a given amount of time.
 *
 * @param {number} ms - Timeout in milliseconds, handed to `setTimeout`.
 * @returns {Promise<void>} Promise fulfilled once the timer fires.
 */
const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
/**
 * Read the people listed on the currently displayed search results page.
 *
 * Looks up every `.reusable-search__result-container` inside
 * `.search-results-container` and turns each one into a plain object.
 * Any field whose element is missing from a result card is `undefined`.
 *
 * @returns {Array<{
 *   id: (string|undefined),
 *   link: (string|undefined),
 *   name: (string|undefined),
 *   description: (string|undefined),
 *   localisation: (string|undefined),
 *   summary: (string|undefined),
 *   commonConnectionsSummary: (string|undefined),
 *   commonConnections: string[],
 *   avatar: (string|undefined)
 * }>} One entry per result card; an empty array when the results container
 *   is not in the DOM.
 */
const getPeople = () => {
  const results = document.querySelector('.search-results-container')
  // Without the container there is nothing to read; return an empty list
  // instead of throwing on `null.querySelectorAll`.
  if (!results) return []

  // Trimmed text of the first descendant matching `selector`, if any.
  const textOf = (node, selector) => node.querySelector(selector)?.textContent.trim()

  const cards = Array.from(results.querySelectorAll('.reusable-search__result-container'))
  return cards.map(card => {
    const href = card.querySelector('a')?.href
    let id
    let link = href
    if (href) {
      const url = new URL(href)
      id = url.pathname.replace('/in/', '')
      // Keep only origin + path so the query string and fragment are dropped.
      link = `${url.origin}${url.pathname}`
    }

    const connectionLinks = card.querySelectorAll('.entity-result__simple-insight-text a')

    return {
      id,
      link,
      name: textOf(card, '.entity-result__title-text a span span'),
      description: textOf(card, '.entity-result__primary-subtitle'),
      localisation: textOf(card, '.entity-result__secondary-subtitle'),
      summary: textOf(card, '.entity-result__summary'),
      commonConnectionsSummary: textOf(card, '.entity-result__simple-insight-text'),
      commonConnections: Array.from(connectionLinks, anchor => anchor.textContent.trim()),
      avatar: card.querySelector('img')?.src
    }
  })
}
/**
 * Go to the next page of search results.
 *
 * Scrolls to the bottom of the document, waits 500 ms, clicks the
 * `button[aria-label="Next"]` pagination button and then re-adds the scrape
 * buttons via `addScrapeButtons()`.
 *
 * @returns {Promise<void>} Resolves once the scrape buttons have been re-added.
 * @throws {Error} When no enabled "Next" button is found after scrolling
 *   (the promise rejects).
 */
const nextPage = async () => {
  window.scrollTo(0, document.body.scrollHeight)
  // Pause between scrolling and looking up the button
  // (presumably lets the pagination controls render — TODO confirm).
  await delay(500)

  const nextButton = document.querySelector('button[aria-label="Next"]')
  // Fail with an explicit message instead of a null-dereference TypeError,
  // and do not "click" a disabled button, which would silently do nothing.
  if (!nextButton || nextButton.disabled) {
    throw new Error('No enabled "Next" pagination button found; cannot go to the next page')
  }

  nextButton.click()
  await addScrapeButtons()
}
/**
 * Scrape up to `pagesCount` consecutive result pages.
 *
 * For each page: read the people with `getPeople()`, store them in the
 * `window.peopleMap` Map keyed by `person.id` (a later entry with the same id
 * replaces the earlier one), log the progress, move on with `nextPage()` and
 * wait a random 2 to 12 seconds before the next iteration.
 *
 * `window.peopleMap` is reused when it already exists, so results accumulate
 * across several runs; otherwise a new Map is created and published there.
 *
 * NOTE(review): entries whose `id` is `undefined` all share the single
 * `undefined` key, so only the last such entry is kept.
 *
 * @param {number} pagesCount - Maximum number of pages to scrape.
 * @returns {Promise<void>} Resolves when all pages were processed or when
 *   moving to the next page failed (the failure is logged, not rethrown).
 */
const scrape = async pagesCount => {
  const peopleMap = window.peopleMap || new Map()
  window.peopleMap = peopleMap

  for (let page = 0; page < pagesCount; page++) {
    const people = getPeople()
    for (const person of people) {
      peopleMap.set(person.id, person)
    }
    console.log(peopleMap, people)

    try {
      await nextPage()
    } catch (err) {
      // Not being able to advance (e.g. no further page) ends the run;
      // everything collected so far stays available in window.peopleMap.
      console.warn('Could not move to the next results page, stopping the scrape', err)
      break
    }
    await delay(2000 + Math.random() * 10000)
  }
}
/**
 * Add a "Scrape N" pill to the search filters bar.
 *
 * The pill is a shallow clone of the first `.artdeco-pill` on the page (or a
 * plain `<button>` when there is none), labelled `Scrape ${pagesCount}`;
 * clicking it runs `scrape(pagesCount)`. If the filters bar
 * (`.search-reusables__filters-bar-grouping`) is not in the DOM yet, a
 * MutationObserver on the document waits for it to appear.
 *
 * Each pill is tagged with `data-scrape-pages`, and a bar that already holds
 * a pill for the same `pagesCount` is left untouched, so calling this
 * repeatedly does not pile up duplicates.
 *
 * @param {number} pagesCount - Number of pages the pill will scrape when clicked.
 * @returns {Promise<void>} Resolves once the pill is in place; rejects if
 *   adding it fails after the filters bar showed up.
 */
const addScrapeButton = pagesCount => {
  const getFiltersList = () => document.querySelector('.search-reusables__filters-bar-grouping')

  const addPill = filtersList => {
    const marker = String(pagesCount)
    const alreadyAdded = Array.from(filtersList.children).some(
      child => child.dataset?.scrapePages === marker
    )
    if (alreadyAdded) return

    filtersList.style.gap = '7px'
    const template = document.querySelector('.artdeco-pill')
    // Fall back to a bare button rather than throwing when no pill exists to copy.
    const pill = template ? template.cloneNode() : document.createElement('button')
    pill.dataset.scrapePages = marker
    pill.textContent = `Scrape ${pagesCount}`
    pill.addEventListener('click', async () => {
      try {
        await scrape(pagesCount)
      } catch (err) {
        // Nobody awaits an event handler, so report the failure here.
        console.error(`Scrape of ${pagesCount} page(s) failed`, err)
      }
    })
    filtersList.append(pill)
  }

  const filtersList = getFiltersList()
  if (filtersList) {
    addPill(filtersList)
    return Promise.resolve()
  }

  return new Promise((resolve, reject) => {
    const check = (_changes, observer) => {
      const lateFiltersList = getFiltersList()
      if (!lateFiltersList) return
      observer.disconnect()
      try {
        addPill(lateFiltersList)
        resolve()
      } catch (err) {
        // The observer is already disconnected, so settle the promise;
        // otherwise the caller would wait forever.
        reject(err)
      }
    }
    new MutationObserver(check).observe(document, { childList: true, subtree: true })
  })
}
/**
 * Add one "Scrape N" button per requested page count, in order.
 *
 * Waits 100 ms first, then calls `addScrapeButton` for each count one after
 * the other so the buttons are appended in the given order.
 *
 * @param {number[]} [pagesCounts=[1, 5, 10, 20, 50, 100]] - Page counts to
 *   offer, one button each.
 * @returns {Promise<void>} Resolves once every button has been added.
 */
const addScrapeButtons = async (pagesCounts = [1, 5, 10, 20, 50, 100]) => {
  await delay(100)
  for (const pagesCount of pagesCounts) {
    await addScrapeButton(pagesCount)
  }
}
// Userscript entry point: add the scrape buttons as soon as the script runs.
;(function () {
  'use strict'
  // Nothing awaits this call, so report a failure instead of leaving an
  // unhandled promise rejection.
  addScrapeButtons().catch(err => console.error('Failed to add the scrape buttons', err))
})()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment