Skip to content

Instantly share code, notes, and snippets.

@rigwild
Created March 12, 2022 11:47
Show Gist options
  • Save rigwild/e432ed3cec859b77324ee78309ff6368 to your computer and use it in GitHub Desktop.
Save rigwild/e432ed3cec859b77324ee78309ff6368 to your computer and use it in GitHub Desktop.
LinkedIn scraper Tampermonkey script
// ==UserScript==
// @name Scrape Linkedin people search
// @namespace http://tampermonkey.net/
// @version 0.1
// @description Adds "Scrape N" pills to LinkedIn people search that collect the listed profiles into window.peopleMap
// @author You
// @match https://www.linkedin.com/search/results/people*
// @icon https://www.google.com/s2/favicons?sz=64&domain=linkedin.com
// @grant none
// ==/UserScript==
// @ts-check
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds have elapsed.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
const delay = ms =>
  new Promise(resolve => {
    setTimeout(resolve, ms)
  })
/**
 * Collects the people listed in the search results currently present in the DOM.
 *
 * Looks up every `.reusable-search__result-container` inside `.search-results-container`
 * and extracts one plain object per card with the keys `id`, `link`, `name`, `description`,
 * `localisation`, `summary`, `commonConnectionsSummary`, `commonConnections` and `avatar`.
 * A field whose element is missing from the card is `undefined` (`commonConnections` is
 * an empty array in that case).
 *
 * @returns {Array<Object>} One entry per result card; empty when the results container
 *   is not in the DOM.
 */
const getPeople = () => {
  const results = document.querySelector('.search-results-container')
  // querySelector yields null when nothing matches: report "no people" instead of throwing.
  if (!results) return []

  const cards = results.querySelectorAll('.reusable-search__result-container')
  return [...cards].map(card => {
    // Trimmed text of the first descendant matching `selector`, or undefined if there is none.
    const textOf = selector => card.querySelector(selector)?.textContent.trim()

    let link = card.querySelector('a')?.href
    let id
    if (link) {
      const url = new URL(link)
      // Drop the "/in/" prefix and any trailing slash so "/in/jane/" and "/in/jane" give the same id.
      id = url.pathname.replace('/in/', '').replace(/\/+$/, '')
      // Keep origin + path only: the query string and fragment are discarded.
      link = `${url.origin}${url.pathname}`
    }

    return {
      id,
      link,
      name: textOf('.entity-result__title-text a span span'),
      description: textOf('.entity-result__primary-subtitle'),
      localisation: textOf('.entity-result__secondary-subtitle'),
      summary: textOf('.entity-result__summary'),
      commonConnectionsSummary: textOf('.entity-result__simple-insight-text'),
      commonConnections: [...card.querySelectorAll('.entity-result__simple-insight-text a')].map(anchor =>
        anchor.textContent.trim()
      ),
      avatar: card.querySelector('img')?.src
    }
  })
}
/**
 * Scrolls to the bottom of the page, clicks the pagination "Next" button and then
 * re-adds the scrape pills.
 *
 * @returns {Promise<void>} Resolves after the click and after the pills have been re-added.
 * @throws {Error} When there is no `button[aria-label="Next"]` in the DOM or it is disabled,
 *   i.e. when the page cannot be advanced.
 */
const nextPage = async () => {
  window.scrollTo(0, document.body.scrollHeight)
  // Short pause between scrolling and looking the button up
  // (presumably to let the pagination render; confirm against the live page).
  await delay(500)

  const nextButton = document.querySelector('button[aria-label="Next"]')
  // Fail with an explicit message rather than a null dereference, and never "click"
  // a disabled button: that would leave the page unchanged while looking like success.
  if (!nextButton || nextButton.disabled) {
    throw new Error('Cannot go to the next page: no enabled "Next" button found')
  }

  nextButton.click()
  await addScrapeButtons()
}
/**
 * Scrapes up to `pagesCount` consecutive result pages, accumulating the people found
 * in `window.peopleMap` (a Map keyed by person id).
 *
 * For each page: read the people, store them, go to the next page, then wait a random
 * 2 to 12 seconds. The loop ends early if the next page cannot be reached.
 *
 * @param {number} pagesCount - Maximum number of pages to scrape.
 * @returns {Promise<void>}
 */
const scrape = async pagesCount => {
  // Reuse the map left by an earlier run so successive scrapes accumulate in one place.
  const peopleMap = window.peopleMap instanceof Map ? window.peopleMap : new Map()
  window.peopleMap = peopleMap

  for (let i = 0; i < pagesCount; i++) {
    const people = getPeople()
    for (const person of people) {
      // Entries without an id cannot be keyed: storing them would make them all
      // overwrite one another under the single `undefined` key.
      if (!person.id) continue
      peopleMap.set(person.id, person)
    }
    console.log(peopleMap, people)

    try {
      await nextPage()
    } catch (err) {
      // Everything collected so far stays in window.peopleMap; just stop paging.
      console.warn('Scraping stopped: could not go to the next page', err)
      break
    }
    // Randomised pause before reading the next page.
    await delay(2000 + Math.random() * 10000)
  }
}
/**
 * Adds a "Scrape N" pill to the search filters bar; clicking it runs `scrape(pagesCount)`.
 *
 * When the filters bar (`.search-reusables__filters-bar-grouping`) is already in the DOM the
 * pill is added synchronously. Otherwise a MutationObserver on the document waits for the bar
 * to appear. A given filters bar receives at most one pill per `pagesCount`.
 *
 * @param {number} pagesCount - Number of pages the pill scrapes when clicked.
 * @returns {Promise<void>} Resolves once the pill is in place.
 */
const addScrapeButton = pagesCount => {
  const getFiltersList = () => document.querySelector('.search-reusables__filters-bar-grouping')

  const addPill = filtersList => {
    const marker = String(pagesCount)
    // This function is invoked again after every page change, so skip bars that
    // already carry our pill for this page count instead of piling up duplicates.
    const alreadyAdded = [...filtersList.children].some(child => child.dataset?.scrapePages === marker)
    if (alreadyAdded) return

    filtersList.style.gap = '7px'
    // Shallow-clone an existing pill to inherit its attributes (and so its styling);
    // fall back to a plain button when the page has no pill to copy.
    const template = document.querySelector('.artdeco-pill')
    const pill = template ? template.cloneNode() : document.createElement('button')
    pill.dataset.scrapePages = marker
    pill.textContent = `Scrape ${pagesCount}`
    pill.addEventListener('click', () => {
      // Surface failures in the console rather than as an unhandled rejection.
      scrape(pagesCount).catch(err => console.error(`Scrape ${pagesCount} failed`, err))
    })
    filtersList.append(pill)
  }

  const filtersList = getFiltersList()
  if (filtersList) {
    addPill(filtersList)
    return Promise.resolve()
  }

  // The bar is not there yet: watch the whole document until it shows up.
  return new Promise(resolve => {
    const observer = new MutationObserver(() => {
      const lateFiltersList = getFiltersList()
      if (!lateFiltersList) return
      observer.disconnect()
      addPill(lateFiltersList)
      resolve()
    })
    observer.observe(document, { childList: true, subtree: true })
  })
}
/**
 * Adds the full set of "Scrape N" pills (1, 5, 10, 20, 50 and 100 pages).
 *
 * Waits 100 ms first, then adds the pills strictly one after another so they end up
 * in ascending order.
 *
 * @returns {Promise<void>} Resolves once every pill has been added.
 */
const addScrapeButtons = async () => {
  await delay(100)
  for (const pagesCount of [1, 5, 10, 20, 50, 100]) {
    await addScrapeButton(pagesCount)
  }
}
// Userscript entry point: runs once when the script loads and adds the scrape pills.
// The leading semicolon is required because this file omits statement-ending semicolons;
// without it the parenthesised function would be parsed as a call on the preceding statement.
;(() => {
  'use strict'
  addScrapeButtons()
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment