Last active
August 26, 2019 06:32
-
-
Save jaseclamp/ece1f81672a764d379e830e7acee3307 to your computer and use it in GitHub Desktop.
linkedin search results pager
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
what this script does:
it pages through linkedin search results and copies each person's name, profile link, description and location into a javascript array.
once it reaches the end of the results it will prompt to download all the captured people as a tab-delimited file (people.csv).

to use:
go to linkedin, run a search exactly how you want it
go to page 2 of those results
paste the below script into console and hit enter
paste this line into console to enable the script:
var capture = 1;
paste this line into console to disable the script:
var capture = 0;
the reason for this is that once you paste the below script in,
it will automatically keep paging through results.
the only way to stop it is to paste in and execute var capture = 0;

you must keep your browser window active and in the foreground for this to work.
if you can, ensure the screen does not turn off, disable the screen saver, etc.
their ember js only loads content it believes is being seen.

why it was built this way:
this emulates human usage more closely.
it loads a page, scrolls up and down, then saves data from the page.
I suppose it would have been possible to pull data directly from the json that supplies content to their js app
but, as I mention below, that json is complex. I think reading the rendered page will be easier to adapt to changes.
The drawback is that it's slower.

Warning - do not use this script to violate any TOS!!!
Only use it to supplement the way in which you as a human user would normally page through and look at results.
*/
// Set this to the total number of search results.
// The max is 1000; LinkedIn does not show beyond that, so narrow the search.
var total = 991;
// One record per captured search result, in capture order.
var people = [];
// Next free index in `people`, i.e. how many results have been captured so far.
var i = 0;
// Scraping only runs while this equals 1. Kept as `var` (not let/const) so that
// `var capture = 0;` can be pasted into the console again to stop the paging.
var capture = 1;
// Wrap XMLHttpRequest.prototype.open so that any request whose URL contains
// 'blended' (presumably the search-results request) runs scrape() after its
// response has loaded. All other requests pass through untouched.
(function () {
  var origOpen = XMLHttpRequest.prototype.open;
  XMLHttpRequest.prototype.open = function (method, url) {
    // open() also accepts URL objects, which have no .includes(); coerce to a
    // string so this hook can never throw inside the page's own requests.
    if (String(url).includes('blended')) {
      // Start scraping when the response arrives rather than when the request
      // is opened, so the fixed delay inside scrape() is not eaten by latency.
      this.addEventListener('load', function () {
        scrape();
      }, { once: true });
    }
    // Forward every argument (async flag, credentials) and the return value.
    return origOpen.apply(this, arguments);
  };
})();
/**
 * Captures the results on the current search page into the global `people`
 * array, then either clicks the "next" pagination button or, once `total`
 * results have been captured, downloads everything as a tab-separated file.
 *
 * Does nothing unless the global `capture` flag equals 1. Relies on the page's
 * jQuery, on the globals `people`, `i` and `total`, and on `tabValues`.
 */
function scrape() {
  // Number() keeps the original loose match (1, '1', true) without using ==.
  if (Number(capture) !== 1) {
    return;
  }

  // Scroll to the top and then to the bottom so Ember renders content that has
  // not been seen yet. The data is also in the XHR response, but its structure
  // is complex, so the rendered DOM is read instead.
  jQuery('html, body').animate({ scrollTop: 0 }, 1000);
  jQuery('html, body').animate({ scrollTop: jQuery(document).height() }, 1000);

  // Give the two queued 1 s scroll animations time to finish before reading.
  setTimeout(function () {
    jQuery('li.search-result').each(function () {
      var row = jQuery(this);
      var person = {
        name: row.find('span.actor-name').text(),
        link: row.find("a[href^='/in']").prop('href'),
        des: row.find('p.subline-level-1').text().replace(/[\n\r]+/g, ''),
        loc: row.find('p.subline-level-2').text().replace(/[\n\r]+/g, ''),
      };
      people[i] = person;
      console.log('iteration' + i);
      console.log(person);
      // Advance the shared capture counter.
      i++;
    });

    // Still short of the target: simulate a click on "next". Strictly less
    // than, because once `total` people are captured there is no further page
    // and `i <= total` would click a dead button and never download.
    if (i < total) {
      jQuery('.artdeco-pagination__button--next').click();
      return;
    }

    // Otherwise download all captured data as a tab-separated file.
    var tsv = tabValues(people);
    var hiddenElement = document.createElement('a');
    // encodeURIComponent also escapes '#', which would otherwise cut the data
    // URI short at the first '#' found in the scraped text.
    hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURIComponent(tsv);
    hiddenElement.target = '_blank';
    hiddenElement.download = 'people.csv';
    // Some browsers ignore click() on an anchor that is not in the document.
    document.body.appendChild(hiddenElement);
    hiddenElement.click();
    document.body.removeChild(hiddenElement);
  }, 3000);
}
/**
 * Serialises an array of flat objects as tab-separated text.
 *
 * The first line is the header, taken from the keys of the first object. Each
 * following line holds one object's values in that same key order. Every line,
 * including the last, ends with "\n".
 *
 * @param {Object[]} array - Records to serialise; the first one defines the columns.
 * @returns {string} The tab-separated text, or '' when there are no records.
 */
function tabValues(array) {
  // Nothing captured: return an empty document instead of throwing on array[0].
  if (!Array.isArray(array) || array.length === 0) {
    return '';
  }

  // Turns one value into a safe cell: missing values become empty rather than
  // the text "undefined"/"null", and embedded tabs or line breaks collapse to a
  // space so they cannot shift columns or split rows.
  function toCell(value) {
    if (value == null) {
      return '';
    }
    return String(value).replace(/[\t\r\n]+/g, ' ');
  }

  var keys = Object.keys(array[0]);
  var rows = array.map(function (obj) {
    return keys
      .map(function (k) {
        return toCell(obj[k]);
      })
      .join('\t');
  });

  return [keys.join('\t')].concat(rows).join('\n') + '\n';
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment