jaseclamp · May 16, 2025 20:08
diff --git a/linkedin.js b/linkedin.js

 /*
 what this script does: 
 it pages through linkedin search results and copies each person's name, profile link, description line and location line into a javascript array 
 once it reaches the end of the results it will prompt to download all the captured rows as a tab delimited file (saved as people.csv). 
 to use:
 go to linkedin, run a search exactly how you want it 
 go to page 2 of those results 
 paste the below script into console and hit enter 
 paste this line into console to enable the script: 
 var capture = 1; 
 paste this line into console to disable the script: 
 var capture = 0; 
 the reason for this is once you paste the below script in
 it will automatically keep paging through results
 the only way to stop it is to paste in and execute var capture = 0; 
 you must keep your browser window active and in the forefront for this to work
 if you can, ensure screen does not turn off, disable screen saver etc 
 their ember js only loads content it believes is being seen 
 why it was built this way: 
 this emulates human usage more. 
 it loads a page, scrolls up and down then saves data from the page. 
 I suppose it would have been possible to pull the data directly from the json that supplies content to their js app, 
 but as I mention below, that json is complex. I think reading the rendered page is easier to adapt when they change things. 
 The drawback is that it's slower.
 Warning - do not use this script to violate any TOS!!! 
 Only use to supplement the way in which you as a human user would normally page through and look at results. 
 */

 // on/off switch: scrape() only does anything while this equals 1
 // (declared with var so it can be re-declared from the console)
 var capture = 1;

 // total number of results reported by the search; the running count is
 // compared against it to decide between paging on and downloading
 // linkedin shows at most 1000 results, so narrow the search if needed
 var total = 991;

 // one object per captured search result
 var people = [];

 // next free index in people, i.e. how many results were captured so far
 var i = 0;


 // Wrap XMLHttpRequest.prototype.open so that opening any request whose url
 // contains 'blended' triggers scrape(). The hook fires when the request is
 // opened, not when it completes; scrape() does its own waiting.
 (function() {
     var proto = XMLHttpRequest.prototype;

     // running the script again must not stack a second wrapper, otherwise
     // every matching request would call scrape() more than once
     if (proto.open.__scrapeHook) return;

     var origOpen = proto.open;

     proto.open = function(method, url) {
         // url may be a URL object rather than a string, so coerce it first
         if (String(url).includes('blended')) scrape();
         return origOpen.apply(this, arguments);
     };

     proto.open.__scrapeHook = true;
 })();


 /**
  * Captures the people shown on the current results page, then either moves
  * on to the next page or downloads everything collected so far.
  *
  * Called by the XMLHttpRequest.prototype.open hook each time a 'blended'
  * request is opened. Reads and updates the script-level variables
  * `capture`, `people`, `i` and `total`, and uses the page's jQuery.
  */
 function scrape() {
     // nothing happens unless capturing is switched on
     if (capture != 1) return;

     // have to scroll the page up and down to get ember to load unseen content
     // yes the content is sort of in the xhr object but its structure is
     // pretty complex to understand
     jQuery("html, body").animate({ scrollTop: 0 }, 1000);
     jQuery("html, body").animate({ scrollTop: jQuery(document).height() }, 1000);

     // give the request and both scroll animations time to finish before reading the page
     setTimeout(function() {

         // capture may have been switched off during the wait
         if (capture != 1) return;

         // strip line breaks and surrounding whitespace so a value stays on one row
         var clean = function(text) {
             return text.replace(/[\n\r]+/g, '').trim();
         };

         jQuery('li.search-result').each(function() {
             var row = jQuery(this);

             people[i] = {
                 name: clean(row.find('span.actor-name').text()),
                 link: row.find("a[href^='/in']").prop('href'),
                 des: clean(row.find('p.subline-level-1').text()),
                 loc: clean(row.find('p.subline-level-2').text())
             };

             console.log('iteration' + i);
             console.log(people[i]);

             //increment array counter
             i++;
         });

         var next = jQuery(".artdeco-pagination__button--next");
         // assumes the next button is removed or disabled on the last page - TODO confirm
         var canAdvance = next.length > 0 && !next.prop('disabled');

         // keep paging only while results are still missing and there is a page to go to
         if (i < total && canAdvance) {
             next.click();
             return;
         }

         if (people.length === 0) {
             console.log('nothing captured, skipping download');
             return;
         }

         // otherwise offer everything captured so far as a tsv download
         var tsv = tabValues(people);
         var hiddenElement = document.createElement('a');
         // encodeURIComponent (unlike encodeURI) also escapes '#', which would
         // otherwise cut the data: url short
         hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURIComponent(tsv);
         hiddenElement.target = '_blank';
         hiddenElement.download = 'people.csv';
         hiddenElement.click();

     }, 3000);
 }


 /**
  * Serialises an array of flat objects as tab separated text.
  *
  * The keys of the first object become the header row and fix the column
  * order; every object then contributes one row.
  *
  * @param {Object[]} array rows to serialise
  * @returns {string} header line plus one line per row, each ending in "\n";
  *                   an empty string when there are no rows
  */
 function tabValues(array) {

     // no rows means no first object to take the header from
     if (!Array.isArray(array) || array.length === 0) return "";

     var keys = Object.keys(array[0]);

     // missing values become empty cells; tabs and line breaks inside a value
     // are collapsed to a space so they cannot shift columns or split rows
     var cell = function(value) {
         return value == null ? "" : String(value).replace(/[\t\r\n]+/g, " ");
     };

     var lines = [keys.join("\t")];

     array.forEach(function(obj) {
         lines.push(keys.map(function(k) {
             return cell(obj[k]);
         }).join("\t"));
     });

     return lines.join("\n") + "\n";
 }

	/*
	what this script does:
	it pages through linkedin search results and copies each person's name, profile link, description line and location line into a javascript array
	once it reaches the end of the results it will prompt to download all the captured rows as a tab delimited file (saved as people.csv).
	to use:
	go to linkedin, run a search exactly how you want it
	go to page 2 of those results
	paste the below script into console and hit enter
	paste this line into console to enable the script:
	var capture = 1;
	paste this line into console to disable the script:
	var capture = 0;
	the reason for this is once you paste the below script in
	it will automatically keep paging through results
	the only way to stop it is to paste in and execute var capture = 0;
	you must keep your browser window active and in the forefront for this to work
	if you can, ensure screen does not turn off, disable screen saver etc
	their ember js only loads content it believes is being seen
	why it was built this way:
	this emulates human usage more.
	it loads a page, scrolls up and down then saves data from the page.
	I suppose it would have been possible to pull the data directly from the json that supplies content to their js app,
	but as I mention below, that json is complex. I think reading the rendered page is easier to adapt when they change things.
	The drawback is that it's slower.
	Warning - do not use this script to violate any TOS!!!
	Only use to supplement the way in which you as a human user would normally page through and look at results.
	*/

	// on/off switch: scrape() only does anything while this equals 1
	// (declared with var so it can be re-declared from the console)
	var capture = 1;

	// total number of results reported by the search; the running count is
	// compared against it to decide between paging on and downloading
	// linkedin shows at most 1000 results, so narrow the search if needed
	var total = 991;

	// one object per captured search result
	var people = [];

	// next free index in people, i.e. how many results were captured so far
	var i = 0;


	// Wrap XMLHttpRequest.prototype.open so that opening any request whose url
	// contains 'blended' triggers scrape(). The hook fires when the request is
	// opened, not when it completes; scrape() does its own waiting.
	(function() {
		var proto = XMLHttpRequest.prototype;

		// running the script again must not stack a second wrapper, otherwise
		// every matching request would call scrape() more than once
		if (proto.open.__scrapeHook) return;

		var origOpen = proto.open;

		proto.open = function(method, url) {
			// url may be a URL object rather than a string, so coerce it first
			if (String(url).includes('blended')) scrape();
			return origOpen.apply(this, arguments);
		};

		proto.open.__scrapeHook = true;
	})();


	/**
	 * Captures the people shown on the current results page, then either moves
	 * on to the next page or downloads everything collected so far.
	 *
	 * Called by the XMLHttpRequest.prototype.open hook each time a 'blended'
	 * request is opened. Reads and updates the script-level variables
	 * `capture`, `people`, `i` and `total`, and uses the page's jQuery.
	 */
	function scrape() {
		// nothing happens unless capturing is switched on
		if (capture != 1) return;

		// have to scroll the page up and down to get ember to load unseen content
		// yes the content is sort of in the xhr object but its structure is
		// pretty complex to understand
		jQuery("html, body").animate({ scrollTop: 0 }, 1000);
		jQuery("html, body").animate({ scrollTop: jQuery(document).height() }, 1000);

		// give the request and both scroll animations time to finish before reading the page
		setTimeout(function() {

			// capture may have been switched off during the wait
			if (capture != 1) return;

			// strip line breaks and surrounding whitespace so a value stays on one row
			var clean = function(text) {
				return text.replace(/[\n\r]+/g, '').trim();
			};

			jQuery('li.search-result').each(function() {
				var row = jQuery(this);

				people[i] = {
					name: clean(row.find('span.actor-name').text()),
					link: row.find("a[href^='/in']").prop('href'),
					des: clean(row.find('p.subline-level-1').text()),
					loc: clean(row.find('p.subline-level-2').text())
				};

				console.log('iteration' + i);
				console.log(people[i]);

				//increment array counter
				i++;
			});

			var next = jQuery(".artdeco-pagination__button--next");
			// assumes the next button is removed or disabled on the last page - TODO confirm
			var canAdvance = next.length > 0 && !next.prop('disabled');

			// keep paging only while results are still missing and there is a page to go to
			if (i < total && canAdvance) {
				next.click();
				return;
			}

			if (people.length === 0) {
				console.log('nothing captured, skipping download');
				return;
			}

			// otherwise offer everything captured so far as a tsv download
			var tsv = tabValues(people);
			var hiddenElement = document.createElement('a');
			// encodeURIComponent (unlike encodeURI) also escapes '#', which would
			// otherwise cut the data: url short
			hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURIComponent(tsv);
			hiddenElement.target = '_blank';
			hiddenElement.download = 'people.csv';
			hiddenElement.click();

		}, 3000);
	}


	/**
	 * Serialises an array of flat objects as tab separated text.
	 *
	 * The keys of the first object become the header row and fix the column
	 * order; every object then contributes one row.
	 *
	 * @param {Object[]} array rows to serialise
	 * @returns {string} header line plus one line per row, each ending in "\n";
	 *                   an empty string when there are no rows
	 */
	function tabValues(array) {

		// no rows means no first object to take the header from
		if (!Array.isArray(array) || array.length === 0) return "";

		var keys = Object.keys(array[0]);

		// missing values become empty cells; tabs and line breaks inside a value
		// are collapsed to a space so they cannot shift columns or split rows
		var cell = function(value) {
			return value == null ? "" : String(value).replace(/[\t\r\n]+/g, " ");
		};

		var lines = [keys.join("\t")];

		array.forEach(function(obj) {
			lines.push(keys.map(function(k) {
				return cell(obj[k]);
			}).join("\t"));
		});

		return lines.join("\n") + "\n";
	}
No results found