Last active
February 24, 2020 05:02
-
-
Save tschloss/5982926 to your computer and use it in GitHub Desktop.
# Xing-Scraper ## Description
Scrapes the contact list of a contact (visible page) to CouchDB
Works on the "Kontakte" tab of a contact (if the contacts are displayed by policy)
This version scrapes only the 10 contacts visible, the next version will skip to the next page until last page has been reached ## Prerequisites: 1. This Javascript code has …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript== | |
// @name Xing-Scraper | |
// @namespace http://use.i.E.your.homepage/ | |
// @version 0.95 | |
// @description scrapes the contact list of a contact (visible page) to CouchDB | |
// @match https://www.xing.com/app/profile* | |
// @copyright 2012+, You | |
// ==/UserScript== | |
// Works on the "Kontakte" tab of a contact (if the contacts are displayed by policy)
// This version scrapes only the 10 contacts visible, ... | |
// ...the next version will skip to the next page until last page has been reached | |
// Prerequisites: | |
// 1. This JavaScript code has to be injected into the loaded Xing page
// ... either via Greasemonkey/userscripts or similar
// ... or by using a bookmarklet (not tried yet, but better for occasional use)
// 2. The scrape result is a plain JavaScript object, which this script persists in CouchDB.
// ... You therefore need a CouchDB instance that contains a database named "xing",
// ... and you need to know its URL.
// ... The author uses Iriscouch, an online service that is free for small use: https://www.iriscouch.com
var couchdburl = "https://XXXXXXXX.iriscouch.com:6984/xing"; // <-- YOUR database URL goes here!
// 3. Prevent the browser from blocking AJAX requests to non-Xing URLs
// ... (FYI "same-origin policy") http://en.wikipedia.org/wiki/Same_origin_policy
// ... my way: start Chrome with the parameter "--disable-web-security"
// ... e.g. on OS X open a terminal and launch "open /Applications/Google\ Chrome.app/ --args --disable-web-security"
// ... (Discussion: tried "jsonp" instead, but jQuery changed PUT and POST requests to GET - did not work at all!!)
// Feedback area: status messages are printed into the page because "alert()" does not work in xing.com.
// The yellow span#ooo is attached to div#profile; if that container is not present
// (e.g. the page layout changed) it falls back to <body> so messages are not silently lost.
// (An earlier variant attached the span to div.wrap_content_top instead.)
(function () {
  var host = $("div#profile");
  if (host.length === 0) {
    host = $("body");
  }
  host.append('<span id="ooo"> </span>');
  $("span#ooo").css("background-color", "yellow");
})();
// Little utility method: "containsel" reports whether the array already contains a value.
//   v       - the value to look for (compared with strict equality, ===)
//   returns - true if some element is strictly equal to v, otherwise false
// The method is installed as a NON-enumerable property: a plain assignment to
// Array.prototype would make "containsel" show up in every for...in loop over any
// array, including loops in the host page's own scripts.
Object.defineProperty(Array.prototype, "containsel", {
  value: function (v) {
    for (var i = 0; i < this.length; i++) {
      if (this[i] === v) return true;
    }
    return false;
  },
  writable: true,
  configurable: true,
  enumerable: false
});
// Source contact information: the id of the profile whose contact list is being scraped.
// It is taken from the href of the link inside the second <li> of ul.subnav and is
// everything that follows "/profile/" in that href (including any trailing path or query).
// If that link cannot be found, the script reports it on the page and stops with a
// descriptive error instead of failing with an opaque TypeError on ".replace".
var sourceid = (function () {
  var href = $("ul.subnav > li").eq(1).find("a").attr("href");
  if (typeof href !== "string") {
    $("span#ooo").append(" Error: source profile link not found");
    throw new Error("Xing-Scraper: no profile link found in ul.subnav (second item); cannot determine source id");
  }
  return href.replace(/^.*\/profile\/(.*)$/, "$1");
})();
//... more contact data like company, name, function, phone, email etc. could be scraped here
//... and PUT to the database as well
// If present, scrape one page of linked contacts (10 rows) and upsert each one into CouchDB.
// For every <tr> of table#profile-contacts that carries a profile link:
//   1. read photo, hover-card URL, name, profile link and company from the row,
//   2. GET the existing document for that contact id (synchronously, so that the
//      merge below sees the result before the write is issued),
//   3. if it exists, add the current source profile to its "source" list,
//      otherwise build a new contact document,
//   4. POST the document back (CouchDB accepts _id / _rev inside the body).
// Uses the globals couchdburl and sourceid defined earlier in this script.
(function () {
  var XING_ORIGIN = "https://xing.com";

  // Appends a status message to the on-page feedback span as plain text, so that
  // scraped or server-provided content is never interpreted as HTML.
  function report(message) {
    $("span#ooo").append(document.createTextNode(message));
  }

  // Turns a site-relative path into an absolute Xing URL. Already-absolute or
  // protocol-relative URLs are returned unchanged; a missing value stays undefined
  // (JSON.stringify then omits the key instead of storing "...undefined").
  function toAbsoluteUrl(path) {
    if (typeof path !== "string") return undefined;
    if (/^(https?:)?\/\//.test(path)) return path;
    return XING_ORIGIN + path;
  }

  $("table#profile-contacts").find("tr").each(function () {
    var cells = $("td", this);
    var photoLink = cells.find("a.user-photo");
    var hovercardurl = photoLink.attr("data-hover-card");
    var photourl = photoLink.find("img").attr("src");

    var links = cells.eq(1).find("a");
    var name = links.eq(0).text();
    var link = links.eq(0).attr("href");
    // Rows without a profile link (e.g. header rows) are skipped; previously they
    // threw on ".replace" and aborted the scraping of all remaining rows.
    if (typeof link !== "string") return;

    // Declared locally; this used to leak as an implicit global.
    var id = link.replace(/^.*\/profile\/(.*?)\/.*$/, "$1");
    var firma = links.eq(-1).text(); // last link in the cell is used as the company

    // Look up an existing document. On any error (including 404 for unknown
    // contacts) "contact" stays empty and a new document is created below.
    var contact = {};
    jQuery.ajax({
      type: "GET",
      async: false,
      url: couchdburl + "/" + encodeURIComponent(id),
      dataType: "json",
      success: function (data) {
        if (data !== null && typeof data === "object") contact = data;
      }
    });

    if (contact["_id"] !== undefined) {
      // Existing contact: add the source profile unless it is already recorded.
      // [].concat() also copes with a "source" stored as a single value.
      var known = contact["source"] === undefined ? [] : [].concat(contact["source"]);
      if (known.indexOf(sourceid) !== -1) return; // nothing changed: do not send an empty update
      contact["source"] = known.concat(sourceid);
    } else {
      // Unknown contact: create a fresh document.
      contact = {
        "_id": id,
        "name": name,
        "company": firma,
        "source": [sourceid],
        "url": toAbsoluteUrl(link),
        "photourl": toAbsoluteUrl(photourl),
        "hovercardurl": hovercardurl
      };
    }

    jQuery.ajax({
      type: "POST", // works instead of PUT as long as _id (and _rev for updates) is in the body
      url: couchdburl,
      contentType: "application/json; charset=UTF-8",
      data: JSON.stringify(contact),
      error: function (xhr, errmsg, err) { report(" Error:" + errmsg + JSON.stringify(err)); },
      success: function (data) { report(" OK:" + JSON.stringify(data)); }
    });
  });
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment