Skip to content

Instantly share code, notes, and snippets.

@addisaden
Last active August 29, 2015 14:22
Show Gist options
  • Save addisaden/2bc49833335e2449ebf8 to your computer and use it in GitHub Desktop.
Save addisaden/2bc49833335e2449ebf8 to your computer and use it in GitHub Desktop.
Script in action http://youtu.be/dN3plMcxYN0
//immobilienscout24.de filter links to a blank page
// VirtualBrowser(callback) returns a reusable loader function.
// Calling the loader with a link loads that link in a hidden iframe, takes a
// deep copy of the loaded document, removes the iframe again and then invokes
// callback with `this` set to the copied document and with exactly the
// arguments the loader was called with (link first, then any extras).
// Author's note: the link must be on the same host as the current page, but
// must not be the same URL.
//
var VirtualBrowser = function(callback) {
  return function(link) {
    // Keep the complete argument list so extra arguments reach the callback.
    var forwardedArgs = arguments;
    var frame = document.createElement("iframe");

    function handleLoad() {
      // Copy first: the iframe's document is gone once the iframe is removed.
      var snapshot = frame.contentDocument.cloneNode(true);
      // Drop the iframe right away to free browser memory.
      document.body.removeChild(frame);
      callback.apply(snapshot, forwardedArgs);
    }

    frame.setAttribute("src", link);
    frame.setAttribute("style", "display: none;");
    frame.onload = handleLoad.bind(this);
    // Loading starts once the iframe is attached to the page.
    document.body.appendChild(frame);
  };
};
// WindowBrowser(callback) returns a reusable loader function.
// Calling the loader with a link opens that link via window.open, and once the
// opened window has loaded it deep-copies its document, closes the window and
// invokes callback with `this` set to the copied document and with exactly
// the arguments the loader was called with.
var WindowBrowser = function(callback) {
  return function(link) {
    // Keep the complete argument list so extra arguments reach the callback.
    var forwardedArgs = arguments;
    var popup = window.open(link);

    function handleLoad() {
      // Copy before closing; the copy is what the callback works on.
      var snapshot = popup.document.cloneNode(true);
      popup.close();
      callback.apply(snapshot, forwardedArgs);
    }

    popup.window.onload = handleLoad.bind(this);
  };
};
// BrowserSandbox(link, linklist, callback) loads one result page through
// VirtualBrowser and appends the direct link of every listing on it to
// linklist. If the page has a "next page" link it recurses into that page with
// the same linklist; otherwise it hands the finished linklist to callback.
// Inside the function `this` is the copied document of the loaded page.
//
var BrowserSandbox = VirtualBrowser(function(link, linklist, callback) {
  var page = this;

  // Snapshot the entries into a real array and collect each listing's href.
  var entries = [].slice.call(page.getElementsByClassName("resultlist_entry_data"));
  for (var idx = 0; idx < entries.length; idx++) {
    linklist.push(entries[idx].childNodes[1].childNodes[1].childNodes[1].href);
  }

  // Any failure while looking up the pager is treated as "no next page".
  var nextPage;
  try {
    nextPage = page.getElementById("pager_next").getElementsByTagName("a")[0].href;
  } catch (e) {
    nextPage = undefined;
  }

  if (!nextPage) {
    callback(linklist);
    return;
  }
  BrowserSandbox(nextPage, linklist, callback);
});
// processPage(callback) collects the listing links of the result page that is
// currently displayed. If that page has a "next page" link, the remaining
// pages are crawled with BrowserSandbox, which eventually calls callback with
// the complete list; otherwise callback receives the links of this page only.
//
var processPage = function(callback) {
  // Snapshot the entries into a real array and collect each listing's href.
  var entries = [].slice.call(document.getElementsByClassName("resultlist_entry_data"));
  var immoScoutLinks = [];
  for (var idx = 0; idx < entries.length; idx++) {
    immoScoutLinks.push(entries[idx].childNodes[1].childNodes[1].childNodes[1].href);
  }

  // Any failure while looking up the pager is treated as "no next page".
  var nextPage;
  try {
    nextPage = document.getElementById("pager_next").getElementsByTagName("a")[0].href;
  } catch (e) {
    nextPage = undefined;
  }

  if (!nextPage) {
    callback(immoScoutLinks);
    return;
  }
  BrowserSandbox(nextPage, immoScoutLinks, callback);
};
/*
* all fields of a page
* --------------------
* var xxx = ([].slice.call(document.getElementsByClassName("is24-ex-details")[0].getElementsByClassName("print-two-columns")).map(function(i) {
* return [].slice.call(i.getElementsByTagName("dl"));
* }))
*
* var xy = [];
* for(var i in xxx) { for(var j in xxx[i]) { xy.push(xxx[i][j]); }}
*
* var z = {};
* xy.map(function(i) { z[i.children[0].textContent.trim()] = i.children[1].textContent.trim(); })
*
*/
// ONLY SINGLE IMMO SITES!!
// .... NOT ALLOWED ....
//
// immoDataFetcher(link, restLinks, results, callback) opens one listing page
// through WindowBrowser and stores its detail fields in results[link] as an
// object mapping label text to value text. It then pops the next link off
// restLinks and recurses; once restLinks is exhausted, callback receives
// results. Inside the function `this` is the copied document of the page.
var immoDataFetcher = WindowBrowser(function(link, restLinks, results, callback) {
  var fields = {};
  var details = this.getElementsByClassName("is24-ex-details")[0];

  // A page without the details container is recorded with no fields. Without
  // this check the lookup below would throw, callback would never run and
  // everything collected so far would be lost.
  if (details) {
    var columns = [].slice.call(details.getElementsByClassName("print-two-columns"));
    // forEach instead of for...in: for...in would also visit enumerable
    // properties that a page script may have added to Array.prototype.
    columns.forEach(function(column) {
      [].slice.call(column.getElementsByTagName("dl")).forEach(function(entry) {
        // First child holds the label, second child holds the value.
        fields[entry.children[0].textContent.trim()] = entry.children[1].textContent.trim();
      });
    });
  }

  results[link] = fields;

  var nextLink = restLinks.pop();
  if (nextLink) {
    immoDataFetcher(nextLink, restLinks, results, callback);
  } else {
    callback(results);
  }
});
// Entry point. Once processPage has collected every search-result link, this
// callback replaces the page content with download links (the link list as
// JSON, the scraped details as CSV and as JSON) and a readable listing of all
// scraped fields.
//
processPage(function(immoScoutLinks) {
  // Quote one CSV field. Embedded double quotes are doubled as RFC 4180
  // requires; a backslash escape is not part of that format.
  var csvQuote = function(value) {
    return "\"" + String(value).replace(/"/g, "\"\"") + "\"";
  };

  // Append <a href="href">label</a> followed by a line break to the body.
  var appendLink = function(href, label) {
    var anchor = document.createElement("a");
    anchor.setAttribute("href", href);
    anchor.appendChild(document.createTextNode(label));
    document.body.appendChild(anchor);
    document.body.appendChild(document.createElement("br"));
  };

  // Empty the page. childNodes is copied first so removal does not disturb
  // the iteration.
  [].slice.call(document.body.childNodes).forEach(function(node) {
    document.body.removeChild(node);
  });

  appendLink("javascript:window.location.reload();", "Reload Page");
  // Built before the pop() below, so the file contains every collected link.
  appendLink(
    "data:application/json;charset=utf-8," + encodeURIComponent(JSON.stringify(immoScoutLinks)),
    "Download Links in JSON-File"
  );

  var nextLink = immoScoutLinks.pop();
  if (!nextLink) {
    // Nothing was found, so there is no listing page to open.
    return;
  }

  immoDataFetcher(nextLink, immoScoutLinks, {}, function(results) {
    // Object.keys visits own properties only, unlike for...in.
    var links = Object.keys(results);

    // csv file downloader
    // Header row: "Link" first, then every field label in first-seen order.
    var header = ["Link"];
    links.forEach(function(link) {
      Object.keys(results[link]).forEach(function(field) {
        if (header.indexOf(field) === -1) {
          header.push(field);
        }
      });
    });

    var lines = links.map(function(link) {
      // Fields a listing does not have stay as empty, unquoted cells.
      var cells = header.map(function() { return ""; });
      cells[0] = csvQuote(link);
      Object.keys(results[link]).forEach(function(field) {
        cells[header.indexOf(field)] = csvQuote(results[link][field]);
      });
      return cells.join(";");
    });
    lines.unshift(header.map(function(label) { return csvQuote(label); }).join(";"));
    var csvfile = lines.join("\r\n");

    appendLink(
      "data:text/csv;charset=utf-8," + encodeURIComponent(csvfile),
      "Download All Data in CSV-File"
    );

    // json file downloader
    appendLink(
      "data:application/json;charset=utf-8," + encodeURIComponent(JSON.stringify(results)),
      "Download All Data in JSON-File"
    );

    // Readable listing: one linked heading per listing, one item per field.
    var list = document.createElement("ul");
    document.body.appendChild(list);
    links.forEach(function(link) {
      var heading = document.createElement("h3");
      var headingLink = document.createElement("a");
      headingLink.setAttribute("href", link);
      headingLink.appendChild(document.createTextNode(link));
      heading.appendChild(headingLink);
      list.appendChild(heading);

      Object.keys(results[link]).forEach(function(field) {
        var item = document.createElement("li");
        list.appendChild(item);
        item.appendChild(document.createTextNode(field + ": " + results[link][field]));
      });
    });
  });
});
// Keep the script's final value undefined. Per the original note this stops
// the browser from redirecting to another page (presumably relevant when the
// script is run as a javascript: URL or bookmarklet; confirm).
undefined;
// Show only the search form on http://www.immobilienscout24.de/ by removing
// everything else from the page body.
var formular = document.body.getElementsByClassName("parbase sectionheader section")[0];
// Only strip the page when the form was found. Otherwise the body would be
// emptied and appendChild(undefined) would throw, leaving a blank page.
if (formular) {
  // childNodes is copied first so removal does not disturb the iteration.
  [].slice.call(document.body.childNodes).forEach(function(e) {
    document.body.removeChild(e);
  });
  document.body.appendChild(formular);
}
// Reduce a window to just the search form. Must be called with `this` set to
// the window to modify: every node is removed from that window's body, then a
// hint text (created by the calling page's document) and the search form are
// appended.
var windowSearchOnly = function() {
  var target = this;
  var body = target.document.body;
  // Grab the form before the body is emptied.
  var formular = body.getElementsByClassName("parbase sectionheader section")[0];

  // Iterate over a copy; childNodes changes while nodes are removed.
  var children = [].slice.call(body.childNodes);
  for (var idx = 0; idx < children.length; idx++) {
    body.removeChild(children[idx]);
  }

  var hint = document.createTextNode("Bitte wechseln Sie das Tab wenn Ihre Suchergebnisse angezeigt werden.");
  body.appendChild(hint);
  body.appendChild(formular);
};
// Open the portal in a new window and reduce it to the search form once it
// has loaded.
var w = window.open("http://www.immobilienscout24.de");
if (w) {
  w.onload = windowSearchOnly.bind(w);
} else {
  // window.open returns null when the browser blocks the popup.
  console.warn("Could not open the search window (popup blocked?).");
}
// Useful snippet: hand an object back immediately and fill it in later from a
// callback. f() returns an empty object; 5 seconds later the property
// hallo = "Welt" is set on that same object.
var f = function() {
  var result = {};
  // One-shot timer: the value only has to be written once. A repeating
  // interval would keep firing forever because nothing ever clears it.
  setTimeout((function() { this.hallo = "Welt"; }).bind(result), 5000);
  return result;
};
// Console transcript: enter the lines below one at a time; each "//=>" line
// shows the value the console prints for the line above it.
var x = f()
x
//=> {}
x
//=> {}
// after 5 seconds the timer started by f() has filled in the same object
x
//=> { hallo: 'Welt' }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment