Skip to content

Instantly share code, notes, and snippets.

@wolf0403
Created July 22, 2013 05:21
Show Gist options
  • Save wolf0403/6051452 to your computer and use it in GitHub Desktop.
Save wolf0403/6051452 to your computer and use it in GitHub Desktop.
De-AJAXify pages for the Google crawler with Node.js / PhantomJS
var http = require('http'),
phantom=require('node-phantom'), // https://github.com/alexscheelmeyer/node-phantom
urlparse = require('url');
/**
 * Translate a crawler-style request path back into its "hash-bang" form.
 *
 * A URL such as `/page?a=b&_escaped_fragment_=key%3Dvalue` becomes
 * `/page?a=b#!key=value`: the `_escaped_fragment_` parameter (and everything
 * after it) is removed from the query string and appended as a `#!` fragment.
 * URLs without the parameter are returned untouched.
 *
 * @param {string} url - Request path, including any query string.
 * @returns {string} The path to load, with the fragment restored.
 */
function escaped_fragment_to_hashbang(url) {
  var parts = urlparse.parse(url);
  var search = parts.search;
  var marker = /[?&]_escaped_fragment_=/;
  var idx = search ? search.search(marker) : -1;
  var fragment;

  if (idx < 0) {
    return url;
  }

  // Everything after the marker is the (percent-escaped) fragment value.
  fragment = search.substring(idx).replace(marker, '');
  try {
    // The crawler percent-escapes special characters of the original
    // fragment, so they have to be unescaped to get the real hash back.
    fragment = decodeURIComponent(fragment);
  } catch (e) {
    // Malformed escape sequence: fall back to the raw text instead of
    // letting a bad request URL throw.
  }

  parts.search = search.substring(0, idx);
  return urlparse.format(parts) + '#!' + fragment;
}

/**
 * Load `url` from the upstream host inside PhantomJS, wait until the page has
 * no outstanding resource requests, and write the rendered DOM to `res` as an
 * HTML document. The response is always ended exactly once, on success or on
 * failure, and the PhantomJS instance is exited afterwards.
 *
 * @param {string} url - Request path (as found in `req.url`).
 * @param {Object} res - Response to write to; `write()` and `end()` are used,
 *     and `statusCode` is set to 502 on failure if headers are not sent yet.
 * @param {Object} [options] - Optional overrides.
 * @param {string} [options.upstream='http://upstream.host.com'] - Origin
 *     (no trailing slash) that `url` is appended to.
 * @param {number} [options.renderDelay=20000] - Milliseconds to wait after the
 *     last pending resource finished before the DOM is exported.
 * @param {boolean} [options.silent=false] - Suppress progress logging.
 */
function proxy_url(url, res, options) {
  var settings = options || {};
  var upstream = settings.upstream || 'http://upstream.host.com';
  var renderDelay = typeof settings.renderDelay === 'number' ? settings.renderDelay : 20000;
  var silence = settings.silent === true;

  phantom.create(function(err, ph) {
    // Set as soon as the response has been closed, successfully or not.
    var pageHasBeenExported = false;

    // Close the response without a body and release PhantomJS.
    function abort(reason, error) {
      if (pageHasBeenExported) {
        return;
      }
      pageHasBeenExported = true;
      console.error("Failed to render " + url + ": " + reason, error || '');
      if (!res.headersSent) {
        res.statusCode = 502;
      }
      res.end();
      if (ph) {
        ph.exit();
      }
    }

    // Write the rendered markup wrapped in a minimal HTML document.
    function exportPageContents(html) {
      if (pageHasBeenExported) {
        return;
      }
      pageHasBeenExported = true;
      if (!silence) { console.log("Page is: " + JSON.stringify(html)); }
      res.write("<!DOCTYPE html><html>");
      res.write(html);
      res.end("</html>");
      if (!silence) { console.log("Finished."); }
      ph.exit();
    }

    if (err) {
      return abort('could not start PhantomJS', err);
    }

    return ph.createPage(function(err, page) {
      // Ids of resources that were requested but have not finished yet.
      var activeRequests = [];
      var fullurl;

      if (err) {
        return abort('could not create a page', err);
      }

      page.onResourceRequested = function(request) {
        // NOTE(review): the bridge appears to deliver either the bare request
        // object or an argument array ([requestData, ...]); accept both.
        var requestData = Array.isArray(request) ? request[0] : request;
        if (!requestData) {
          return;
        }
        activeRequests.push(requestData.id);
        if (!silence) {
          console.log("Requesting: " + JSON.stringify(requestData.url));
        }
      };

      page.onResourceReceived = function(response) {
        var responseData = Array.isArray(response) ? response[0] : response;
        var position;
        if (!responseData) {
          return;
        }
        // PhantomJS reports a resource in stages ('start' ... 'end'); only the
        // final one means the resource is done.
        if (responseData.stage && responseData.stage !== 'end') {
          return;
        }
        // Only remove ids we actually track: splice(-1, 1) would silently
        // drop an unrelated pending request.
        position = activeRequests.indexOf(responseData.id);
        if (position !== -1) {
          activeRequests.splice(position, 1);
        }
        if (!silence) { console.log("Active requests: " + activeRequests.length); }
        if (activeRequests.length === 0) {
          // Wait a bit to make sure rendering etc. can finish.
          setTimeout(function() {
            // Skip if the response is already closed, or if new requests were
            // started in the meantime (this handler runs again once they
            // have finished).
            if (pageHasBeenExported || activeRequests.length !== 0) {
              return;
            }
            page.evaluate(function () {
              return document.documentElement.innerHTML;
            }, function(err, content) {
              if (err || typeof content !== 'string') {
                return abort('could not read the rendered DOM', err);
              }
              exportPageContents(content);
            });
          }, renderDelay);
        }
      };

      fullurl = upstream + escaped_fragment_to_hashbang(url);
      return page.open(fullurl, function(err, status) {
        if (!silence) { console.log("opened site? ", fullurl, status); }
        if (err || status === 'fail') {
          abort('could not open ' + fullurl, err);
        }
      });
    });
  });
}
// Front-end HTTP server: every incoming request is handed to proxy_url(),
// which renders the page and writes the result to the response.
http.createServer(function (req, res) {
  // proxy_url() writes a complete HTML document ("<!DOCTYPE html><html>...")
  // as strings, which Node encodes as UTF-8, so label the body accordingly
  // instead of as plain text.
  res.writeHead(200, {'Content-Type': 'text/html; charset=utf-8'});
  console.log("Request: " + req.url);
  proxy_url(req.url, res);
}).listen(1337, '127.0.0.1');
console.log('Server running at http://127.0.0.1:1337/');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment