Created
July 22, 2013 05:21
-
-
Save wolf0403/6051452 to your computer and use it in GitHub Desktop.
De-AJAXify pages for the Google crawler with Node.js and PhantomJS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var http = require('http'), | |
phantom=require('node-phantom'), // https://github.com/alexscheelmeyer/node-phantom | |
urlparse = require('url'); | |
// Defaults used by proxy_url when the caller does not pass `options`.
var PROXY_DEFAULT_UPSTREAM = 'http://upstream.host.com';
var PROXY_DEFAULT_RENDER_DELAY_MS = 20000;

/**
 * Splits a crawler URL of the form `...?_escaped_fragment_=VALUE` into the
 * URL without that parameter and the equivalent `#!VALUE` hash.
 *
 * Everything in the query string that follows `_escaped_fragment_=` becomes
 * part of the hash; query parameters that precede it stay in the URL.
 *
 * @param {string} url - Request URL (path plus optional query string).
 * @returns {{url: string, hash: string}} `hash` is '' (and `url` is returned
 *   untouched) when the URL carries no `_escaped_fragment_` parameter.
 */
function splitEscapedFragment(url) {
  var parts = urlparse.parse(url);
  var search = parts.search;
  var marker = /[?&]_escaped_fragment_=/;
  var idx = search ? search.search(marker) : -1;
  if (idx < 0) {
    return { url: url, hash: '' };
  }
  // The marker is known to start at `idx`, so stripping it from the tail
  // leaves only the fragment value.
  var fragment = search.substring(idx).replace(marker, '');
  try {
    // The crawler percent-encodes the fragment; restore the original text.
    fragment = decodeURIComponent(fragment);
  } catch (e) {
    // Malformed escape sequence: deliberately fall back to the raw value
    // instead of failing the whole request.
  }
  parts.search = search.substring(0, idx);
  return { url: urlparse.format(parts), hash: '#!' + fragment };
}

/**
 * Returns the first element when `value` is an array, otherwise `value`.
 *
 * NOTE(review): the original code read `request[0].url`, which suggests the
 * phantom bridge may hand over the callback arguments as an array; accepting
 * both shapes keeps the id/url lookups consistent either way - confirm
 * against the node-phantom version in use.
 */
function unwrapCallbackArgument(value) {
  return Array.isArray(value) ? value[0] : value;
}

/**
 * Renders `url` on the upstream site inside PhantomJS and writes the
 * resulting DOM to `res` as `<!DOCTYPE html><html>…</html>`.
 *
 * The page is exported once no resource request has been outstanding for
 * `renderDelay` milliseconds. On any failure (PhantomJS cannot start, page
 * cannot be created/opened/evaluated) the response is ended without a body
 * and the PhantomJS process is shut down, so the client never hangs.
 *
 * @param {string} url - Request URL as received by the HTTP server.
 * @param {object} res - HTTP response (needs `write` and `end`).
 * @param {object} [options]
 * @param {string} [options.upstream='http://upstream.host.com'] - Origin that
 *   is prefixed to `url`.
 * @param {number} [options.renderDelay=20000] - Quiet period in ms that is
 *   waited after the last resource arrived before the DOM is exported.
 */
function proxy_url(url, res, options) {
  var settings = options || {};
  var upstream = typeof settings.upstream === 'string'
    ? settings.upstream
    : PROXY_DEFAULT_UPSTREAM;
  var renderDelay = typeof settings.renderDelay === 'number'
    ? settings.renderDelay
    : PROXY_DEFAULT_RENDER_DELAY_MS;
  var silence = false;
  var finished = false;

  // Writes the rendered document and releases PhantomJS; runs at most once.
  function succeed(ph, content) {
    if (finished) { return; }
    finished = true;
    console.log("Page is: " + JSON.stringify(content));
    res.write("<!DOCTYPE html><html>");
    res.write(content);
    res.end("</html>");
    if (!silence) { console.log("Finished."); }
    ph.exit();
  }

  // Ends the response without a body and releases PhantomJS if it was started.
  function fail(ph, message, err) {
    if (finished) { return; }
    finished = true;
    console.error(message, err || '');
    // Only possible when the caller has not flushed the headers yet.
    if (res.headersSent === false) { res.statusCode = 502; }
    res.end();
    if (ph) { ph.exit(); }
  }

  phantom.create(function (err, ph) {
    if (err) {
      fail(null, "Could not start PhantomJS:", err);
      return;
    }
    ph.createPage(function (err, page) {
      if (err) {
        fail(ph, "Could not create page:", err);
        return;
      }

      // Ids of the resource requests that have not been answered yet.
      var activeRequests = [];

      // Runs after the quiet period; exports only if nothing started meanwhile
      // (in which case the receive handler schedules another attempt).
      function exportWhenIdle() {
        if (finished || activeRequests.length !== 0) { return; }
        page.evaluate(function () {
          return document.documentElement.innerHTML;
        }, function (err, content) {
          if (err || typeof content !== 'string') {
            fail(ph, "Could not read page contents:", err);
            return;
          }
          succeed(ph, content);
        });
      }

      page.onResourceRequested = function (request) {
        var requestData = unwrapCallbackArgument(request);
        activeRequests.push(requestData.id);
        if (!silence) {
          console.log("Requesting: " + JSON.stringify(requestData.url));
        }
      };

      page.onResourceReceived = function (response) {
        var responseData = unwrapCallbackArgument(response);
        var position = activeRequests.indexOf(responseData.id);
        // Guard against -1: splice(-1, 1) would drop an unrelated entry.
        if (position !== -1) {
          activeRequests.splice(position, 1);
        }
        if (!silence) { console.log("Active requests: " + activeRequests.length); }
        if (activeRequests.length === 0) {
          // Wait a bit to make sure rendering etc. can finish.
          setTimeout(exportWhenIdle, renderDelay);
        }
      };

      var target = splitEscapedFragment(url);
      var fullurl = upstream + target.url + target.hash;
      page.open(fullurl, function (err, status) {
        console.log("opened site? ", fullurl, status);
        if (err || status !== 'success') {
          fail(ph, "Could not open " + fullurl + ":", err || status);
        }
      });
    });
  });
}
// Local front-end: every incoming request is rendered through proxy_url,
// which writes a complete HTML document into the response.
http.createServer(function (req, res) {
  // The body is an HTML document, so it must not be announced as text/plain;
  // otherwise clients treat the markup as plain text.
  res.writeHead(200, {'Content-Type': 'text/html; charset=utf-8'});
  console.log("Request: " + req.url);
  proxy_url(req.url, res);
}).listen(1337, '127.0.0.1');
console.log('Server running at http://127.0.0.1:1337/');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment