Created
July 9, 2013 11:17
-
-
Save capaj/5956601 to your computer and use it in GitHub Desktop.
PhantomJS methods to get the raw HTML from an SPA
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var pjson = require('../package.json');
var log4js = require('log4js');
var sLogger = log4js.getLogger('server');

// User-Agent substrings; isAllowedBot() reports a match when the request's
// user agent contains any one of these.
var bots = [
    '+https://developers.google.com/+/web/snippet/', // Google+
    'facebookexternalhit',
    'Googlebot'
];

// Scheme + host that getFromPhantom() prefixes to ':<port><url>'.
// Falls back to localhost when package.json has no (truthy) `domain` field.
var domain = pjson.domain || 'http://localhost';
module.exports = { | |
/** | |
* Checks whether bot is allowed. | |
* @param {string} userAgent | |
*/ | |
isAllowedBot: function (userAgent) { | |
var index = bots.length; | |
while (index--) { | |
if (userAgent.indexOf(bots[index])!== -1) { | |
return true; | |
} | |
} | |
return false; | |
}, | |
/** | |
* Fetches given url from PhantomJS headless webkit | |
* @param proxy {object} proxy returned in a callback after creating a phantom proxy | |
* @param url {string} url you wish to fetch | |
* @param res {object} express.js response | |
* @param port {number} port | |
*/ | |
getFromPhantom: function(proxy, url, res, port) { | |
var url = domain + ':' + port + url; | |
sLogger.info('crawler identified, booting phantom on url ' + url); | |
var page = proxy.page; | |
page.open(url, function () { | |
page.waitForSelector('#phantomFlag', function () { | |
sLogger.info('phantomflag present'); | |
var pageContent = null; | |
page.evaluate(function () { | |
return document.getElementsByTagName('html')[0].innerHTML; | |
}, function(ret){ | |
pageContent = ret; | |
sLogger.info("Callback has been fired, page " + url + " was rendered in PhantomJS"); | |
res.send(pageContent); | |
res.end(); | |
}); | |
}); | |
}); | |
}, | |
// for phantom proxy | |
options: {'loadImages': 'no', 'maxDiskCache': '500000'} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You can then use these methods like this (with express.js and phantom-proxy as dependencies):
var phantom = require('./phantomBridge.js');

// NOTE(review): `phantomProxy`, `agent`, `req`, `res` and `app` are not defined
// in this snippet; presumably it runs inside an express.js request handler with
// `agent` holding the request's User-Agent header - confirm.
phantomProxy.create(phantom.options, function (proxy) {
    if (phantom.isAllowedBot(agent)) {
        // Known crawler: respond with the HTML rendered by PhantomJS.
        phantom.getFromPhantom(proxy, req.url, res, app.get('port'));
    } else {
        // Everyone else gets the static SPA entry page.
        res.sendfile('./public/index.html');
    }
});