Created
May 17, 2014 20:32
-
-
Save valentinkostadinov/4b01796991e1f88bfae0 to your computer and use it in GitHub Desktop.
PhantomJS SEO HTML snapshot server - for AJAX crawling / JavaScript SEO
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * SEO server
 *
 * Generates and serves HTML snapshots for crawlers, esp. GoogleBot.
 * See https://developers.google.com/webmasters/ajax-crawling/docs/specification
 *
 * Note: This is run by phantomjs, not node. See http://phantomjs.org
 *
 */
// PhantomJS modules used by this script
var system = require('system')
var webpage = require('webpage')
var webserver = require('webserver')

// load config: NODE_ENV 'production' selects the 'prod' section, anything else 'dev'
var env = 'dev'
if (system.env.NODE_ENV === 'production') {
  env = 'prod'
}
var conf = require('../../conf/configs')[env]
// port the snapshot server listens on; 8888 when the config value is missing/falsy
var port = conf.seo.port || 8888
// base URL that the rewritten request path is appended to
var targetBaseUrl = conf.web.domain
/**
 * Loads `url` in a fresh headless page and reports the rendered HTML.
 *
 * The snapshot is taken when `page.onCallback` fires (PhantomJS invokes it
 * when the page calls `window.callPhantom()`), not when loading finishes, so
 * the target page decides when its content is ready.
 *
 * `done` is invoked exactly once and the page is always closed afterwards,
 * even if the page signals more than once, the load fails after a signal, or
 * `done` itself throws.
 *
 * @param {string} url - absolute URL to open
 * @param {function(?string, string=)} done - called with `(null, html)` on
 *   success, with the PhantomJS load status (e.g. 'fail') when the page could
 *   not be opened, or with 'timeout' when the page never signalled readiness
 * @param {number} [timeoutMs=30000] - how long to wait for the page before
 *   giving up; without a limit a silent page would keep the request open forever
 */
var render = function(url, done, timeoutMs) {
  var page = webpage.create()
  var settled = false
  var timer = null

  // Reports the outcome once, then releases the page.
  var finish = function(err, html) {
    if (settled) {
      return
    }
    settled = true
    clearTimeout(timer)
    try {
      done(err, html)
    } finally {
      page.close()
    }
  }

  // configure headless browser
  page.settings.loadImages = false
  page.settings.localToRemoteUrlAccessEnabled = true

  page.onCallback = function() {
    finish(null, page.content)
  }

  // armed before open() so that every path through finish() can clear it
  timer = setTimeout(function() {
    finish('timeout')
  }, timeoutMs > 0 ? timeoutMs : 30000)

  page.open(url, function(status) {
    if (status !== 'success') {
      finish(status)
    }
  })
}
// Matches the `_escaped_fragment_` query parameter together with the delimiter
// in front of it, its (still percent-encoded) value, and an optional '&' after it.
var ESCAPED_FRAGMENT = /([&?])_escaped_fragment_=([^&]*)(&?)/

/**
 * Converts a crawler URL carrying `_escaped_fragment_` back into the
 * "pretty" `#!` URL that the browser-facing site uses.
 *
 * @param {string} url - request path including the query string
 * @returns {?string} the path with the parameter removed and, if its value is
 *   non-empty, `#!<decoded value>` appended; null when the parameter is
 *   missing or its value is not valid percent-encoding
 */
function toPrettyUrl(url) {
  var match = url.match(ESCAPED_FRAGMENT)
  if (!match) {
    return null
  }
  // When more parameters follow, keep the leading '?' or '&' so the remaining
  // query string stays well-formed (e.g. '/p?_escaped_fragment_=a&b=1' -> '/p?b=1').
  var prettyUrl = url.replace(ESCAPED_FRAGMENT, function(whole, lead, fragment, trail) {
    return trail ? lead : ''
  })
  var hashFragment = match[2]
  if (hashFragment) {
    try {
      // decodeURIComponent understands UTF-8 sequences, which unescape() does not
      prettyUrl += '#!' + decodeURIComponent(hashFragment)
    } catch (e) {
      // malformed percent-encoding: treat as a bad request
      return null
    }
  }
  return prettyUrl
}

// Ends the response with the given status code and an empty body.
function sendEmpty(res, statusCode) {
  res.statusCode = statusCode
  res.write('')
  res.close()
}

// kick off the server
var server = webserver.create()
var listening = server.listen(port, function(req, res) {
  var url = req.url

  // only GET allowed
  if (req.method !== 'GET') {
    console.error('Deny request method', req.method, url)
    sendEmpty(res, 405)
    return
  }

  // only snapshot requests (those carrying _escaped_fragment_) are served
  var prettyUrl = toPrettyUrl(url)
  if (prettyUrl === null) {
    console.error('Deny request', url)
    sendEmpty(res, 400)
    return
  }

  var targetUrl = targetBaseUrl + prettyUrl
  console.log('Fetching', targetUrl, '(was', url + ')', req.headers['user-agent'])
  render(targetUrl, function(err, html) {
    if (err) {
      console.error('Failed to load page', targetUrl)
      sendEmpty(res, 500)
      return
    }
    res.statusCode = 200
    res.write(html)
    res.close()
    console.log('Served page', targetUrl)
  })
})

// listen() reports failure (e.g. port already in use) through its return
// value; exit instead of idling forever behind a misleading log line.
if (listening) {
  console.info('Listening on port', port, 'targeting', targetBaseUrl)
} else {
  console.error('Could not listen on port', port)
  phantom.exit(1)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
var conf = require('../../conf/configs')[env]
Which conf file is this defined in? I didn't understand.
Please help me.