Created
May 17, 2014 20:32
-
-
Save valentinkostadinov/4b01796991e1f88bfae0 to your computer and use it in GitHub Desktop.
PhantomJS SEO HTML snapshot server - for AJAX crawling / JavaScript SEO
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * SEO server
 *
 * Generates and serves HTML snapshots for crawlers, esp. GoogleBot.
 * See https://developers.google.com/webmasters/ajax-crawling/docs/specification
 *
 * Note: This is run by PhantomJS, not Node. See http://phantomjs.org
 *
 */
// Modules provided by the PhantomJS runtime (this script is not run by Node).
var system = require('system')
var webpage = require('webpage')
var webserver = require('webserver')

// load config
// The config module is expected to export one object per environment, e.g.
//   { dev: { seo: { port: ... }, web: { domain: ... } }, prod: { ... } }
// 'prod' is selected only when NODE_ENV is exactly 'production'.
var env = (system.env.NODE_ENV === 'production') ? 'prod' : 'dev'
var conf = require('../../conf/configs')[env]

// Port this snapshot server listens on; falls back to 8888 when the config
// has no `seo` section or no port in it.
var port = (conf.seo && conf.seo.port) || 8888

// Prefix that each incoming request path is appended to when building the
// address of the page to snapshot.
var targetBaseUrl = conf.web.domain
/**
 * Load a page in a headless browser and hand back its HTML.
 *
 * The snapshot is taken when the page signals readiness through the
 * `onCallback` hook (PhantomJS fires it when the page calls
 * `window.callPhantom()`), not merely when loading finishes.
 *
 * @param {string} url - Address of the page to load.
 * @param {function} done - Called exactly once as `done(err, html)`. `err` is
 *   null on success, the `page.open` status when loading fails, or the string
 *   'timeout' when the page never signals readiness.
 * @param {number} [timeoutMs=30000] - Give up after this many milliseconds.
 */
var render = function(url, done, timeoutMs) {
  var page = webpage.create()
  var finished = false
  var timer = null

  // Report the outcome once, then release the page even if `done` throws.
  var finish = function(err, html) {
    if (finished) {
      return
    }
    finished = true
    clearTimeout(timer)
    try {
      done(err, html)
    } finally {
      page.close()
    }
  }

  // configure headless browser
  page.settings.loadImages = false
  page.settings.localToRemoteUrlAccessEnabled = true

  page.onCallback = function() {
    finish(null, page.content)
  }

  // Without a deadline, a page that never signals readiness would keep the
  // caller waiting and the page object alive indefinitely.
  timer = setTimeout(function() {
    finish('timeout')
  }, timeoutMs > 0 ? timeoutMs : 30000)

  page.open(url, function(status) {
    // A successful load is not the finish line; wait for the page's signal.
    if (status !== 'success') {
      finish(status)
    }
  })
}
// Matches the `_escaped_fragment_` query parameter together with the
// separator in front of it and, when further parameters follow, the `&`
// behind it. Group 2 is the still-encoded fragment value.
var escapedFragment = /([?&])_escaped_fragment_=([^&]*)(&?)/

// Finish a request with the given status code and an empty body.
var reject = function(res, statusCode) {
  res.statusCode = statusCode
  res.write('')
  res.close()
}

// kick off the server
// Each GET request carrying `_escaped_fragment_` is mapped back to the
// corresponding `#!` URL on the target site, rendered, and answered with the
// resulting HTML. Anything else is refused.
var server = webserver.create()
var listening = server.listen(port, function(req, res) {
  var url = req.url

  // only GET allowed
  if (req.method !== 'GET') {
    console.error('Deny request method', req.method, url)
    reject(res, 405)
    return
  }

  var match = url.match(escapedFragment)
  if (!match) {
    console.error('Deny request', url)
    reject(res, 400)
    return
  }

  // Remove the parameter. When other parameters follow it, keep the leading
  // separator so that `?_escaped_fragment_=x&b=2` becomes `?b=2`, not `&b=2`.
  var prettyUrl = url.replace(escapedFragment, function(whole, lead, fragment, trail) {
    return trail ? lead : ''
  })

  var hashFragment = match[2]
  if (hashFragment) {
    try {
      // The fragment arrives percent-encoded; decodeURIComponent handles
      // multi-byte UTF-8 sequences but throws on malformed escapes.
      prettyUrl += '#!' + decodeURIComponent(hashFragment)
    } catch (e) {
      console.error('Deny request with malformed fragment', url)
      reject(res, 400)
      return
    }
  }

  var targetUrl = targetBaseUrl + prettyUrl
  console.log('Fetching', targetUrl, '(was', url + ')', req.headers['user-agent'])

  render(targetUrl, function(err, html) {
    if (err) {
      console.error('Failed to load page', targetUrl)
      reject(res, 500)
      return
    }
    res.statusCode = 200
    res.write(html)
    res.close()
    console.log('Served page', targetUrl)
  })
})

// `listen` reports failure (e.g. port already in use) through its return
// value rather than by throwing, so check it before claiming to be up.
if (listening) {
  console.info('Listening on port', port, 'targeting', targetBaseUrl)
} else {
  console.error('Could not listen on port', port)
  phantom.exit(1)
}
var conf = require('../../conf/configs')[env]
Which file is this conf defined in? I didn't understand.
Please help me.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice. Just a note, I built a service that provides this functionality as well. Might be easier than running your own instances. Plus we use Firefox not PhantomJS since it allows us to stay current with the latest features of HTML5. Check it out https://snapsearch.io/