Skip to content

Instantly share code, notes, and snippets.

@valentinkostadinov
Created May 17, 2014 20:32
Show Gist options
  • Save valentinkostadinov/4b01796991e1f88bfae0 to your computer and use it in GitHub Desktop.
Save valentinkostadinov/4b01796991e1f88bfae0 to your computer and use it in GitHub Desktop.
PhantomJS SEO HTML snapshot server - for AJAX crawling / JavaScript SEO
/*
* SEO server
*
* Generates and serves HTML snapshots for crawlers, esp. GoogleBot.
* See https://developers.google.com/webmasters/ajax-crawling/docs/specification
*
* Note: This is run by PhantomJS, not Node. See http://phantomjs.org
*
*/
var system = require('system')
var webpage = require('webpage')
var webserver = require('webserver')
// load config
// NODE_ENV selects the config section: 'production' -> 'prod', anything else -> 'dev'
var env = (system.env.NODE_ENV === 'production') ? 'prod' : 'dev'
var conf = require('../../conf/configs')[env]
// fall back to the default port when the `seo` section or its `port` is missing
var port = (conf.seo && conf.seo.port) || 8888
// base URL of the site to snapshot; request paths are appended to it
var targetBaseUrl = conf.web.domain
/**
 * Loads `url` in a fresh headless page and reports the rendered HTML.
 *
 * The snapshot is taken when the page triggers `page.onCallback` (in
 * PhantomJS this is fired by `window.callPhantom()` inside the page), so the
 * target application has to signal when it has finished rendering.
 *
 * @param {string} url - URL to open in the headless page
 * @param {function} done - invoked at most once, as `done(null, html)` on
 *   success or `done(status)` when the page fails to open
 */
var render = function(url, done) {
  var page = webpage.create()
  var finished = false

  // Report the outcome only once and always release the page, even if
  // `done` throws; later callbacks from the page are ignored.
  var finish = function(err, html) {
    if (finished) {
      return
    }
    finished = true
    try {
      done(err, html)
    } finally {
      page.close()
    }
  }

  // configure headless browser
  page.settings.loadImages = false
  page.settings.localToRemoteUrlAccessEnabled = true

  page.onCallback = function() {
    // do not touch the page again once it has been closed
    if (!finished) {
      finish(null, page.content)
    }
  }

  page.open(url, function(status) {
    if (status !== 'success') {
      finish(status)
    }
  })
}
// kick off the server
var server = webserver.create()

// query parameter carrying the escaped hash fragment
var FRAGMENT_PARAM = '_escaped_fragment_='

// Sets the status code, writes `body` (empty when omitted) and closes the response.
var respond = function(res, statusCode, body) {
  res.statusCode = statusCode
  res.write(body || '')
  res.close()
}

// Converts a request URL carrying `_escaped_fragment_` into its "pretty"
// `#!` form. Returns null when the URL has no such query parameter.
// Throws URIError when the fragment contains malformed percent-escapes.
var toPrettyUrl = function(url) {
  var queryStart = url.indexOf('?')
  if (queryStart === -1) {
    return null
  }
  var params = url.slice(queryStart + 1).split('&')
  var kept = []
  var fragment = null
  for (var i = 0; i < params.length; i++) {
    // only the first occurrence is treated as the fragment
    if (fragment === null && params[i].indexOf(FRAGMENT_PARAM) === 0) {
      fragment = params[i].slice(FRAGMENT_PARAM.length)
    } else {
      kept.push(params[i])
    }
  }
  if (fragment === null) {
    return null
  }
  // rebuild the query from the remaining parameters so a leading
  // `?_escaped_fragment_=...&a=b` becomes `?a=b` rather than `&a=b`
  var prettyUrl = url.slice(0, queryStart)
  if (kept.length > 0) {
    prettyUrl += '?' + kept.join('&')
  }
  if (fragment) {
    // decodeURIComponent handles UTF-8 percent-escapes, unlike unescape()
    prettyUrl += '#!' + decodeURIComponent(fragment)
  }
  return prettyUrl
}

var listening = server.listen(port, function(req, res) {
  var url = req.url

  // only GET allowed
  if (req.method !== 'GET') {
    console.error('Deny request method', req.method, url)
    respond(res, 405)
    return
  }

  var prettyUrl
  try {
    prettyUrl = toPrettyUrl(url)
  } catch (e) {
    if (!(e instanceof URIError)) {
      throw e
    }
    // a malformed escape sequence is a bad request, not a server error
    prettyUrl = null
  }
  if (prettyUrl === null) {
    console.error('Deny request', url)
    respond(res, 400)
    return
  }

  var targetUrl = targetBaseUrl + prettyUrl
  console.log('Fetching', targetUrl, '(was', url + ')', req.headers['user-agent'])
  render(targetUrl, function(err, html) {
    if (err) {
      console.error('Failed to load page', targetUrl)
      respond(res, 500)
      return
    }
    respond(res, 200, html)
    console.log('Served page', targetUrl)
  })
})

// listen() reports whether the server could be created; do not claim to be
// listening (or keep the process alive) when it could not
if (listening) {
  console.info('Listening on port', port, 'targeting', targetBaseUrl)
} else {
  console.error('Could not listen on port', port)
  phantom.exit(1)
}
@CMCDragonkai
Copy link

Nice. Just a note, I built a service that provides this functionality as well. Might be easier than running your own instances. Plus we use Firefox not PhantomJS since it allows us to stay current with the latest features of HTML5. Check it out https://snapsearch.io/

@verishal
Copy link

var conf = require('../../conf/configs')[env]
Which conf file is this referring to, and where is it defined? I didn't understand.
Please help me.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment