Created
June 25, 2012 01:12
-
-
Save turtlesoupy/2985844 to your computer and use it in GitHub Desktop.
A node.js process supervisor and a sample use with health checks - see http://blog.argteam.com/coding/hardening-nodejs-production-process-supervisor/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{_} = require 'underscore' | |
child_process = require 'child_process' | |
async = require 'async' | |
# Timing settings. bounceInterval and bounceWait are handed to delayTimeout
# (and so to setTimeout), which makes them milliseconds.
healthCheckInterval = 60000
bounceInterval = 60000
bounceWait = 30000 + bounceInterval

# setTimeout with its arguments flipped, so the delay comes first and a
# trailing function literal can be written last.
delayTimeout = (ms, func) ->
  setTimeout func, ms
# Supervises one node child process: spawns it, health-checks it on a timer
# and respawns it whenever it exits, whether the exit was requested or not.
class MonitoredChild
  # script               - script path handed to the current node binary
  # port                 - value passed to healthCheck and printed in log lines
  # healthCheck          - function (port, done); done receives one boolean
  # environmentVariables - extra environment entries for the child process
  constructor: (@script, @port, @healthCheck, @environmentVariables) ->
    @currentChild = null
    @healthCheckTimeout = null
    @bounceTimeout = null
    # True while an exit requested through bounce() is still pending.
    @expectedExit = false

  # Restart the child: send the default kill signal, then escalate to SIGKILL
  # if it has not exited after bounceInterval. When no child has been started
  # yet, this simply spawns one.
  bounce: ->
    return @respawn() unless @currentChild?
    console.log "Requested bounce of #{@currentChild.pid}, port #{@port}"
    clearTimeout @healthCheckTimeout
    @expectedExit = true
    @currentChild.kill()
    @bounceTimeout = delayTimeout bounceInterval, =>
      console.error "Child did not exit in time, forcefully killing it"
      @currentChild.kill "SIGKILL"

  # Schedule the next health check. A healthy result schedules another check;
  # an unhealthy one SIGKILLs the child, and the exit handler respawns it.
  delayedHealthCheck: ->
    # Uses the module-level interval; this file defines no `config` object.
    @healthCheckTimeout = delayTimeout healthCheckInterval, =>
      start = new Date()
      checkedChild = @currentChild
      @healthCheck @port, (healthy) =>
        # The child may have exited and been replaced while the check was in
        # flight. Its replacement already has its own health-check timer, so a
        # stale verdict must neither kill it nor start a second timer chain.
        return unless checkedChild is @currentChild
        if healthy
          console.log "#{@port} is healthy - ping time #{new Date() - start}ms"
          @delayedHealthCheck()
        else
          console.error "#{@port} did not respond in time, killing it harshly"
          @currentChild.kill "SIGKILL"

  # Spawn a fresh child, forward its output to this process, arrange for it to
  # be respawned on exit, and start the health-check timer.
  respawn: ->
    # Merge into a new object so the caller's object is not mutated, and list
    # the requested variables last so they win over inherited ones.
    childEnv = _.extend {}, process.env, @environmentVariables
    @currentChild = child_process.spawn process.execPath, [@script], env: childEnv
    console.log "Started child", {port: @port, pid: @currentChild.pid}
    @currentChild.stdout.pipe process.stdout
    @currentChild.stderr.pipe process.stderr
    @currentChild.on 'exit', (code, signal) =>
      clearTimeout @healthCheckTimeout if @healthCheckTimeout?
      clearTimeout @bounceTimeout if @bounceTimeout?
      if @expectedExit
        @expectedExit = false
        console.info "Expected exit from child #{@currentChild.pid}, port #{@port} - respawning"
      else
        console.error "Child #{@currentChild.pid}, port #{@port} exited with code #{code}, signal #{signal}, respawning"
      @respawn()
    @delayedHealthCheck()
# Bounce the given supervised children one at a time, pausing bounceWait
# after each bounce before moving on; callback is handed to async as the
# completion callback for the whole series.
exports.bounceChildren = (monitoredChildren, callback) ->
  bounceThenPause = (child, next) ->
    child.bounce()
    setTimeout next, bounceWait
  async.forEachSeries monitoredChildren, bounceThenPause, callback
# Build a MonitoredChild for the given script, start it immediately and
# return the supervisor object.
exports.spawnMonitoredChild = (script, port, healthCheck, environmentVariables) ->
  supervisor = new MonitoredChild script, port, healthCheck, environmentVariables
  supervisor.respawn()
  supervisor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Health check for one worker: connect to `port` on localhost, wait for a
# {cmd: "PLSAUTH"} message, answer with {cmd: "RING"} and report healthy as
# soon as another parseable JSON message arrives.
#
# port - passed straight to net.connect
# cb   - called exactly once with true (healthy) or false (any failure,
#        including a timeout or the peer closing without a reply)
healthCheck = (port, cb) ->
  reported = false
  # Deliver the verdict once; later socket events are ignored.
  report = (healthy) ->
    return if reported
    reported = true
    cb healthy

  c = net.connect port, 'localhost'
  c.setEncoding "utf8"
  gotAuth = false

  c.on 'data', (data) ->
    return if reported
    d = null
    try
      d = JSON.parse(data)
    catch error
      c.end()
      console.error "Health check failed: bad initial response, #{data}"
      return report(false)
    if !gotAuth
      # `d?` guards against a literal JSON `null` payload.
      if d?.cmd == "PLSAUTH"
        gotAuth = true
        c.write JSON.stringify({cmd:"RING"}) + "\r\n"
      else
        c.end()
        console.error "Health check failed: bad initial response, #{data}"
        return report(false)
    else
      c.end()
      console.info "Health check response", {res: d}
      return report(true)

  c.on 'error', (e) ->
    console.error "Health check failed: error connecting #{e}"
    report(false)

  # destroy() without an error argument emits no 'error' event, so the
  # failure has to be reported here or the caller would wait forever.
  c.setTimeout config.healthCheckTimeout, ->
    console.error "Health check failed: timed out"
    c.destroy()
    report(false)

  # The peer closed the connection before any verdict was reached.
  c.on 'close', ->
    return if reported
    console.error "Health check failed: connection closed before a response"
    report(false)
# Start numWorkers supervised copies of ./lib/sfs_socket on consecutive ports
# beginning at startPort, and bounce them when this process receives SIGHUP.
numWorkers = 2
startPort = 31337
children = []
# Exclusive range: the inclusive [0..numWorkers-1] counts down (0, -1) when
# numWorkers is 0 and would spawn two workers instead of none.
for i in [0...numWorkers]
  port = startPort + i
  # Pass the numeric port. NOTE(review): assuming child_monitor is the
  # supervisor module, this value is handed to healthCheck and on to
  # net.connect, where a non-numeric string such as "sfs_socket_31337" is
  # treated as a pipe path rather than a TCP port.
  children.push(child_monitor.spawnMonitoredChild './lib/sfs_socket', port, healthCheck, {SFS_SOCKET_PORT: port, SFS_SOCKET_HOST: socketHost})
process.on "SIGHUP", ->
  console.log "Received SIGHUP, respawning children"
  child_monitor.bounceChildren(children)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
I am just wondering what line 39 in sfs_sockets.coffee is for. Do you have sample code for the sfs_socket_# module inside /libs/sfs_socket? Thanks.