-
-
Save rbecheras/8f7537c6339798eebad5eda5ca347ad6 to your computer and use it in GitHub Desktop.
A Node.js process supervisor and a sample usage with health checks - see http://blog.argteam.com/coding/hardening-nodejs-production-process-supervisor/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dependencies: underscore (object helpers), child_process (spawning), async (series iteration).
{_} = require('underscore')
child_process = require('child_process')
async = require('async')

# One minute, in milliseconds.
healthCheckInterval = 60000

# Grace period (ms) a bounced child gets to exit before it is force-killed.
bounceInterval = 60000

# Pause (ms) between bouncing consecutive children: the grace period plus 30 seconds.
bounceWait = bounceInterval + 30000

# setTimeout with the arguments flipped, so the callback can be written last.
delayTimeout = (ms, func) ->
  setTimeout(func, ms)
# Supervises one child process: spawns it, health-checks it on a timer and
# respawns it whenever it exits (expectedly after a bounce, or unexpectedly).
#
# script               - script path, run with the current node binary (process.execPath)
# port                 - value handed to healthCheck and printed in log lines
# healthCheck          - function (port, callback); callback receives a truthy value when healthy
# environmentVariables - extra environment variables for the child
class MonitoredChild
  constructor: (@script, @port, @healthCheck, @environmentVariables) ->
    @currentChild = null
    @healthCheckTimeout = null
    @bounceTimeout = null
    @expectedExit = false

  # Asks the current child to exit (it is respawned from its 'exit' handler);
  # escalates to SIGKILL if it is still alive after bounceInterval ms.
  # Spawns a child right away when none has been started yet.
  bounce: ->
    return @respawn() unless @currentChild?
    child = @currentChild
    console.log "Requested bounce of #{child.pid}, port #{@port}"
    clearTimeout @healthCheckTimeout
    # A repeated bounce must not leave an earlier force-kill timer running
    clearTimeout @bounceTimeout if @bounceTimeout?
    @expectedExit = true
    child.kill()
    @bounceTimeout = delayTimeout bounceInterval, =>
      console.error "Child did not exit in time, forcefully killing it"
      # Kill the child that was bounced, not whatever is current by then
      child.kill "SIGKILL"

  # Schedules the next health check; a healthy verdict schedules another one,
  # an unhealthy verdict SIGKILLs the child (its 'exit' handler respawns it).
  delayedHealthCheck: ->
    child = @currentChild
    # Uses the module-level interval; no `config` object exists in this module
    @healthCheckTimeout = delayTimeout healthCheckInterval, =>
      start = new Date()
      @healthCheck @port, (healthy) =>
        # Drop a verdict that arrives after the checked child was replaced,
        # otherwise it could kill the new child or start a second check loop
        return unless child is @currentChild
        if healthy
          console.log "#{@port} is healthy - ping time #{new Date() - start}ms"
          @delayedHealthCheck()
        else
          console.error "#{@port} did not respond in time, killing it harshly"
          child.kill "SIGKILL"

  # Spawns a fresh child, pipes its output to this process, arranges for it to
  # be respawned on exit and starts the health-check timer.
  respawn: ->
    child = child_process.spawn process.execPath, [@script],
      # Merge into a fresh object: the caller's object stays untouched and the
      # per-child variables take precedence over the inherited environment
      env: _.extend({}, process.env, @environmentVariables)
    @currentChild = child
    console.log "Started child", {port: @port, pid: child.pid}
    child.stdout.pipe process.stdout
    child.stderr.pipe process.stderr
    child.on 'exit', (code, signal) =>
      clearTimeout @healthCheckTimeout if @healthCheckTimeout?
      clearTimeout @bounceTimeout if @bounceTimeout?
      if @expectedExit
        @expectedExit = false
        console.info "Expected exit from child #{child.pid}, port #{@port} - respawning"
      else
        console.error "Child #{child.pid}, port #{@port} exited with code #{code}, signal #{signal}, respawning"
      @respawn()
    @delayedHealthCheck()
# Bounces the given monitored children one at a time, pausing bounceWait ms
# after each bounce before moving on. `callback` is passed to
# async.forEachSeries as its completion callback.
exports.bounceChildren = (monitoredChildren, callback) ->
  bounceOne = (child, next) ->
    child.bounce()
    delayTimeout bounceWait, next
  async.forEachSeries monitoredChildren, bounceOne, callback
# Creates a MonitoredChild for the given script, starts it immediately and
# returns the supervisor object (e.g. for a later bounceChildren call).
exports.spawnMonitoredChild = (script, port, healthCheck, environmentVariables) ->
  supervisor = new MonitoredChild script, port, healthCheck, environmentVariables
  supervisor.respawn()
  supervisor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Health check for one worker: connects to `port` on localhost, expects a JSON
# message with cmd "PLSAUTH", answers with a "RING" command and treats any
# parseable JSON reply as healthy.
#
# port - passed to net.connect
# cb   - called exactly once with true (healthy) or false (bad response,
#        connection error, timeout, or connection closed without a verdict)
healthCheck = (port, cb) ->
  finished = false
  # Several socket events can follow each other (error then close, data then
  # close, ...); only the first verdict is reported to the caller
  finish = (healthy) ->
    return if finished
    finished = true
    cb healthy

  c = net.connect port, 'localhost'
  c.setEncoding "utf8"
  gotAuth = false
  c.on 'data', (data) ->
    d = null
    try
      d = JSON.parse(data)
    catch error
      c.end()
      console.error "Health check failed: bad initial response, #{data}"
      return finish(false)
    if !gotAuth
      # `d?` guards against a literal JSON `null`, which parses without throwing
      if d?.cmd == "PLSAUTH"
        gotAuth = true
        c.write JSON.stringify({cmd:"RING"}) + "\r\n"
      else
        c.end()
        console.error "Health check failed: bad initial response, #{data}"
        return finish(false)
    else
      c.end()
      console.info "Health check response", {res: d}
      return finish(true)
  c.on 'error', (e) ->
    console.error "Health check failed: error connecting #{e}"
    finish(false)
  c.on 'close', ->
    # Peer went away before any verdict was reached
    unless finished
      console.error "Health check failed: connection closed before a response"
      finish(false)
  c.setTimeout config.healthCheckTimeout, ->
    # destroy() without an error argument emits no 'error' event, so the
    # failure has to be reported explicitly or the caller would wait forever
    console.error "Health check failed: timed out"
    c.destroy()
    finish(false)
# Spawns numWorkers supervised workers on consecutive ports starting at
# startPort, and bounces all of them when the supervisor receives SIGHUP.
numWorkers = 2
startPort = 31337
children = []
# Exclusive range: `[0..numWorkers-1]` would count downwards (0, -1) and spawn
# two workers when numWorkers is 0
for i in [0...numWorkers]
  port = startPort + i
  # NOTE(review): the second argument ends up as the `port` given to
  # healthCheck/net.connect; a non-numeric string there is treated by Node as
  # an IPC path rather than a TCP port - confirm this is intended
  children.push(child_monitor.spawnMonitoredChild './lib/sfs_socket', "sfs_socket_#{port}", healthCheck, {SFS_SOCKET_PORT: port, SFS_SOCKET_HOST: socketHost})

process.on "SIGHUP", ->
  console.log "Received SIGHUP, respawning children"
  child_monitor.bounceChildren(children)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment