Skip to content

Instantly share code, notes, and snippets.

@rbecheras
Forked from turtlesoupy/child_monitor.coffee
Created April 11, 2016 22:53
Show Gist options
  • Save rbecheras/8f7537c6339798eebad5eda5ca347ad6 to your computer and use it in GitHub Desktop.
Save rbecheras/8f7537c6339798eebad5eda5ca347ad6 to your computer and use it in GitHub Desktop.
A Node.js process supervisor and a sample usage with health checks - see http://blog.argteam.com/coding/hardening-nodejs-production-process-supervisor/
{_} = require 'underscore'
child_process = require 'child_process'
async = require 'async'
# Timing knobs, all in milliseconds.
ONE_SECOND = 1000
# Pause between two consecutive health checks of the same child.
healthCheckInterval = 60 * ONE_SECOND
# Grace period a bounced child gets before it is force-killed.
bounceInterval = 60 * ONE_SECOND
# Pause between bouncing two children; longer than the grace period above.
bounceWait = bounceInterval + 30 * ONE_SECOND

# setTimeout with the arguments flipped so the callback can be written last.
# Returns whatever setTimeout returns (usable with clearTimeout).
delayTimeout = (ms, func) ->
  setTimeout func, ms
# Supervises one child process: spawns it with the current node executable,
# respawns it whenever it exits, polls it with a caller-supplied health check
# and can restart ("bounce") it on demand.
class MonitoredChild
  # script               - script path passed to process.execPath.
  # port                 - value used in log lines and handed to healthCheck.
  # healthCheck          - function (port, callback); the callback receives a
  #                        truthy value when the child is considered healthy.
  # environmentVariables - extra environment entries for the child.
  constructor: (@script, @port, @healthCheck, @environmentVariables) ->
    @currentChild = null
    @healthCheckTimeout = null
    @bounceTimeout = null
    @expectedExit = false

  # Restart the child: send it the default kill signal and, if it has not
  # exited after bounceInterval ms, SIGKILL it. The 'exit' handler installed
  # by respawn() starts the replacement. Spawns directly when no child exists.
  bounce: ->
    return @respawn() unless @currentChild?

    child = @currentChild
    console.log "Requested bounce of #{child.pid}, port #{@port}"
    clearTimeout @healthCheckTimeout
    # Drop a force-kill timer left over from an earlier bounce so it cannot
    # fire later against a different child.
    clearTimeout @bounceTimeout if @bounceTimeout?
    @expectedExit = true
    child.kill()
    @bounceTimeout = delayTimeout bounceInterval, =>
      console.error "Child did not exit in time, forcefully killing it"
      # Kill the child that was bounced, not whatever @currentChild is by now.
      child.kill "SIGKILL"

  # Schedule the next health check for the current child. A healthy result
  # schedules another check; an unhealthy one SIGKILLs the child (the 'exit'
  # handler then respawns it).
  delayedHealthCheck: ->
    child = @currentChild
    # Uses the module-level healthCheckInterval; `config` is not defined in
    # this module.
    @healthCheckTimeout = delayTimeout healthCheckInterval, =>
      start = new Date()
      @healthCheck @port, (healthy) =>
        # The child may have exited and been replaced while the check was in
        # flight. The replacement has its own polling chain, so a stale
        # verdict must neither kill it nor start a second chain.
        return unless child is @currentChild
        if healthy
          console.log "#{@port} is healthy - ping time #{new Date() - start}ms"
          @delayedHealthCheck()
        else
          console.error "#{@port} did not respond in time, killing it harshly"
          child.kill "SIGKILL"

  # Spawn a fresh child, forward its stdout/stderr to this process, respawn
  # it on exit and start health checking it.
  respawn: ->
    # Merge into a new object so the caller's environmentVariables are not
    # mutated, and list them last so per-child settings win over the
    # inherited environment.
    child = child_process.spawn process.execPath, [@script],
      env: _.extend({}, process.env, @environmentVariables)
    @currentChild = child

    console.log "Started child", {port: @port, pid: child.pid}
    child.stdout.pipe process.stdout
    child.stderr.pipe process.stderr

    child.on 'exit', (code, signal) =>
      clearTimeout @healthCheckTimeout if @healthCheckTimeout?
      clearTimeout @bounceTimeout if @bounceTimeout?

      if @expectedExit
        @expectedExit = false
        console.info "Expected exit from child #{child.pid}, port #{@port} - respawning"
      else
        console.error "Child #{child.pid}, port #{@port} exited with code #{code}, signal #{signal}, respawning"

      @respawn()

    @delayedHealthCheck()
# Bounce the given MonitoredChild instances one after another, waiting
# bounceWait ms after each bounce before moving on to the next. `callback`
# is handed to async.forEachSeries as its completion callback.
exports.bounceChildren = (monitoredChildren, callback) ->
  bounceThenWait = (child, next) ->
    child.bounce()
    delayTimeout bounceWait, next

  async.forEachSeries monitoredChildren, bounceThenWait, callback
# Create a MonitoredChild for `script`, start it immediately and return the
# supervisor object so the caller can bounce it later.
exports.spawnMonitoredChild = (script, port, healthCheck, environmentVariables) ->
  supervisor = new MonitoredChild script, port, healthCheck, environmentVariables
  supervisor.respawn()
  supervisor
# Sample health check: connect to `port` on localhost, expect a JSON greeting
# whose cmd is "PLSAUTH", answer with a "RING" command and treat the next
# parseable JSON message as proof of life.
#
# port - passed to net.connect together with 'localhost'.
# cb   - called exactly once with true (healthy) or false (unhealthy).
#
# NOTE(review): `net` and `config` are not defined in the visible part of the
# file; they are assumed to be in scope where this sample is used.
healthCheck = (port, cb) ->
  c = net.connect port, 'localhost'
  c.setEncoding "utf8"
  gotAuth = false
  finished = false

  # Deliver the verdict once; data, error and timeout can all race.
  finish = (healthy) ->
    return if finished
    finished = true
    cb healthy

  c.on 'data', (data) ->
    return if finished
    d = null
    try
      d = JSON.parse(data)
    catch error
      c.end()
      console.error "Health check failed: bad initial response, #{data}"
      return finish(false)

    if !gotAuth
      # `d?` guards against a JSON `null` payload, which parses fine but has
      # no properties.
      if d?.cmd == "PLSAUTH"
        gotAuth = true
        c.write JSON.stringify({cmd:"RING"}) + "\r\n"
      else
        c.end()
        console.error "Health check failed: bad initial response, #{data}"
        return finish(false)
    else
      c.end()
      console.info "Health check response", {res: d}
      return finish(true)

  c.on 'error', (e) ->
    return if finished
    console.error "Health check failed: error connecting #{e}"
    finish false

  # destroy() without an error argument does not emit 'error', so the failure
  # has to be reported here; otherwise an unresponsive child would never be
  # flagged and the caller would wait forever.
  c.setTimeout config.healthCheckTimeout, ->
    c.destroy()
    return if finished
    console.error "Health check failed: timed out waiting for a response"
    finish false
# Sample usage: start numWorkers supervised workers on consecutive ports
# beginning at startPort, and bounce all of them when SIGHUP is received.
#
# NOTE(review): `child_monitor` and `socketHost` are not defined in the
# visible part of the file; they are assumed to be in scope.
numWorkers = 2
startPort = 31337
children = []
# Exclusive range: [0..numWorkers-1] would count downward (0, -1) and spawn
# two workers when numWorkers is 0.
for i in [0...numWorkers]
  port = startPort + i
  workerEnv = {SFS_SOCKET_PORT: port, SFS_SOCKET_HOST: socketHost}
  # Pass the numeric port: the supervisor hands this value to healthCheck,
  # which gives it to net.connect, and a non-numeric string such as
  # "sfs_socket_31337" would be treated as an IPC path instead of a TCP port.
  children.push child_monitor.spawnMonitoredChild('./lib/sfs_socket', port, healthCheck, workerEnv)

process.on "SIGHUP", ->
  console.log "Received SIGHUP, respawning children"
  child_monitor.bounceChildren(children)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment