Skip to content

Instantly share code, notes, and snippets.

@ypujante
Created March 19, 2011 16:38
Show Gist options
  • Save ypujante/877590 to your computer and use it in GitHub Desktop.
Save ypujante/877590 to your computer and use it in GitHub Desktop.
/*
* Copyright (c) 2011 Yan Pujante
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/*
build.gradle for compiling this code
------------------------------------
apply plugin: 'groovy'
dependencies {
compile "org.linkedin:org.linkedin.glu.agent-tracker:2.1.1"
groovy "org.codehaus.groovy:groovy:1.7.5"
runtime "org.slf4j:slf4j-log4j12:1.5.8"
runtime "log4j:log4j:1.2.16"
}
*/
package org.linkedin.glu.agent.monitor
import org.linkedin.util.clock.Timespan
import org.linkedin.zookeeper.client.ZKClient
import org.linkedin.glu.agent.tracker.AgentsTrackerImpl
import org.linkedin.glu.agent.tracker.TrackerEventsListener
import org.linkedin.glu.agent.tracker.NodeEvent
import org.apache.zookeeper.WatchedEvent
import org.linkedin.zookeeper.tracker.ErrorListener
import org.linkedin.glu.agent.tracker.AgentInfo
import org.linkedin.glu.agent.tracker.MountPointInfo
import org.linkedin.zookeeper.tracker.NodeEventType
import org.linkedin.glu.agent.api.MountPoint
/**
 * This is the main monitoring class used in the blog post:
 * http://www.pongasoft.com/blog/yan/glu/2011/03/18/building-monitoring-solution-with-glu/
 *
 * It connects to ZooKeeper, tracks the agents and mount points registered under
 * <code>/org/glu/agents/fabrics/&lt;fabric&gt;</code> and logs:
 * <ul>
 * <li>a warning when an agent node is deleted,</li>
 * <li>the load published by the <code>/monitor</code> mount point when it is added or updated,</li>
 * <li>a warning for any error reported by the tracker.</li>
 * </ul>
 * The process then blocks until the JVM shutdown hook fires.
 *
 * @author [email protected] */
public class MonitorMain
{
  public static final String MODULE = MonitorMain.class.getName();
  public static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(MODULE);

  /** ZooKeeper connection string (<code>host:port</code>) */
  def zk = "localhost:2181"

  /** name of the fabric whose agents are tracked */
  def fabric = "glu-dev-1"

  // monitor guarding _receivedShutdown; the shutdown hook notifies on it
  protected final Object _lock = new Object()
  protected volatile boolean _receivedShutdown = false

  /**
   * Connects to ZooKeeper, starts the tracker with all listeners registered and
   * blocks until the shutdown hook has run.
   */
  void run()
  {
    // convenient way to connect to ZooKeeper
    def zkClient = new ZKClient(zk, Timespan.parse("30s"), null)
    zkClient.start()
    zkClient.waitForStart(Timespan.parse('5s'))

    // register a shutdown hook to properly shutdown the connection with ZooKeeper
    addShutdownHook {
      synchronized(_lock) {
        _receivedShutdown = true
        // notifyAll (rather than notify) so that every thread blocked in
        // awaitTermination() is released, not just one of them
        _lock.notifyAll()
      }
      zkClient.destroy()
    }

    // create a tracker rooted at the fabric's node
    def tracker = new AgentsTrackerImpl(zkClient,
                                        "/org/glu/agents/fabrics/${fabric}".toString())

    // register the listeners
    tracker.registerAgentListener(agentEvents as TrackerEventsListener)
    tracker.registerMountPointListener(mountPointEvents as TrackerEventsListener)
    tracker.registerErrorListener(errorListener as ErrorListener)

    // start the tracker
    tracker.start()
    tracker.waitForStart()

    // wait until CTRL-C is issued
    awaitTermination()
  }

  /**
   * Blocks the calling thread until the shutdown hook has set the shutdown flag.
   * The flag is re-checked in a loop so that a spurious wakeup does not end the wait.
   */
  void awaitTermination()
  {
    synchronized (_lock) {
      while(!_receivedShutdown)
      {
        _lock.wait()
      }
    }
  }

  // track agent events (agent up or down)
  def agentEvents = { Collection<NodeEvent<AgentInfo>> events ->
    events.each { NodeEvent<AgentInfo> event ->
      switch(event.eventType)
      {
        case NodeEventType.ADDED:
        case NodeEventType.UPDATED:
          // ignoring add/update event
          break

        case NodeEventType.DELETED:
          log.warn "Detected agent down: ${event.nodeInfo.agentName}"
          break
      }
    }
  }

  // track mountPoint events
  def mountPointEvents = { Collection<NodeEvent<MountPointInfo>> events ->
    events.each { NodeEvent<MountPointInfo> event ->
      // only interested in the monitor events (/monitor was defined in
      // the console entry)
      if(event?.nodeInfo?.mountPoint == MountPoint.fromPath('/monitor'))
      {
        switch(event.eventType)
        {
          case NodeEventType.ADDED:
          case NodeEventType.UPDATED:
            log.info "${event.nodeInfo.agentName} | ${event.nodeInfo.data?.scriptState?.script?.load?.join(',')}"
            break

          case NodeEventType.DELETED:
            // ignoring delete event
            break
        }
      }
    }
  }

  // log any error reported by the tracker together with the event that triggered it
  def errorListener = { WatchedEvent event, Throwable throwable ->
    log.warn("Error detected in agent with ${event}", throwable)
  }

  /**
   * Entry point. Usage: <code>MonitorMain [zk [fabric]]</code>
   *
   * @param args optional: <code>args[0]</code> is the ZooKeeper connection string and
   *             <code>args[1]</code> the fabric name; any argument left out keeps the
   *             default declared on the corresponding property
   */
  static void main(args)
  {
    // an absent or empty argument array is treated the same way: use the defaults
    def argv = (args ?: []) as List
    def mm = new MonitorMain()
    if(argv.size() > 0)
      mm.zk = argv[0]
    if(argv.size() > 1)
      mm.fabric = argv[1]
    mm.run()
  }
}
/*
* Copyright (c) 2011 Yan Pujante
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/**
 * This is the glu script used in the blog post:
 * http://www.pongasoft.com/blog/yan/glu/2011/03/18/building-monitoring-solution-with-glu/
 *
 * Once started, the script periodically runs <code>uptime</code>, stores the extracted
 * load averages in {@link #load} and puts the script in error whenever the first load
 * value reaches the <code>maxLoad</code> init parameter (4.0 when not provided).
 *
 * NOTE(review): <code>shell</code>, <code>stateManager</code>, <code>timers</code>,
 * <code>params</code> and <code>log</code> are not declared in this class; they are
 * presumably injected by the glu agent.
 *
 * @author [email protected] */
class MonitorScript
{
  // extracts every decimal number from the output of uptime and joins them
  // on a single line, separated by spaces
  static String CMD = "uptime | grep -o '[0-9]\\+\\.[0-9]\\+*' | xargs"

  // load is automatically available in ZooKeeper
  def load

  /**
   * Timer callback: samples the load and updates the error state when it changes.
   */
  def monitor = {
    // capturing current state
    def currentError = stateManager.state.error
    def newError = null

    // running uptime => return load average
    def uptime = shell.exec(CMD)
    log.info uptime

    // The command yields nothing when grep finds no match (for example when uptime
    // prints a decimal comma). Indexing the first value would then fail on every
    // tick, so skip this check and keep both the last known load and the state.
    def values = uptime ? uptime.split() : []
    if(!values)
    {
      log.warn "Unable to extract the load average from uptime, skipping check"
      return
    }
    load = values

    // load average for the last minute is load[0]
    // check for load (provided optionally as an init parameter)
    if((load[0] as float) >= ((params.maxLoad ?: 4.0) as float))
      newError = "High load detected..."

    // when error changes => update the state to propagate to ZooKeeper
    if(currentError != newError) {
      stateManager.forceChangeState(null, newError)
      if(newError)
        log.warn "High load detected [${load[0]}]..."
      else
        log.info "Everything back to normal."
    }
  }

  /**
   * Schedules {@link #monitor} to run repeatedly, every <code>frequency</code>
   * init parameter ("5s" when not provided).
   */
  def start = {
    timers.schedule(timer: monitor, repeatFrequency: params.frequency ?: "5s")
  }

  /**
   * Cancels the timer scheduled by {@link #start}.
   */
  def stop = {
    timers.cancel(timer: monitor)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment