Created
July 8, 2014 06:37
-
-
Save pzgz/55bb6a54e5f1a8a1ad8b to your computer and use it in GitHub Desktop.
Ganglia Sample Configuration
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is an example of a Ganglia Meta Daemon configuration file | |
# http://ganglia.sourceforge.net/ | |
# | |
# $Id: gmetad.conf 667 2006-07-20 08:49:41Z knobi1 $ | |
# | |
#------------------------------------------------------------------------------- | |
# Setting the debug_level to 1 will keep the daemon in the foreground and | |
# show only error messages. Setting this value higher than 1 will make | |
# gmetad output debugging information and stay in the foreground. | |
# default: 0 | |
# debug_level 10 | |
# | |
#------------------------------------------------------------------------------- | |
# What to monitor. The most important section of this file. | |
# | |
# The data_source tag specifies either a cluster or a grid to | |
# monitor. If we detect the source is a cluster, we will maintain a complete | |
# set of RRD databases for it, which can be used to create historical | |
# graphs of the metrics. If the source is a grid (it comes from another gmetad), | |
# we will only maintain summary RRDs for it. | |
# | |
# Format: | |
# data_source "my cluster" [polling interval] address1:port address2:port ... | |
# | |
# The keyword 'data_source' must immediately be followed by a unique | |
# string which identifies the source, then an optional polling interval in | |
# seconds. The source will be polled at this interval on average. | |
# If the polling interval is omitted, 15 seconds is assumed. | |
# | |
# A list of machines which service the data source follows, in the | |
# format ip:port, or name:port. If a port is not specified then 8649 | |
# (the default gmond port) is assumed. | |
# default: There is no default value | |
# | |
# data_source "my cluster" 10 localhost my.machine.edu:8649 1.2.3.5:8655 | |
# data_source "my grid" 50 1.3.4.7:8655 grid.org:8651 grid-backup.org:8651 | |
# data_source "another source" 1.3.4.7:8655 1.3.4.8 | |
data_source "Hadoop" dspprod-m:8649 | |
data_source "Console" dspprod-c:8649 | |
data_source "App" dspprod-a1:8649 | |
gridname "Allyes Dsp Hangzhou" | |
# 15 seconds for 1 month, 6 minutes for 1 year | |
RRAs "RRA:AVERAGE:0.5:1:178560" "RRA:AVERAGE:0.5:24:88800" | |
rrd_rootdir "/data/ganglia/rrds" | |
# | |
# Round-Robin Archives | |
# You can specify custom Round-Robin archives here (defaults are listed below) | |
# | |
# RRAs "RRA:AVERAGE:0.5:1:244" "RRA:AVERAGE:0.5:24:244" "RRA:AVERAGE:0.5:168:244" "RRA:AVERAGE:0.5:672:244" \ | |
# "RRA:AVERAGE:0.5:5760:374" | |
# | |
# | |
#------------------------------------------------------------------------------- | |
# Scalability mode. If on, we summarize over downstream grids, and respect | |
# authority tags. If off, we take on 2.5.0-era behavior: we do not wrap our output | |
# in <GRID></GRID> tags, we ignore all <GRID> tags we see, and always assume | |
# we are the "authority" on data source feeds. This approach does not scale to | |
# large groups of clusters, but is provided for backwards compatibility. | |
# default: on | |
# scalable off | |
# | |
#------------------------------------------------------------------------------- | |
# The name of this Grid. All the data sources above will be wrapped in a GRID | |
# tag with this name. | |
# default: Unspecified | |
# gridname "MyGrid" | |
# | |
#------------------------------------------------------------------------------- | |
# The authority URL for this grid. Used by other gmetads to locate graphs | |
# for our data sources. Generally points to a ganglia/ | |
# website on this machine. | |
# default: "http://hostname/ganglia/", | |
# where hostname is the name of this machine, as defined by gethostname(). | |
# authority "http://mycluster.org/newprefix/" | |
# | |
#------------------------------------------------------------------------------- | |
# List of machines this gmetad will share XML with. Localhost | |
# is always trusted. | |
# default: There is no default value | |
# trusted_hosts 127.0.0.1 169.229.50.165 my.gmetad.org | |
# | |
#------------------------------------------------------------------------------- | |
# If you want any host which connects to the gmetad XML to receive | |
# data, then set this value to "on" | |
# default: off | |
# all_trusted on | |
# | |
#------------------------------------------------------------------------------- | |
# If you don't want gmetad to setuid then set this to off | |
# default: on | |
# setuid off | |
# | |
#------------------------------------------------------------------------------- | |
# User gmetad will setuid to (defaults to "ganglia") | |
# default: "ganglia" | |
# setuid_username "ganglia" | |
# | |
#------------------------------------------------------------------------------- | |
# The port on which gmetad answers requests for XML | |
# default: 8651 | |
# xml_port 8651 | |
# | |
#------------------------------------------------------------------------------- | |
# The port on which gmetad answers queries for XML. This facility allows | |
# simple subtree and summation views of the XML tree. | |
# default: 8652 | |
# interactive_port 8652 | |
# | |
#------------------------------------------------------------------------------- | |
# The number of threads answering XML requests | |
# default: 4 | |
# server_threads 10 | |
# | |
#------------------------------------------------------------------------------- | |
# Where gmetad stores its round-robin databases | |
# default: "/var/lib/ganglia/rrds" | |
# rrd_rootdir "/some/other/place" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* This configuration is as close to 2.5.x default behavior as possible | |
The values closely match ./gmond/metric.h definitions in 2.5.x */ | |
globals { | |
daemonize = yes | |
setuid = yes | |
user = ganglia | |
debug_level = 0 | |
max_udp_msg_len = 1472 | |
mute = no | |
deaf = no | |
allow_extra_data = yes | |
host_dmax = 86400 /* secs; set this so that dead hosts are removed */ | |
cleanup_threshold = 300 /*secs */ | |
gexec = no | |
send_metadata_interval = 30 /* secs; set this, otherwise gmond will sometimes get stuck */ | |
} | |
/* | |
* The cluster attributes specified will be used as part of the <CLUSTER> | |
* tag that will wrap all hosts collected by this instance. | |
*/ | |
cluster { | |
name = "cluster_name_to_change" | |
owner = "Allyes" | |
latlong = "unspecified" | |
url = "unspecified" | |
} | |
/* The host section describes attributes of the host, like the location */ | |
host { | |
location = "unspecified" | |
} | |
/* Feel free to specify as many udp_send_channels as you like. Gmond | |
used to only support having a single channel */ | |
udp_send_channel { | |
#bind_hostname = yes # Highly recommended, soon to be default. | |
# This option tells gmond to use a source address | |
# that resolves to the machine's hostname. Without | |
# this, the metrics may appear to come from any | |
# interface and the DNS names associated with | |
# those IPs will be used to create the RRDs. | |
host = host_name_to_change | |
port = 8649 | |
ttl = 1 | |
} | |
/* You can specify as many udp_recv_channels as you like as well. */ | |
udp_recv_channel { | |
port = 8649 | |
} | |
/* You can specify as many tcp_accept_channels as you like to share | |
an xml description of the state of the cluster */ | |
tcp_accept_channel { | |
port = 8649 | |
} | |
/* Each metrics module that is referenced by gmond must be specified and | |
loaded. If the module has been statically linked with gmond, it does | |
not require a load path. However all dynamically loadable modules must | |
include a load path. */ | |
modules { | |
module { | |
name = "core_metrics" | |
} | |
module { | |
name = "cpu_module" | |
path = "modcpu.so" | |
} | |
module { | |
name = "disk_module" | |
path = "moddisk.so" | |
} | |
module { | |
name = "load_module" | |
path = "modload.so" | |
} | |
module { | |
name = "mem_module" | |
path = "modmem.so" | |
} | |
module { | |
name = "net_module" | |
path = "modnet.so" | |
} | |
module { | |
name = "proc_module" | |
path = "modproc.so" | |
} | |
module { | |
name = "sys_module" | |
path = "modsys.so" | |
} | |
} | |
include ('/etc/ganglia/conf.d/*.conf') | |
/* The old internal 2.5.x metric array has been replaced by the following | |
collection_group directives. What follows is the default behavior for | |
collecting and sending metrics that is as close to 2.5.x behavior as | |
possible. */ | |
/* This collection group will cause a heartbeat (or beacon) to be sent every | |
20 seconds. In the heartbeat is the GMOND_STARTED data which expresses | |
the age of the running gmond. */ | |
collection_group { | |
collect_once = yes | |
time_threshold = 20 | |
metric { | |
name = "heartbeat" | |
} | |
} | |
/* This collection group will send general info about this host every | |
1200 secs. | |
This information doesn't change between reboots and is only collected | |
once. */ | |
collection_group { | |
collect_once = yes | |
time_threshold = 1200 | |
metric { | |
name = "cpu_num" | |
title = "CPU Count" | |
} | |
metric { | |
name = "cpu_speed" | |
title = "CPU Speed" | |
} | |
metric { | |
name = "mem_total" | |
title = "Memory Total" | |
} | |
/* Should this be here? Swap can be added/removed between reboots. */ | |
metric { | |
name = "swap_total" | |
title = "Swap Space Total" | |
} | |
metric { | |
name = "boottime" | |
title = "Last Boot Time" | |
} | |
metric { | |
name = "machine_type" | |
title = "Machine Type" | |
} | |
metric { | |
name = "os_name" | |
title = "Operating System" | |
} | |
metric { | |
name = "os_release" | |
title = "Operating System Release" | |
} | |
metric { | |
name = "location" | |
title = "Location" | |
} | |
} | |
/* This collection group will send the status of gexecd for this host | |
every 300 secs.*/ | |
/* Unlike 2.5.x the default behavior is to report gexecd OFF. */ | |
collection_group { | |
collect_once = yes | |
time_threshold = 300 | |
metric { | |
name = "gexec" | |
title = "Gexec Status" | |
} | |
} | |
/* This collection group will collect the CPU status info every 20 secs. | |
The time threshold is set to 90 seconds. In honesty, this | |
time_threshold could be set significantly higher to reduce | |
unnecessary network chatter. */ | |
collection_group { | |
collect_every = 20 | |
time_threshold = 90 | |
/* CPU status */ | |
metric { | |
name = "cpu_user" | |
value_threshold = "1.0" | |
title = "CPU User" | |
} | |
metric { | |
name = "cpu_system" | |
value_threshold = "1.0" | |
title = "CPU System" | |
} | |
metric { | |
name = "cpu_idle" | |
value_threshold = "5.0" | |
title = "CPU Idle" | |
} | |
metric { | |
name = "cpu_nice" | |
value_threshold = "1.0" | |
title = "CPU Nice" | |
} | |
metric { | |
name = "cpu_aidle" | |
value_threshold = "5.0" | |
title = "CPU aidle" | |
} | |
metric { | |
name = "cpu_wio" | |
value_threshold = "1.0" | |
title = "CPU wio" | |
} | |
/* The next two metrics are optional if you want more detail... | |
... since they are accounted for in cpu_system. | |
metric { | |
name = "cpu_intr" | |
value_threshold = "1.0" | |
title = "CPU intr" | |
} | |
metric { | |
name = "cpu_sintr" | |
value_threshold = "1.0" | |
title = "CPU sintr" | |
} | |
*/ | |
} | |
collection_group { | |
collect_every = 20 | |
time_threshold = 90 | |
/* Load Averages */ | |
metric { | |
name = "load_one" | |
value_threshold = "1.0" | |
title = "One Minute Load Average" | |
} | |
metric { | |
name = "load_five" | |
value_threshold = "1.0" | |
title = "Five Minute Load Average" | |
} | |
metric { | |
name = "load_fifteen" | |
value_threshold = "1.0" | |
title = "Fifteen Minute Load Average" | |
} | |
} | |
/* This group collects the number of running and total processes */ | |
collection_group { | |
collect_every = 80 | |
time_threshold = 950 | |
metric { | |
name = "proc_run" | |
value_threshold = "1.0" | |
title = "Total Running Processes" | |
} | |
metric { | |
name = "proc_total" | |
value_threshold = "1.0" | |
title = "Total Processes" | |
} | |
} | |
/* This collection group grabs the volatile memory metrics every 40 secs and | |
sends them at least every 180 secs. This time_threshold can be increased | |
significantly to reduce unneeded network traffic. */ | |
collection_group { | |
collect_every = 40 | |
time_threshold = 180 | |
metric { | |
name = "mem_free" | |
value_threshold = "1024.0" | |
title = "Free Memory" | |
} | |
metric { | |
name = "mem_shared" | |
value_threshold = "1024.0" | |
title = "Shared Memory" | |
} | |
metric { | |
name = "mem_buffers" | |
value_threshold = "1024.0" | |
title = "Memory Buffers" | |
} | |
metric { | |
name = "mem_cached" | |
value_threshold = "1024.0" | |
title = "Cached Memory" | |
} | |
metric { | |
name = "swap_free" | |
value_threshold = "1024.0" | |
title = "Free Swap Space" | |
} | |
} | |
collection_group { | |
collect_every = 40 | |
time_threshold = 300 | |
metric { | |
name = "bytes_out" | |
value_threshold = 4096 | |
title = "Bytes Sent" | |
} | |
metric { | |
name = "bytes_in" | |
value_threshold = 4096 | |
title = "Bytes Received" | |
} | |
metric { | |
name = "pkts_in" | |
value_threshold = 256 | |
title = "Packets Received" | |
} | |
metric { | |
name = "pkts_out" | |
value_threshold = 256 | |
title = "Packets Sent" | |
} | |
} | |
/* Different than 2.5.x default since the old config made no sense */ | |
collection_group { | |
collect_every = 1800 | |
time_threshold = 3600 | |
metric { | |
name = "disk_total" | |
value_threshold = 1.0 | |
title = "Total Disk Space" | |
} | |
} | |
collection_group { | |
collect_every = 40 | |
time_threshold = 180 | |
metric { | |
name = "disk_free" | |
value_threshold = 1.0 | |
title = "Disk Space Available" | |
} | |
metric { | |
name = "part_max_used" | |
value_threshold = 1.0 | |
title = "Maximum Disk Space Used" | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment