Created
June 1, 2012 02:33
-
-
Save jhjguxin/2848201 to your computer and use it in GitHub Desktop.
How Monit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################### | |
#操作系统CentOS 5 | |
#1:#yum -y install monit
#2:修改主配置文件 | |
#vi /etc/monit.conf | |
#################################################################### ########## | |
#黄色部分是上次修改过的,根据实际情况配置 | |
############################################################################### | |
## Monit control file | |
############################################################################### | |
## | |
## Comments begin with a '#' and extend through the end of the line. Keywords | |
## are case insensitive. All paths MUST BE FULLY QUALIFIED, starting with '/'.
## | |
## Below you will find examples of some frequently used statements. For | |
## information about the control file and a complete list of statements and | |
## options, please have a look in the Monit manual. | |
## | |
## | |
############################################################################### | |
## Global section | |
############################################################################### | |
## | |
## Start Monit in the background (run as a daemon): | |
# | |
set daemon 30            # run Monit as a background daemon; one check cycle every 30 seconds
    with start delay 60  # wait 60 seconds after Monit starts before running the first check
# check services at 2-minute intervals | |
# with start delay 240 # optional: delay the first check by 4-minutes (by | |
# # default Monit check immediately after Monit start) | |
# | |
# | |
## Set syslog logging with the 'daemon' facility. If the FACILITY option is | |
## omitted, Monit will use 'user' facility by default. If you want to log to | |
## a standalone log file instead, specify the full path to the log file | |
# | |
set logfile syslog facility log_daemon  # log through syslog with the 'daemon' facility; give a full file path instead to use a standalone log file
# | |
# | |
### Set the location of the Monit id file which stores the unique id for the | |
### Monit instance. The id is generated and stored on first Monit start. By | |
### default the file is placed in $HOME/.monit.id. | |
# | |
# The id file is a small data file that Monit generates on first start to hold
# the unique id of this Monit instance. It must not point at an existing script
# such as /etc/rc.d/init.d/monit; it is kept alongside the state file here.
# NOTE(review): confirm /var/monit exists and is writable by the Monit user.
set idfile /var/monit/id
# | |
### Set the location of the Monit state file which saves monitoring states | |
### on each cycle. By default the file is placed in $HOME/.monit.state. If | |
### the state file is stored on a persistent filesystem, Monit will recover | |
### the monitoring state across reboots. If it is on temporary filesystem, the | |
### state will be lost on reboot which may be convenient in some situations. | |
# | |
set statefile /var/monit/state  # file in which Monit saves the monitoring state on each cycle
# | |
## Set the list of mail servers for alert delivery. Multiple servers may be | |
## specified using a comma separator. By default Monit uses port 25 - it is | |
## possible to override this with the PORT option. | |
# | |
set mailserver mail.surfront.com # primary mail server; alert e-mails are delivered through it
# backup.bar.baz port 10025, # backup mailserver on port 10025 | |
# localhost # fallback relay | |
# | |
# | |
## By default Monit will drop alert events if no mail servers are available. | |
## If you want to keep the alerts for later delivery retry, you can use the | |
## EVENTQUEUE statement. The base directory where undelivered alerts will be | |
## stored is specified by the BASEDIR option. You can limit the maximal queue | |
## size using the SLOTS option (if omitted, the queue is limited by space | |
## available in the back end filesystem). | |
# | |
# Queue alerts that cannot be delivered so they can be retried later.
set eventqueue
    basedir /var/monit  # base directory where undelivered events are stored
    slots 100           # limit the queue to 100 events (a count of events, not a size in KB)
# | |
# | |
## Send status and events to M/Monit (for more information about M/Monit
## see http://mmonit.com/). By default Monit registers credentials with | |
## M/Monit so M/Monit can smoothly communicate back to Monit and you don't | |
## have to register Monit credentials manually in M/Monit. It is possible to | |
## disable credential registration using the commented out option below. | |
## Though, if safety is a concern we recommend instead using https when | |
## communicating with M/Monit and send credentials encrypted. | |
# | |
# set mmonit http://monit:[email protected]:8080/collector | |
# # and register without credentials # Don't register credentials | |
# | |
# | |
# | |
## Monit by default uses the following alert mail format: | |
## | |
## --8<-- | |
## From: monit@$HOST # sender | |
## Subject: monit alert -- $EVENT $SERVICE # subject | |
## | |
## $EVENT Service $SERVICE # | |
## # | |
## Date: $DATE # | |
## Action: $ACTION # | |
## Host: $HOST # body | |
## Description: $DESCRIPTION # | |
## # | |
## Your faithful employee, # | |
## Monit # | |
## --8<-- | |
## | |
## You can override this message format or parts of it, such as subject | |
## or sender using the MAIL-FORMAT statement. Macros such as $DATE, etc. | |
## are expanded at runtime. For example, to override the sender, use: | |
# | |
# set mail-format { from: [email protected] } | |
# | |
# | |
## You can set alert recipients who will receive alerts if/when a
## service defined in this file has errors. Alerts may be restricted on | |
## events by using a filter as in the second example below. | |
# | |
# NOTE(review): "[email protected]" looks like a redaction placeholder rather than a
# real address — replace both recipients with valid mailboxes before use.
set alert [email protected] # receive all alerts: the main mailbox that alert mail is sent to
set alert [email protected] only on { timeout } # receive just service-timeout alerts; this is an event filter, not a fallback used when delivery to the first mailbox fails
# # timeout alert | |
# | |
# | |
## Monit has an embedded web server which can be used to view status of | |
## services monitored and manage services from a web interface. See the | |
## Monit Wiki if you want to enable SSL for the web server. | |
# | |
# Embedded web interface: listens on port 2812 and is reachable from this host only.
# NOTE(review): 'admin:monit' looks like the sample credential — change the password.
set httpd port 2812
    use address localhost  # only accept connections made to localhost
    allow localhost        # allow localhost to connect to the server and
    allow admin:monit      # require user 'admin' with password 'monit' (login user and password)
    allow @monit           # allow users of group 'monit' to connect (rw)
    allow @users readonly  # allow users of group 'users' to connect readonly
# | |
# | |
############################################################################### | |
## Services | |
############################################################################### | |
## | |
## Check general system resources such as load average, cpu and memory | |
## usage. Each test specifies a resource, conditions and the action to be | |
## performed should a test fail. | |
# Check system load: load average, memory, swap and CPU usage. Each test that
# exceeds its threshold raises an alert.
# NOTE(review): 'myhost.mydomain.tld' looks like a placeholder — confirm the intended name.
check system myhost.mydomain.tld
    if loadavg (1min) > 4 then alert
    if loadavg (5min) > 2 then alert
    if memory usage > 75% then alert
    if swap usage > 25% then alert
    if cpu usage (user) > 70% then alert
    if cpu usage (system) > 30% then alert
    if cpu usage (wait) > 20% then alert
# | |
# | |
## Check if a file exists, checksum, permissions, uid and gid. In addition | |
## to alert recipients in the global section, customized alert can be sent to | |
## additional recipients by specifying a local alert handler. The service may | |
## be grouped using the GROUP option. More than one group can be specified by | |
## repeating the 'group name' statement. | |
# | |
# check file apache_bin with path /usr/local/apache/bin/httpd | |
# if failed checksum and | |
# expect the sum 8f7f419955cefa0b33a2ba316cba3659 then unmonitor | |
# if failed permission 755 then unmonitor | |
# if failed uid root then unmonitor | |
# if failed gid root then unmonitor | |
# alert [email protected] on { | |
# checksum, permission, uid, gid, unmonitor | |
# } with the mail-format { subject: Alarm! } | |
# group server | |
# | |
# | |
## Check that a process is running, in this case Apache, and that it responds
## to HTTP and HTTPS requests. Check its resource usage such as cpu and memory, | |
## and number of children. If the process is not running, Monit will restart | |
## it by default. In case the service is restarted very often and the | |
## problem remains, it is possible to disable monitoring using the TIMEOUT | |
## statement. This service depends on another service (apache_bin) which | |
## is defined above. | |
# | |
#########################################+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+ | |
#check process apache with pidfile /usr/local/apache/logs/httpd.pid | |
# start program = "/etc/init.d/httpd start" with timeout 60 seconds | |
# stop program = "/etc/init.d/httpd stop" | |
# if cpu > 60% for 2 cycles then alert | |
# if cpu > 80% for 5 cycles then restart | |
# if totalmem > 200.0 MB for 5 cycles then restart | |
# if children > 250 then restart | |
# if loadavg(5min) greater than 10 for 8 cycles then stop | |
# if failed host www.tildeslash.com port 80 protocol http | |
# and request "/somefile.html" | |
# then restart | |
# if failed port 443 type tcpssl protocol http | |
# with timeout 15 seconds | |
# then restart | |
# if 3 restarts within 5 cycles then timeout | |
# depends on apache_bin | |
# group server | |
# | |
# SSH daemon: restart it when port 22 stops answering with the SSH protocol.
check process sshd with pidfile /var/run/sshd.pid
    start program = "/etc/init.d/sshd start"
    stop program  = "/etc/init.d/sshd stop"
    if failed port 22 protocol ssh then restart
    if 5 restarts within 5 cycles then timeout  # stop monitoring after 5 restarts in 5 cycles
# MySQL server (service group 'database'): restart it when a connection to
# 127.0.0.1:3306 fails.
check process mysqld with pidfile /var/run/mysqld/mysqld.pid
    group database
    start program = "/etc/init.d/mysqld start"
    stop program  = "/etc/init.d/mysqld stop"
    if failed host 127.0.0.1 port 3306 then restart
    if 5 restarts within 5 cycles then timeout  # stop monitoring after 5 restarts in 5 cycles
# surfront service: restart it when a connection to 127.0.0.1:15902 fails.
check process surfront with pidfile /var/run/surfront.pid
    start program = "/etc/init.d/surfd start"
    stop program  = "/etc/init.d/surfd stop"
    if failed host 127.0.0.1 port 15902 then restart
    if 5 restarts within 5 cycles then timeout  # stop monitoring after 5 restarts in 5 cycles
# surfadmin service: restart it when a connection to 127.0.0.1:15919 fails.
# NOTE(review): start/stop use the same /etc/init.d/surfd script as the
# 'surfront' check, so a restart here presumably cycles both — confirm intended.
check process surfadmin with pidfile /var/run/admin.pid
    start program = "/etc/init.d/surfd start"
    stop program  = "/etc/init.d/surfd stop"
    if failed host 127.0.0.1 port 15919 then restart
    if 5 restarts within 5 cycles then timeout  # stop monitoring after 5 restarts in 5 cycles
# surfguard service (managed through the 'watchdog' init script): restart it
# when a connection to 127.0.0.1:16217 fails.
check process surfguard with pidfile /var/run/surfguard.pid
    start program = "/etc/init.d/watchdog start"
    stop program  = "/etc/init.d/watchdog stop"
    if failed host 127.0.0.1 port 16217 then restart
    if 5 restarts within 5 cycles then timeout  # stop monitoring after 5 restarts in 5 cycles
# cron daemon: monitored through its pidfile only. crond is not a network
# service, so no TCP port test is used — a port test that can never succeed
# would restart crond every cycle until the timeout rule below disabled the check.
check process crond with pidfile /var/run/crond.pid
    start program = "/etc/init.d/crond start"
    stop program  = "/etc/init.d/crond stop"
    if 5 restarts within 5 cycles then timeout  # stop monitoring after 5 restarts in 5 cycles
#check process nginx with pidfile /var/run/nginx.pid | |
# start program = "/etc/init.d/nginx start" | |
# stop program = "/etc/init.d/nginx stop" | |
# if failed host 127.0.0.1 port 80 then restart | |
# if 5 restarts within 5 cycles then timeout | |
#check process php_cgi with pidfile /var/run/php_cgi.pid | |
# start program = "/etc/init.d/php_cgi start" | |
# stop program = "/etc/init.d/php_cgi stop" | |
# if failed host 127.0.0.1 port 9000 then restart | |
# if 5 restarts within 5 cycles then timeout | |
# | |
## Check filesystem permissions, uid, gid, space and inode usage. Other services, | |
## such as databases, may depend on this resource and an automatically graceful | |
## stop may be cascaded to them before the filesystem will become full and data | |
## lost. | |
# | |
# check filesystem datafs with path /dev/sdb1 | |
# start program = "/bin/mount /data" | |
# stop program = "/bin/umount /data" | |
# if failed permission 660 then unmonitor | |
# if failed uid root then unmonitor | |
# if failed gid disk then unmonitor | |
# if space usage > 80% for 5 times within 15 cycles then alert | |
# if space usage > 99% then stop | |
# if inode usage > 30000 then alert | |
# if inode usage > 99% then stop | |
# group server | |
# | |
# | |
## Check a file's timestamp. In this example, we test if a file is older | |
## than 15 minutes and assume something is wrong if it's not updated. Also,
## if the file size exceeds a given limit, execute a script
# | |
# check file database with path /data/mydatabase.db | |
# if failed permission 700 then alert | |
# if failed uid data then alert | |
# if failed gid data then alert | |
# if timestamp > 15 minutes then alert | |
# if size > 100 MB then exec "/my/cleanup/script" as uid dba and gid dba | |
# | |
# | |
## Check directory permission, uid and gid. An event is triggered if the | |
## directory does not belong to the user with uid 0 and gid 0. In addition, | |
## the permissions have to match the octal description of 755 (see chmod(1)). | |
# | |
# check directory bin with path /bin | |
# if failed permission 755 then unmonitor | |
# if failed uid 0 then unmonitor | |
# if failed gid 0 then unmonitor | |
# | |
# | |
## Check a remote host availability by issuing a ping test and check the | |
## content of a response from a web server. Up to three pings are sent and | |
## connection to a port and an application level network check is performed. | |
# | |
# check host myserver with address 192.168.1.1 | |
# if failed icmp type echo count 3 with timeout 3 seconds then alert | |
# if failed port 3306 protocol mysql with timeout 15 seconds then alert | |
# if failed url http://user:[email protected]:8080/?querystring | |
# and content == 'action="j_security_check"' | |
# then alert | |
# | |
# | |
############################################################################### | |
## Includes | |
############################################################################### | |
## | |
## It is possible to include additional configuration parts from other files or | |
## directories. | |
# | |
#include /etc/monit.d/* | |
# | |
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment