Nagios监控mongodb分片集群服务实战(一)

2014-11-24 00:33:34 · 作者: · 浏览: 30
1,监控插件下载
Mongodb插件下载方式为:git clone git://github.com/mzupan/nagios-plugin-mongodb.git 。刚开始本人这里没有安装git环境,是找网友“草根”帮忙下载的,之后上传到了csdn资源页面,新的下载地址为:http://download.csdn.net/detail/mchdba/8019077

2,添加新的mongodb监控命令

因为mongodb服务是和mysql从库共用一台物理机,之前已经做了基础nagios以及mysql服务监控,所以这里只需要在原来的基础上添加mongodb命令和服务即可。Nagios监控mysql请参考:http://blog.itpub.net/26230597/viewspace-760141/ 以及 http://blog.itpub.net/26230597/viewspace-1217246/ 。这里需要添加的mongodb监控命令如下所示:

[root@wgq objects]# cd /usr/local/nagios/etc/objects
[root@wgq objects]# vim commands.cfg
# Generic MongoDB check: runs check_mongodb.py from the nagios-plugin-mongodb
# directory under $USER1$ against the host's address ($HOSTADDRESS$).
# Arguments, as used by the service definitions in this article:
#   $ARG1$ -> -A  check action (e.g. connect, connections, replication_lag, memory)
#   $ARG2$ -> -P  MongoDB port (presumably; the services pass 27017 / 30000)
#   $ARG3$ -> -W  warning threshold
#   $ARG4$ -> -C  critical threshold
define command {
    command_name check_mongodb
    command_line $USER1$/nagios-plugin-mongodb/check_mongodb.py -H $HOSTADDRESS$ -A $ARG1$ -P $ARG2$ -W $ARG3$ -C $ARG4$
}

# Database-scoped MongoDB check: same invocation as check_mongodb
# ($ARG1$..$ARG4$ -> -A / -P / -W / -C) with one extra argument:
#   $ARG5$ -> -d  presumably the database name to check; confirm against the plugin's usage text
define command {
    command_name check_mongodb_database
    command_line $USER1$/nagios-plugin-mongodb/check_mongodb.py -H $HOSTADDRESS$ -A $ARG1$ -P $ARG2$ -W $ARG3$ -C $ARG4$ -d $ARG5$
}

# Collection-scoped MongoDB check: same invocation as check_mongodb
# ($ARG1$..$ARG4$ -> -A / -P / -W / -C) with two extra arguments:
#   $ARG5$ -> -d  presumably the database name; confirm against the plugin's usage text
#   $ARG6$ -> -c  presumably the collection name; confirm against the plugin's usage text
define command {
    command_name check_mongodb_collection
    command_line $USER1$/nagios-plugin-mongodb/check_mongodb.py -H $HOSTADDRESS$ -A $ARG1$ -P $ARG2$ -W $ARG3$ -C $ARG4$ -d $ARG5$ -c $ARG6$
}

# Replica-set-scoped MongoDB check: same invocation as check_mongodb
# ($ARG1$..$ARG4$ -> -A / -P / -W / -C) with one extra argument:
#   $ARG5$ -> -r  presumably the replica set name; confirm against the plugin's usage text
define command {
    command_name check_mongodb_replicaset
    command_line $USER1$/nagios-plugin-mongodb/check_mongodb.py -H $HOSTADDRESS$ -A $ARG1$ -P $ARG2$ -W $ARG3$ -C $ARG4$ -r $ARG5$
}

# Query-based MongoDB check: same invocation as check_mongodb
# ($ARG1$..$ARG4$ -> -A / -P / -W / -C) with one extra argument:
#   $ARG5$ -> -q  presumably the query type/expression; confirm against the plugin's usage text
define command {
    command_name check_mongodb_query
    command_line $USER1$/nagios-plugin-mongodb/check_mongodb.py -H $HOSTADDRESS$ -A $ARG1$ -P $ARG2$ -W $ARG3$ -C $ARG4$ -q $ARG5$
}
3,添加mongodb监控服务

mongodb的服务也需要单独重新添加,如下所示:
# Check MongoDB connection time: warning above 2 seconds, critical above 5 seconds.
# NOTE(review): this check targets port 30000 while the other checks use 27017 —
# presumably the mongos router port of the sharded cluster; confirm.
define service{
        host_name dbm1slave1
        service_description Mongo Connect Check
        check_command check_mongodb!connect!30000!2!5
        max_check_attempts 5
        normal_check_interval 3
        retry_check_interval 2
        check_period 24x7
        notification_interval 10
        notification_period 24x7
        notification_options w,u,c,r
        contact_groups ops
}

# Check MongoDB connection usage. The thresholds passed are 70 (warning) and
# 80 (critical); the plugin's `connections` action evaluates the percentage of
# the connection pool in use, not an absolute connection count.
define service{
        host_name dbm1slave1
        service_description Mongo Free Connections
        check_command check_mongodb!connections!27017!70!80
        max_check_attempts 5
        normal_check_interval 3
        retry_check_interval 2
        check_period 24x7
        notification_interval 10
        notification_period 24x7
        notification_options w,u,c,r
        contact_groups ops
}

# Check MongoDB replication lag so the primary and secondary stay in sync:
# warning at 15, critical at 30 (seconds, per the plugin's `replication_lag` action).
define service{
        host_name dbm1slave1
        service_description Mongo Replication Lag
        check_command check_mongodb!replication_lag!27017!15!30
        max_check_attempts 5
        normal_check_interval 3
        retry_check_interval 2
        check_period 24x7
        notification_interval 10
        notification_period 24x7
        notification_options w,u,c,r
        contact_groups ops
}

# Check MongoDB memory usage; choose the thresholds (20 warning / 28 critical here)
# relative to the total memory of the machine running MongoDB.
define service{
        host_name dbm1slave1
        service_description Mongo Memory Usage
        check_command check_mongodb!memory!27017!20!28
        max_check_attempts 5
        normal_check_interval 3
        retry_check_interval 2
        check_period 24x7
        notification_interval 10
        notification_period 24x7
        notification_options w,u,c,r
        contact_groups ops
}

#检查mongodb Mapped的内存使用率,阀值与mongodb所在机器的总内存数相关 define service{ host_name dbm1slave1 service_description Mongo Mapped Memory Usage check_command check_mongodb!memory_mappe