[关闭]
@yanglt7 2018-10-21T15:58:45.000000Z 字数 27699 阅读 1046

【Web 集群实战】22_Nagios

Web集群实战


1. Nagios 监控工具及原理

1.1 Nagios 特点

1.2 Nagios 监控构成

2. Nagios 服务器端安装

2.1 Nagios 安装准备

(1)准备 3 台服务器或 VM 虚拟机

HOSTNAME IP 说明
nagios-server 192.168.2.151 Nagios 服务器端
web001 192.168.2.152 被监控的客户端服务器
web002 192.168.2.144 被监控的客户端服务器

(2)设置 yum 安装源

  1. [root@nagios-server ~]# cp /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.bak
  2. [root@nagios-server ~]# wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.163.com/.help/CentOS7-Base-163.repo

(3) 解决 Perl 软件编译问题

  1. [root@nagios-server ~]# echo 'export LC_ALL=C'>> /etc/profile
  2. [root@nagios-server ~]# tail -1 /etc/profile
  3. export LC_ALL=C
  4. [root@nagios-server ~]# source /etc/profile
  5. [root@nagios-server ~]# echo $LC_ALL
  6. C

(4)关闭 Nagios Server 端防火墙及 SELinux

  1. [root@nagios-server ~]# systemctl disable firewalld.service
  2. [root@nagios-server ~]# systemctl stop firewalld.service
  3. [root@nagios-server ~]# systemctl status firewalld.service
  4. * firewalld.service - firewalld - dynamic firewall daemon
  5. Loaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; vendor preset: enabled)
  6. Active: inactive (dead)
  7. Docs: man:firewalld(1)
  8. [root@nagios-server ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
  9. # 修改配置文件可使配置永久生效,需重启系统
  10. [root@nagios-server ~]# cat /etc/selinux/config|grep SELINUX=disabled
  11. SELINUX=disabled
  12. [root@nagios-server ~]# getenforce
  13. Disabled

(5)解决系统时间同步问题

  1. [root@nagios-server ~]# echo '#time sync by nagios-server at 2018-09-16' >>/var/spool/cron/root
  2. [root@nagios-server ~]# echo '*/5 * * * * /usr/sbin/ntpdate ntp1.aliyun.com >/dev/null 2>&1' >> /var/spool/cron/root
  3. [root@nagios-server ~]# crontab -l
  4. #time sync by nagios-server at 2018-09-16
  5. */5 * * * * /usr/sbin/ntpdate ntp1.aliyun.com >/dev/null 2>&1

(6) 安装 Nagios 服务器端所需软件包(LAMP 环境)

  1. [root@nagios-server ~]# yum install gcc glibc glibc-common -y
  2. [root@nagios-server ~]# yum install gd gd-devel -y
  3. [root@nagios-server ~]# yum install httpd php php-gd -y
  4. [root@nagios-server ~]# rpm -qa httpd php
  5. httpd-2.4.6-80.el7.centos.1.x86_64
  6. php-5.4.16-45.el7.x86_64

MySQL 安装参见【Web 集群实战】12_LNMP 之 MySQL 的安装与配置

(7)创建 Nagios 服务器端需要的用户及组

  1. [root@nagios-server ~]# /usr/sbin/useradd nagios
  2. [root@nagios-server ~]# /usr/sbin/useradd apache -M -s /sbin/nologin
  3. useradd: user 'apache' already exists
  4. [root@nagios-server ~]# /usr/sbin/groupadd nagcmd
  5. [root@nagios-server ~]# /usr/sbin/usermod -a -G nagcmd nagios
  6. [root@nagios-server ~]# /usr/sbin/usermod -a -G nagcmd apache
  7. [root@nagios-server ~]# id -n -G nagios
  8. nagios nagcmd
  9. [root@nagios-server ~]# id -n -G apache
  10. apache nagcmd
  11. [root@nagios-server ~]# groups nagios
  12. nagios : nagios nagcmd
  13. [root@nagios-server ~]# groups apache
  14. apache : apache nagcmd

(8)下载所需软件包

  1. [root@nagios-server ~]# cd /home/ylt/tools/
  2. [root@nagios-server tools]# mkdir nagios -p
  3. [root@nagios-server tools]# cd nagios/
  4. [root@nagios-server nagios]# wget https://sourceforge.net/projects/nagios/files/nagios-3.x/nagios-3.5.1/nagios-3.5.1.tar.gz/download
  5. [root@nagios-server nagios]# ll
  6. total 1724
  7. -rw-r--r-- 1 root root 1763584 Aug 31 2013 nagios-3.5.1.tar.gz
  8. [root@nagios-server nagios]# wget https://nagios-plugins.org/download/nagios-plugins-2.2.1.tar.gz#_ga=2.27512634.762344303.1539496511-137884230.1539496511
  9. [root@nagios-server nagios]# ll nagios-plugins-2.2.1.tar.gz
  10. -rw-r--r-- 1 root root 2728818 Apr 20 2017 nagios-plugins-2.2.1.tar.gz
  11. [root@nagios-server nagios]# wget https://sourceforge.net/projects/nagios/files/nrpe-2.x/nrpe-2.12/nrpe-2.12.tar.gz/download
  12. [root@nagios-server nagios]# ll nrpe-2.12.tar.gz
  13. -rw-r--r-- 1 root root 405725 Mar 11 2008 nrpe-2.12.tar.gz

(9)启动 LAMP 环境的 HTTP 服务

  1. [root@nagios-server tools]# systemctl start httpd
  2. [root@nagios-server tools]# lsof -i:80
  3. COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
  4. httpd 1352 root 4u IPv6 21968 0t0 TCP *:http (LISTEN)
  5. httpd 1353 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)
  6. httpd 1354 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)
  7. httpd 1355 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)
  8. httpd 1356 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)
  9. httpd 1357 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)

2.2 安装 Nagios 服务器端

  1. [root@nagios-server nagios]# tar xf nagios-3.5.1.tar.gz
  2. [root@nagios-server nagios]# ll
  3. total 1728
  4. drwxrwxr-x 15 root root 4096 Aug 31 2013 nagios
  5. -rw-r--r-- 1 root root 1763584 Aug 31 2013 nagios-3.5.1.tar.gz
  6. [root@nagios-server nagios]# cd nagios/
  7. [root@nagios-server nagios]# ./configure --with-command-group=nagcmd
  8. Review the options above for accuracy. If they look okay,
  9. type 'make all' to compile the main program and CGIs.
  10. [root@nagios-server nagios]# make all
  11. Enjoy.
  12. [root@nagios-server nagios]# make install
  13. make install-init
  14. - This installs the init script in /etc/rc.d/init.d
  15. make install-commandmode
  16. - This installs and configures permissions on the
  17. directory for holding the external command file
  18. make install-config
  19. - This installs sample config files in /usr/local/nagios/etc
  20. make[1]: Leaving directory `/home/ylt/tools/nagios/nagios'
  21. [root@nagios-server nagios]# make install-init
  22. *** Init script installed ***
  23. [root@nagios-server nagios]# make install-commandmode
  24. *** External command directory configured ***
  25. [root@nagios-server nagios]# make install-config
  26. *** Config files installed ***

(1) 安装 Nagios Web 配置文件及创建登录用户

  1. [root@nagios-server nagios]# make install-webconf
  2. *** Nagios/Apache conf file installed ***
  1. [root@nagios-server nagios]# cd ..
  2. [root@nagios-server nagios]# htpasswd -bc /usr/local/nagios/etc/htpasswd.users nagios nagios
  3. Adding password for user nagios
  4. [root@nagios-server nagios]# cat /usr/local/nagios/etc/htpasswd.users
  5. nagios:$apr1$l7AGreUZ$LUP7tkFCcLoJ21cACkOvU/
  1. [root@nagios-server nagios]# systemctl reload httpd

(2)添加监控报警信息接收的 Email 地址

  1. [root@nagios-server nagios]# sed -i 's#nagios@localhost#yanglt7@163.com#g' /usr/local/nagios/etc/objects/contacts.cfg
  2. [root@nagios-server nagios]# sed -n '35p' /usr/local/nagios/etc/objects/contacts.cfg
  3. email yanglt7@163.com ;
  1. [root@nagios-server nagios]# tail -2 /etc/mail.rc
  2. set from=1622320046@qq.com
  3. smtp=smtp.qq.com smtp-auth-user=1622320046 smtp-auth-password=password smtp-auth=login

(3)解决 Web 端用户 nagios 没有被许可查看服务资源的问题,将 nagiosadmin 改成 nagios

  1. [root@nagios-server etc]# cat cgi.cfg|grep ^authorized_for
  2. authorized_for_system_information=nagios
  3. authorized_for_configuration_information=nagios
  4. authorized_for_system_commands=nagios
  5. authorized_for_all_services=nagios
  6. authorized_for_all_hosts=nagios
  7. authorized_for_all_service_commands=nagios
  8. authorized_for_all_host_commands=nagios

(4)配置启动 Apache 服务

  1. [root@nagios-server nagios]# systemctl enable httpd
  2. Created symlink from /etc/systemd/system/multi-user.target.wants/httpd.service to /usr/lib/systemd/system/httpd.service.
  3. [root@nagios-server nagios]# systemctl restart httpd
  4. [root@nagios-server nagios]# netstat -lntup|grep httpd
  5. tcp6 0 0 :::80 :::* LISTEN 1932/httpd

用户名和密码提示窗口

(4)安装 Nagios 插件软件包

  1. [root@nagios-server nagios]# yum install perl-devel openssl-devel -y
  1. [root@nagios-server nagios]# tar xf nagios-plugins-2.2.1.tar.gz
  2. [root@nagios-server nagios]# cd nagios-plugins-2.2.1/
  3. [root@nagios-server nagios-plugins-2.2.1]# ./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules --with-mysql
  4. [root@nagios-server nagios-plugins-2.2.1]# make
  5. [root@nagios-server nagios-plugins-2.2.1]# make install
  1. [root@nagios-server nagios-plugins-2.2.1]# ls /usr/local/nagios/libexec/|wc -l
  2. 62

(5)安装 nrpe 软件

  1. [root@nagios-server nagios-plugins-2.2.1]# ls /usr/local/nagios/libexec/check_nrpe
  2. ls: cannot access /usr/local/nagios/libexec/check_nrpe: No such file or directory
  1. [root@nagios-server nagios-plugins-2.2.1]# cd ../
  2. [root@nagios-server nagios]# tar xf nrpe-2.12.tar.gz
  3. [root@nagios-server nagios]# cd nrpe-2.12/
  4. [root@nagios-server nrpe-2.12]# ./configure
  5. [root@nagios-server nrpe-2.12]# make all
  6. [root@nagios-server nrpe-2.12]# make install-plugin
  7. [root@nagios-server nrpe-2.12]# make install-daemon
  8. [root@nagios-server nrpe-2.12]# make install-daemon-config
  1. [root@nagios-server nagios]# ls /usr/local/nagios/libexec/check_nrpe
  2. /usr/local/nagios/libexec/check_nrpe
  3. [root@nagios-server nagios]# ls /usr/local/nagios/libexec/|wc -l
  4. 63

(6)配置并启动 Nagios 服务

  1. [root@nagios-server nagios]# /sbin/chkconfig nagios on
  2. [root@nagios-server nagios]# chkconfig --list nagios
  3. nagios 0:off 1:off 2:on 3:on 4:on 5:on 6:off
  4. [root@nagios-server ~]# echo "/etc/init.d/nagios start" >>/etc/rc.local
  5. [root@nagios-server ~]# tail -1 /etc/rc.local
  6. /etc/init.d/nagios start
  1. [root@nagios-server ~]# /etc/init.d/nagios checkconfig
  2. Running configuration check... OK.
  3. [root@nagios-server ~]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
  4. Total Warnings: 0
  5. Total Errors: 0
  6. Things look okay - No serious problems were detected during the pre-flight check
  1. [root@nagios-server ~]# grep 'checkconfig)' -n -A 2 /etc/init.d/nagios
  2. 181: checkconfig)
  3. 182- printf "Running configuration check..."
  4. 183- $NagiosBin -v $NagiosCfgFile > /dev/null 2>&1;
  5. # 删除脚本中的 > /dev/null 2>&1
  6. [root@nagios-server ~]# vim /etc/init.d/nagios
  7. [root@nagios-server ~]# grep 'checkconfig)' -n -A 2 /etc/init.d/nagios
  8. 181: checkconfig)
  9. 182- printf "Running configuration check..."
  10. 183- $NagiosBin -v $NagiosCfgFile;
  1. [root@nagios-server ~]# /etc/init.d/nagios checkconfig
  2. Total Warnings: 0
  3. Total Errors: 0
  4. Things look okay - No serious problems were detected during the pre-flight check
  5. OK.
  1. [root@nagios-server ~]# /etc/init.d/nagios restart
  2. Restarting nagios (via systemctl): Warning: nagios.service changed on disk. Run 'systemctl daemon-reload' to reload units.
  3. [ OK ]
  4. [root@nagios-server ~]# systemctl daemon-reload
  5. [root@nagios-server ~]# /etc/init.d/nagios restart
  6. Restarting nagios (via systemctl): [ OK ]
  7. [root@nagios-server ~]# ps -ef|grep nagios|grep -v grep
  8. nagios 1408 1 0 15:54 ? 00:00:00 /usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg
  9. [root@nagios-server ~]# netstat -lntup|grep nagios
  10. # 无输出

3. Nagios 客户端安装

3.1 Nagios 客户端安装准备

(1)准备 2 台服务器或 VM 虚拟机

HOSTNAME IP 说明
web001 192.168.2.152 被监控的客户端服务器
web002 192.168.2.144 被监控的客户端服务器

(2)环境准备和服务器端步骤相同

3.2 在 Nagios 客户端安装软件

(1)下载所需软件包

  1. [root@web001 ~]# yum install gcc glibc-common -y
  2. [root@web001 ~]# mkdir /home/ylt/tools/nagios
  3. [root@web001 ~]# cd /home/ylt/tools/nagios
  4. [root@web001 nagios]# wget https://sourceforge.net/projects/nagios/files/nagios-3.x/nagios-3.5.1/nagios-3.5.1.tar.gz/download
  5. [root@web001 nagios]# wget https://nagios-plugins.org/download/nagios-plugins-2.2.1.tar.gz#_ga=2.27512634.762344303.1539496511-137884230.1539496511
  6. [root@web001 nagios]# wget https://sourceforge.net/projects/nagios/files/nrpe-2.x/nrpe-2.12/nrpe-2.12.tar.gz/download
  7. [root@web001 nagios]# ll
  8. total 4792
  9. -rw-r--r-- 1 root root 1763584 Aug 31 2013 nagios-3.5.1.tar.gz
  10. -rw-r--r-- 1 root root 2728818 Apr 20 2017 nagios-plugins-2.2.1.tar.gz
  11. -rw-r--r-- 1 root root 405725 Mar 11 2008 nrpe-2.12.tar.gz

(2) 添加 nagios 用户

  1. [root@web001 nagios]# /usr/sbin/useradd nagios -M -s /sbin/nologin
  2. [root@web001 nagios]# id nagios
  3. uid=1003(nagios) gid=1003(nagios) groups=1003(nagios)

(3)安装 nagios-plugins 插件

  1. [root@web001 nagios]# yum install perl-devel perl-CPAN openssl-devel -y
  2. [root@web001 nagios]# tar xf nagios-plugins-2.2.1.tar.gz
  3. [root@web001 nagios]# cd nagios-plugins-2.2.1/
  4. [root@web001 nagios-plugins-2.2.1]# ./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules --with-mysql
  5. [root@web001 nagios-plugins-2.2.1]# make
  6. [root@web001 nagios-plugins-2.2.1]# make install
  7. [root@web001 nagios-plugins-2.2.1]# cd ../
  8. [root@web001 nagios]# ls /usr/local/nagios/libexec/|wc -l
  9. 62

(4)安装 Nagios 客户端 nrpe 软件

  1. [root@web001 nagios]# tar xf nrpe-2.12.tar.gz
  2. [root@web001 nagios]# cd nrpe-2.12/
  3. [root@web001 nrpe-2.12]# ./configure
  4. [root@web001 nrpe-2.12]# make all
  5. [root@web001 nrpe-2.12]# make install-plugin
  6. [root@web001 nrpe-2.12]# make install-daemon
  7. [root@web001 nrpe-2.12]# make install-daemon-config

(5)配置监控内存、磁盘 I/O 脚本插件

  1. [root@web001 nagios]# wget https://github.com/yanglt7/picture/raw/master/check_iostat
  2. [root@web001 nagios]# wget https://github.com/yanglt7/picture/raw/master/check_memory.pl
  1. [root@web001 nagios]# yum install dos2unix -y
  2. [root@web001 nagios]# /bin/cp /home/ylt/tools/nagios/check_memory.pl /usr/local/nagios/libexec/
  3. [root@web001 nagios]# /bin/cp /home/ylt/tools/nagios/check_iostat /usr/local/nagios/libexec/
  4. [root@web001 nagios]# chmod 755 /usr/local/nagios/libexec/check_memory.pl
  5. [root@web001 nagios]# chmod 755 /usr/local/nagios/libexec/check_iostat
  6. [root@web001 nagios]# dos2unix /usr/local/nagios/libexec/check_memory.pl
  7. dos2unix: converting file /usr/local/nagios/libexec/check_memory.pl to Unix format ...
  8. [root@web001 nagios]# dos2unix /usr/local/nagios/libexec/check_iostat
  9. dos2unix: converting file /usr/local/nagios/libexec/check_iostat to Unix format ...
  1. [root@web001 nagios]# chmod a+x /usr/local/nagios/libexec/check_iostat
  2. [root@web001 nagios]# chmod a+x /usr/local/nagios/libexec/check_memory.pl

3.3 配置 Nagios 客户端 nrpe 服务

  1. [root@web001 nagios]# cd /usr/local/nagios/etc/
  2. [root@web001 etc]# sed -n '79p' nrpe.cfg
  3. allowed_hosts=127.0.0.1
  4. [root@web001 etc]# sed -i 's#allowed_hosts=127.0.0.1#allowed_hosts=127.0.0.1,192.168.2.151#g' nrpe.cfg
  5. [root@web001 etc]# sed -n '79p' nrpe.cfg
  6. allowed_hosts=127.0.0.1,192.168.2.151
  1. [root@web001 etc]# vim nrpe.cfg
  2. command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
  3. command[check_mem]=/usr/local/nagios/libexec/check_memory.pl -w 10 -c 3
  4. command[check_disk]=/usr/local/nagios/libexec/check_disk -w 15% -c 7% -p /
  5. command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
  6. command[check_iostat]=/usr/local/nagios/libexec/check_iostat -s sda -w 30,200,20 -c 50,250,50
  1. [root@web001 etc]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
  1. [root@web001 etc]# netstat -lntup|grep nrpe
  2. tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 3063/nrpe
  3. [root@web001 etc]# ps -ef|grep nrpe|grep -v grep
  4. nagios 3152 1 0 19:18 ? 00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
  1. [root@web001 etc]# echo "#nagios nrpe process cmd by ylt at 20181014" >>/etc/rc.local
  2. [root@web001 etc]# echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >>/etc/rc.local
  3. [root@web001 etc]# tail -2 /etc/rc.local
  4. #nagios nrpe process cmd by ylt at 20181014
  5. /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

4. Nagios 服务器端监控

4.1 Nagios 服务器端监控基础介绍

(1)nagios 服务器端核心配置文件

  1. [root@nagios-server ~]# cd /usr/local/nagios/etc
  2. [root@nagios-server etc]# tree
  3. .
  4. |-- cgi.cfg
  5. |-- htpasswd.users
  6. |-- nagios.cfg
  7. |-- nrpe.cfg
  8. |-- objects
  9. | |-- commands.cfg
  10. | |-- contacts.cfg
  11. | |-- localhost.cfg
  12. | |-- printer.cfg
  13. | |-- switch.cfg
  14. | |-- templates.cfg
  15. | |-- timeperiods.cfg
  16. | `-- windows.cfg
  17. `-- resource.cfg
  18. 1 directory, 13 files

(2)配置主配置文件 nagios.cfg

  1. [root@nagios-server etc]# vim nagios.cfg
  2. 34 cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
  3. 35 cfg_file=/usr/local/nagios/etc/objects/services.cfg
  4. 36 cfg_dir=/usr/local/nagios/etc/objects/services
  1. #cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
  1. [root@nagios-server etc]# cd objects/
  2. [root@nagios-server objects]# head -51 localhost.cfg >hosts.cfg
  3. [root@nagios-server objects]# chown nagios.nagios /usr/local/nagios/etc/objects/hosts.cfg
  1. [root@nagios-server objects]# touch services.cfg
  2. [root@nagios-server objects]# chown nagios.nagios services.cfg
  1. [root@nagios-server objects]# mkdir services
  2. [root@nagios-server objects]# chown -R nagios.nagios services
  1. [root@nagios-server objects]# ls -lrt
  2. total 56
  3. -rw-rw-r-- 1 nagios nagios 10812 Oct 14 14:21 templates.cfg
  4. -rw-rw-r-- 1 nagios nagios 7716 Oct 14 14:21 commands.cfg
  5. -rw-rw-r-- 1 nagios nagios 3208 Oct 14 14:21 timeperiods.cfg
  6. -rw-rw-r-- 1 nagios nagios 5403 Oct 14 14:21 localhost.cfg
  7. -rw-rw-r-- 1 nagios nagios 4019 Oct 14 14:21 windows.cfg
  8. -rw-rw-r-- 1 nagios nagios 3124 Oct 14 14:21 printer.cfg
  9. -rw-rw-r-- 1 nagios nagios 3293 Oct 14 14:21 switch.cfg
  10. -rw-rw-r-- 1 nagios nagios 2165 Oct 14 14:37 contacts.cfg
  11. -rw-r--r-- 1 nagios nagios 1870 Oct 14 19:35 hosts.cfg
  12. -rw-r--r-- 1 nagios nagios 0 Oct 14 19:36 services.cfg
  13. drwxr-xr-x 2 nagios nagios 4096 Oct 14 19:37 services

4.2 配置 Nagios 服务器端监控项

(1)配置 hosts.cfg,定义要监控的 Nagios 客户端主机

  1. [root@nagios-server objects]# cat hosts.cfg
  2. #
  3. # HOST DEFINITION
  4. #
  5. # Define a host for the local machine
  6. define host{
  7. use linux-server ; Name of host template to use
  8. ; This host definition will inherit all variables that are defined
  9. ; in (or inherited by) the linux-server host template definition.
  10. host_name web001
  11. alias web001
  12. address 192.168.2.152
  13. }
  14. define host{
  15. use linux-server ; Name of host template to use
  16. ; This host definition will inherit all variables that are defined
  17. ; in (or inherited by) the linux-server host template definition.
  18. host_name web002
  19. alias web002
  20. address 192.168.2.144
  21. }
  22. #
  23. # HOST GROUP DEFINITION
  24. #
  25. # Define an optional hostgroup for Linux machines
  26. define hostgroup{
  27. hostgroup_name linux-servers ; The name of the hostgroup
  28. alias Linux Servers ; Long name of the group
  29. members web001,web002 ; Comma separated list of hosts that belong to this group
  30. }

(2)配置 services.cfg,定义要监控的资源服务

  1. define service {
  2. use generic-service
  3. host_name web001,web002
  4. service_description Disk Partition
  5. check_command check_nrpe!check_disk
  6. }
  7. define service {
  8. use generic-service
  9. host_name web001,web002
  10. service_description Swap Useage
  11. check_command check_nrpe!check_swap
  12. }
  13. define service {
  14. use generic-service
  15. host_name web001,web002
  16. service_description MEM Useage
  17. check_command check_nrpe!check_mem
  18. }
  19. define service {
  20. use generic-service
  21. host_name web001,web002
  22. service_description Current Load
  23. check_command check_nrpe!check_load
  24. }
  25. define service {
  26. use generic-service
  27. host_name web001,web002
  28. service_description Disk Iostat
  29. check_command check_nrpe!check_iostat!5!11
  30. }
  31. define service {
  32. use generic-service
  33. host_name web001,web002
  34. service_description PING
  35. check_command check_ping!100.0,20%!500.0,60%
  36. }

(3)配置 commands.cfg,加入 check_nrpe 的插件配置

  1. [root@nagios-server objects]# tail -5 commands.cfg
  2. # 'check_nrpe' command definition
  3. define command{
  4. command_name check_nrpe
  5. command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
  6. }

(4)检查语法

  1. [root@nagios-server objects]# /etc/init.d/nagios checkconfig
  2. Total Warnings: 0
  3. Total Errors: 0
  4. Things look okay - No serious problems were detected during the pre-flight check
  5. OK.

监控主机针对本地各系统状态监控的成果

(5) 添加 http 服务的 URL 地址及端口监控

  1. [root@nagios-server ~]# /usr/local/nagios/libexec/check_http -H 192.168.2.152
  2. HTTP OK: HTTP/1.1 200 OK - 258 bytes in 0.001 second response time |time=0.000986s;;;0.000000 size=258B;;;0
  1. [root@nagios-server objects]# sed -n '37,49p' services.cfg
  2. #url examples http://blog.yangyangyang.org
  3. define service {
  4. use generic-service
  5. host_name web001
  6. service_description blog_url
  7. check_command check_weburl!-H blog.yangyangyang.org
  8. }
  9. define service {
  10. use generic-service
  11. host_name web001
  12. service_description blog_url1
  13. check_command check_weburl!-H blog.yangyangyang.org -u /ylt.html
  14. }
  15. # -u 后加域名后面的地址,即检查真正的 URL 地址 http://blog.yangyangyang.org/ylt.html
  1. [root@nagios-server objects]# sed -n '144,154p' commands.cfg
  2. # 'check_http' command definition
  3. define command{
  4. command_name check_http
  5. command_line $USER1$/check_http -I $HOSTADDRESS$ $ARG1$
  6. }
  7. # 'check_weburl' command definition
  8. define command{
  9. command_name check_weburl
  10. command_line $USER1$/check_http $ARG1$ -w 10 -c 30
  11. }

(6)配置好 URL 后检查 Nagios 语法

  1. 192.168.2.148 blog.yangyangyang.org
  1. [root@web001 ~]# touch /var/www/html/index.html
  2. [root@web001 ~]# touch /var/www/html/ylt.html
  1. [root@nagios-server objects]# /etc/init.d/nagios checkconfig
  1. [root@nagios-server ~]# /etc/init.d/nagios reload
  1. [root@nagios-server objects]# /usr/local/nagios/libexec/check_http -H blog.yangyangyang.org
  2. HTTP OK: HTTP/1.1 200 OK - 258 bytes in 0.001 second response time |time=0.000952s;;;0.000000 size=258B;;;0
  3. [root@nagios-server objects]# /usr/local/nagios/libexec/check_http -H blog.yangyangyang.org -u /ylt.html
  4. HTTP OK: HTTP/1.1 200 OK - 258 bytes in 0.001 second response time |time=0.001255s;;;0.000000 size=258B;;;0

(7)监控任意端口实例

  1. [root@nagios-server ~]# /usr/local/nagios/libexec/check_tcp -H 192.168.2.152 -p 80
  2. TCP OK - 0.000 second response time on 192.168.2.152 port 80|time=0.000350s;;;0.000000;10.000000
  1. [root@nagios-server objects]# sed -n '50,60p' services.cfg
  2. define service {
  3. use generic-service
  4. host_name web001
  5. service_description ssh_52017
  6. check_command check_tcp!52017
  7. }
  8. define service {
  9. use generic-service
  10. host_name web001
  11. service_description http_80
  12. check_command check_tcp!80
  1. [root@nagios-server objects]# tail -4 commands.cfg
  2. define command{
  3. command_name check_memcached_11211
  4. command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p 11211 -t 5 -E -s 'stats\r\nquit\r\n' -e 'uptime' -M crit
  5. }
  6. [root@nagios-server objects]# tail -6 services.cfg
  7. define service {
  8. use generic-service
  9. host_name web001
  10. service_description Memcached_11211
  11. check_command check_memcached_11211
  12. }

(8)监控 Memcached 服务

监控成果

4.3 Nagios 的调试

(1)检查 Nagios 语法并优化配置 Nagios 启动脚本,见 2.2 安装 Nagios 服务器端(6)配置并启动 Nagios 服务
(2)通过日志排查问题

  1. [root@nagios-server ~]# tail /usr/local/nagios/var/nagios.log
  2. [1539792000] CURRENT SERVICE STATE: web002;Swap Useage;CRITICAL;HARD;3;Connection refused or timed out
  3. [1539837678] Warning: A system time change of 0d 12h 41m 9s (forwards in time) has been detected. Compensating...
  4. [1539837700] HOST NOTIFICATION: nagiosadmin;web002;DOWN;notify-host-by-email;CRITICAL - Host Unreachable (192.168.2.144)
  5. [1539838070] SERVICE ALERT: web001;Disk Iostat;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
  6. [1539838120] SERVICE ALERT: web001;Current Load;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
  7. [1539838190] SERVICE ALERT: web001;Disk Iostat;CRITICAL;SOFT;2;CHECK_NRPE: Error - Could not complete SSL handshake.
  8. [1539838210] SERVICE ALERT: web001;Disk Partition;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
  9. [1539838220] SERVICE ALERT: web001;MEM Useage;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
  10. [1539838240] SERVICE ALERT: web001;Current Load;CRITICAL;SOFT;2;CHECK_NRPE: Error - Could not complete SSL handshake.
  11. [1539838250] SERVICE ALERT: web001;Swap Useage;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
  12. [root@nagios-server ~]# tail /var/log/messages
  13. Oct 18 12:50:20 nagios-server nagios: SERVICE ALERT: web001;MEM Useage;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
  14. Oct 18 12:50:35 nagios-server systemd: Started Session 89 of user ylt.
  15. Oct 18 12:50:35 nagios-server systemd-logind: New session 89 of user ylt.
  16. Oct 18 12:50:35 nagios-server systemd: Starting Session 89 of user ylt.
  17. Oct 18 12:50:35 nagios-server dbus[646]: [system] Activating service name='org.freedesktop.problems' (using servicehelper)
  18. Oct 18 12:50:35 nagios-server dbus[646]: [system] Successfully activated service 'org.freedesktop.problems'
  19. Oct 18 12:50:40 nagios-server nagios: SERVICE ALERT: web001;Current Load;CRITICAL;SOFT;2;CHECK_NRPE: Error - Could not complete SSL handshake.
  20. Oct 18 12:50:50 nagios-server nagios: SERVICE ALERT: web001;Swap Useage;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
  21. Oct 18 12:51:10 nagios-server systemd-logind: Removed session 83.
  22. Oct 18 12:51:15 nagios-server su: (to root) ylt on pts/1

5. 服务器端 Nagios 图形监控显示和管理

5.1 服务器端安装 PNP 生成图形

(1)PNP 出图基础依赖软件安装

  1. [root@nagios-server ~]# yum install cairo pango zlib zlib-devel freetype freetype-devel gd gd-devel -y
  2. [root@nagios-server ~]# rpm -qa cairo pango zlib zlib-devel freetype freetype-devel gd gd-devel
  3. zlib-1.2.7-17.el7.x86_64
  4. gd-devel-2.0.35-26.el7.x86_64
  5. gd-2.0.35-26.el7.x86_64
  6. freetype-2.4.11-15.el7.x86_64
  7. freetype-devel-2.4.11-15.el7.x86_64
  8. pango-1.40.4-1.el7.x86_64
  9. cairo-1.14.8-2.el7.x86_64
  1. [root@nagios-server ~]# yum install libart_lgpl libart_lgpl-devel -y
  2. [root@nagios-server ~]# rpm -qa libart_lgpl libart_lgpl-devel
  3. libart_lgpl-2.3.21-10.el7.x86_64
  4. libart_lgpl-devel-2.3.21-10.el7.x86_64
  1. [root@nagios-server ~]# yum install rrdtool rrdtool-devel -y
  2. [root@nagios-server ~]# rpm -qa rrdtool rrdtool-devel
  3. rrdtool-1.4.8-9.el7.x86_64
  4. rrdtool-devel-1.4.8-9.el7.x86_64
  5. [root@nagios-server ~]# which rrdtool
  6. /bin/rrdtool

(2)安装出图 Web 界面展示软件 PNP

  1. [root@nagios-server ~]# cd /home/ylt/tools/
  2. [root@nagios-server tools]# wget https://sourceforge.net/projects/pnp4nagios/files/PNP/pnp-0.4.14/pnp-0.4.14.tar.gz/download
  3. [root@nagios-server tools]# yum install perl-Time-HiRes -y
  4. [root@nagios-server tools]# tar zxf pnp-0.4.14.tar.gz
  5. [root@nagios-server pnp-0.4.14]# ./configure --with-rrdtool --with-perfdata-dir=/usr/local/nagios/share/perfdata
  6. [root@nagios-server pnp-0.4.14]# make all
  7. [root@nagios-server pnp-0.4.14]# make install
  8. [root@nagios-server pnp-0.4.14]# make install-config
  9. [root@nagios-server pnp-0.4.14]# make install-init
  10. [root@nagios-server pnp-0.4.14]# ll /usr/local/nagios/libexec/ |grep process
  11. -rwxr-xr-x 1 nagios nagios 31804 Oct 18 18:50 process_perfdata.pl

(3)Nagios 出图相关配置

  1. [root@nagios-server etc]# sed -n '834p' nagios.cfg
  2. process_performance_data=1
  1. [root@nagios-server etc]# sed -n '846,847p' nagios.cfg
  2. host_perfdata_command=process-host-perfdata
  3. service_perfdata_command=process-service-perfdata
  1. [root@nagios-server etc]# sed -n '234,245p' objects/commands.cfg
  2. # 'process-host-perfdata' command definition
  3. define command{
  4. command_name process-host-perfdata
  5. command_line /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /usr/local/nagios/var/host-perfdata.out
  6. }
  7. # 'process-service-perfdata' command definition
  8. define command{
  9. command_name process-service-perfdata
  10. command_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out
  11. }
  1. [root@nagios-server etc]# /etc/init.d/nagios checkconfig
  2. [root@nagios-server etc]# /etc/init.d/nagios reload

5.2 配置主机及服务获取状态数据出图

(1)设置让被监控的主机记录数据

  1. [root@nagios-server ~]# cd /usr/local/nagios/etc/objects/
  2. [root@nagios-server objects]# sed -n '23,42p' hosts.cfg
  3. # Define a host for the local machine
  4. define host{
  5. use linux-server ; Name of host template to use
  6. ; This host definition will inherit all variables that are defined
  7. ; in (or inherited by) the linux-server host template definition.
  8. host_name web001
  9. alias web001
  10. address 192.168.2.152
  11. process_perf_data 1 ;<==此行表示将记录 web001 主机的状态数据
  12. }
  13. define host{
  14. use linux-server ; Name of host template to use
  15. ; This host definition will inherit all variables that are defined
  16. ; in (or inherited by) the linux-server host template definition.
  17. host_name web002
  18. alias web002
  19. address 192.168.2.144
  20. process_perf_data 1
  21. }

(2)设置让被监控主机对应的服务记录数据

  1. [root@nagios-server objects]# head -7 services.cfg
  2. define service {
  3. use generic-service
  4. host_name web001,web002
  5. service_description Disk Partition
  6. check_command check_nrpe!check_disk
  7. process_perf_data 1 ;<==此行表示将记录 web001、web002 主机的剩余磁盘空间状态数据
  8. }
  1. [root@nagios-server objects]# sed -n '154,176p' templates.cfg |sed -r 's#(.*);.*$#\1#g'
  2. name generic-service
  3. active_checks_enabled 1
  4. passive_checks_enabled 1
  5. parallelize_check 1
  6. obsess_over_service 1
  7. check_freshness 0
  8. notifications_enabled 1
  9. event_handler_enabled 1
  10. flap_detection_enabled 1
  11. failure_prediction_enabled 1
  12. process_perf_data 1
  13. retain_status_information 1
  14. retain_nonstatus_information 1
  15. is_volatile 0
  16. check_period 24x7
  17. max_check_attempts 3
  18. normal_check_interval 10
  19. retry_check_interval 2
  20. contact_groups admins
  21. notification_options w,u,c,r
  22. notification_interval 60
  23. notification_period 24x7
  24. register 0
  1. [root@nagios-server etc]# /etc/init.d/nagios checkconfig
  2. [root@nagios-server etc]# /etc/init.d/nagios reload

图形数据

5.3 整合 PNP URL 超链接到 Nagios Web 界面

(1)给被监控的所有主机添加超链接图标

  1. [root@nagios-server objects]# sed -n '23,34p' hosts.cfg
  2. # Define a host for the local machine
  3. define host{
  4. use linux-server ; Name of host template to use
  5. ; This host definition will inherit all variables that are defined
  6. ; in (or inherited by) the linux-server host template definition.
  7. host_name web001
  8. alias web001
  9. address 192.168.2.152
  10. process_perf_data 1
  11. action_url /nagios/pnp/index.php?host=$HOSTNAME$ ;<== 添加超链接图标
  12. }
  1. [root@nagios-server etc]# /etc/init.d/nagios checkconfig
  2. [root@nagios-server etc]# /etc/init.d/nagios reload

PNP URL

(2)给被监控主机指定的服务添加超链接图标

  1. [root@nagios-server objects]# head -8 services.cfg
  2. define service {
  3. use generic-service
  4. host_name web001,web002
  5. service_description Disk Partition
  6. check_command check_nrpe!check_disk
  7. process_perf_data 1
  8. action_url /nagios/pnp/index.php?host=$HOSTNAME$&srv=$SERVICEDESC$ ;<== 给具体服务添加超链接图标
  9. }
  1. [root@nagios-server objects]# sed -n '177p' templates.cfg |sed -r 's#(.*);.*$#\1#g'
  2. action_url /nagios/pnp/index.php?host=$HOSTNAME$&srv=$SERVICEDESC$
  1. [root@nagios-server etc]# /etc/init.d/nagios checkconfig
  2. [root@nagios-server etc]# /etc/init.d/nagios reload

PNP URL services

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注