@yanglt7
2018-10-21T15:58:45.000000Z
字数 27699
阅读 1090
Web集群实战
(1)准备 3 台服务器或 VM 虚拟机
| HOSTNAME | IP | 说明 |
|---|---|---|
| nagios-server | 192.168.2.151 | Nagios 服务器端 |
| web001 | 192.168.2.152 | 被监控的客户端服务器 |
| web002 | 192.168.2.144 | 被监控的客户端服务器 |
(2)设置 yum 安装源
[root@nagios-server ~]# cp /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.bak
[root@nagios-server ~]# wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.163.com/.help/CentOS7-Base-163.repo
(3) 解决 Perl 软件编译问题
[root@nagios-server ~]# echo 'export LC_ALL=C'>> /etc/profile
[root@nagios-server ~]# tail -1 /etc/profile
export LC_ALL=C
[root@nagios-server ~]# source /etc/profile
[root@nagios-server ~]# echo $LC_ALL
C
(4)关闭 Nagios Server 端防火墙及 SELinux
[root@nagios-server ~]# systemctl disable firewalld.service[root@nagios-server ~]# systemctl stop firewalld.service[root@nagios-server ~]# systemctl status firewalld.service* firewalld.service - firewalld - dynamic firewall daemonLoaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; vendor preset: enabled)Active: inactive (dead)Docs: man:firewalld(1)[root@nagios-server ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config# 修改配置文件可使配置永久生效,需重启系统[root@nagios-server ~]# cat /etc/selinux/config|grep SELINUX=disabledSELINUX=disabled[root@nagios-server ~]# getenforceDisabled
(5)解决系统时间同步问题
[root@nagios-server ~]# echo '#time sync by nagios-server at 2018-09-16' >>/var/spool/cron/root
[root@nagios-server ~]# echo '*/5 * * * * /usr/sbin/ntpdate ntp1.aliyun.com >/dev/null 2>&1' >> /var/spool/cron/root
[root@nagios-server ~]# crontab -l
#time sync by nagios-server at 2018-10-13
*/2 * * * * /usr/sbin/ntpdate ntp1.aliyun.com>/dev/null 2>&1
(6) 安装 Nagios 服务器端所需软件包(LAMP 环境)
[root@nagios-server ~]# yum install gcc glibc glibc-common -y[root@nagios-server ~]# yum install gd gd-devel -y[root@nagios-server ~]# yum install httpd php php-gd -y[root@nagios-server ~]# rpm -qa httpd phphttpd-2.4.6-80.el7.centos.1.x86_64php-5.4.16-45.el7.x86_64
MySQL 安装参见【Web 集群实战】12_LNMP 之 MySQL 的安装与配置
(7)创建 Nagios 服务器端需要的用户及组
[root@nagios-server ~]# /usr/sbin/useradd nagios[root@nagios-server ~]# /usr/sbin/useradd apache -M -s /sbin/nologinuseradd: user 'apache' already exists[root@nagios-server ~]# /usr/sbin/groupadd nagcmd[root@nagios-server ~]# /usr/sbin/usermod -a -G nagcmd nagios[root@nagios-server ~]# /usr/sbin/usermod -a -G nagcmd apache[root@nagios-server ~]# id -n -G nagiosnagios nagcmd[root@nagios-server ~]# id -n -G apacheapache nagcmd[root@nagios-server ~]# groups nagiosnagios : nagios nagcmd[root@nagios-server ~]# groups apacheapache : apache nagcmd
(8)下载所需软件包
[root@nagios-server ~]# cd /home/ylt/tools/
[root@nagios-server tools]# mkdir nagios -p
[root@nagios-server tools]# cd nagios/
[root@nagios-server nagios]# wget -O nagios-3.5.1.tar.gz https://sourceforge.net/projects/nagios/files/nagios-3.x/nagios-3.5.1/nagios-3.5.1.tar.gz/download
[root@nagios-server nagios]# ll
total 1724
-rw-r--r-- 1 root root 1763584 Aug 31 2013 nagios-3.5.1.tar.gz
[root@nagios-server nagios]# wget https://nagios-plugins.org/download/nagios-plugins-2.2.1.tar.gz
[root@nagios-server nagios]# ll nagios-plugins-2.2.1.tar.gz
-rw-r--r-- 1 root root 2728818 Apr 20 2017 nagios-plugins-2.2.1.tar.gz
[root@nagios-server nagios]# wget -O nrpe-2.12.tar.gz https://sourceforge.net/projects/nagios/files/nrpe-2.x/nrpe-2.12/nrpe-2.12.tar.gz/download
[root@nagios-server nagios]# ll nrpe-2.12.tar.gz
-rw-r--r-- 1 root root 405725 Mar 11 2008 nrpe-2.12.tar.gz
(9)启动 LAMP 环境的 HTTP 服务
[root@nagios-server tools]# systemctl start httpd[root@nagios-server tools]# lsof -i:80COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAMEhttpd 1352 root 4u IPv6 21968 0t0 TCP *:http (LISTEN)httpd 1353 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)httpd 1354 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)httpd 1355 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)httpd 1356 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)httpd 1357 apache 4u IPv6 21968 0t0 TCP *:http (LISTEN)
[root@nagios-server nagios]# tar xf nagios-3.5.1.tar.gz[root@nagios-server nagios]# lltotal 1728drwxrwxr-x 15 root root 4096 Aug 31 2013 nagios-rw-r--r-- 1 root root 1763584 Aug 31 2013 nagios-3.5.1.tar.gz[root@nagios-server nagios]# cd nagios/[root@nagios-server nagios]# ./configure --with-command-group=nagcmdReview the options above for accuracy. If they look okay,type 'make all' to compile the main program and CGIs.[root@nagios-server nagios]# make allEnjoy.[root@nagios-server nagios]# make installmake install-init- This installs the init script in /etc/rc.d/init.dmake install-commandmode- This installs and configures permissions on thedirectory for holding the external command filemake install-config- This installs sample config files in /usr/local/nagios/etcmake[1]: Leaving directory `/home/ylt/tools/nagios/nagios'[root@nagios-server nagios]# make install-init*** Init script installed ***[root@nagios-server nagios]# make install-commandmode*** External command directory configured ***[root@nagios-server nagios]# make install-config*** Config files installed ***
(1) 安装 Nagios Web 配置文件及创建登录用户
[root@nagios-server nagios]# make install-webconf*** Nagios/Apache conf file installed ***
[root@nagios-server nagios]# cd ..
[root@nagios-server nagios]# htpasswd -bc /usr/local/nagios/etc/htpasswd.users nagios nagios
Adding password for user nagios
[root@nagios-server nagios]# cat /usr/local/nagios/etc/htpasswd.users
nagios:$apr1$l7AGreUZ$LUP7tkFCcLoJ21cACkOvU/
[root@nagios-server nagios]# systemctl reload httpd
(2)添加监控报警信息接收的 Email 地址
[root@nagios-server nagios]# sed -i 's#nagios@localhost#yanglt7@163.com#g' /usr/local/nagios/etc/objects/contacts.cfg[root@nagios-server nagios]# sed -n '35p' /usr/local/nagios/etc/objects/contacts.cfgemail yanglt7@163.com ;
[root@nagios-server nagios]# tail -2 /etc/mail.rc
set from=1622320046@qq.com
set smtp=smtp.qq.com smtp-auth-user=1622320046 smtp-auth-password=password smtp-auth=login
(3)解决 Web 端用户 nagios 没有被许可查看服务资源的问题,将 nagiosadmin 改成 nagios
[root@nagios-server etc]# cat cgi.cfg|grep ^authorized_forauthorized_for_system_information=nagiosauthorized_for_configuration_information=nagiosauthorized_for_system_commands=nagiosauthorized_for_all_services=nagiosauthorized_for_all_hosts=nagiosauthorized_for_all_service_commands=nagiosauthorized_for_all_host_commands=nagios
(4)配置启动 Apache 服务
[root@nagios-server nagios]# systemctl enable httpdCreated symlink from /etc/systemd/system/multi-user.target.wants/httpd.service to /usr/lib/systemd/system/httpd.service.[root@nagios-server nagios]# systemctl restart httpd[root@nagios-server nagios]# netstat -lntup|grep httpdtcp6 0 0 :::80 :::* LISTEN 1932/httpd

(4)安装 Nagios 插件软件包
[root@nagios-server nagios]# yum install perl-devel openssl-devel -y
[root@nagios-server nagios]# tar xf nagios-plugins-2.2.1.tar.gz[root@nagios-server nagios]# cd nagios-plugins-2.2.1/[root@nagios-server nagios-plugins-2.2.1]# ./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules --with-mysql[root@nagios-server nagios-plugins-2.2.1]# make[root@nagios-server nagios-plugins-2.2.1]# make install
[root@nagios-server nagios-plugins-2.2.1]# ls /usr/local/nagios/libexec/|wc -l62
(5)安装 nrpe 软件
[root@nagios-server nagios-plugins-2.2.1]# ls /usr/local/nagios/libexec/check_nrpels: cannot access /usr/local/nagios/libexec/check_nrpe: No such file or directory
[root@nagios-server nagios-plugins-2.2.1]# cd ../[root@nagios-server nagios]# tar xf nrpe-2.12.tar.gz[root@nagios-server nagios]# cd nrpe-2.12/[root@nagios-server nrpe-2.12]# ./configure[root@nagios-server nrpe-2.12]# make all[root@nagios-server nrpe-2.12]# make install-plugin[root@nagios-server nrpe-2.12]# make install-daemon[root@nagios-server nrpe-2.12]# make install-daemon-config
[root@nagios-server nagios]# ls /usr/local/nagios/libexec/check_nrpe/usr/local/nagios/libexec/check_nrpe[root@nagios-server nagios]# ls /usr/local/nagios/libexec/|wc -l63
(6)配置并启动 Nagios 服务
[root@nagios-server nagios]# /sbin/chkconfig nagios on[root@nagios-server nagios]# chkconfig --list nagiosnagios 0:off 1:off 2:on 3:on 4:on 5:on 6:off[root@nagios-server ~]# echo "/etc/init.d/nagios start" >>/etc/rc.local[root@nagios-server ~]# tail -1 /etc/rc.local/etc/init.d/nagios start
[root@nagios-server ~]# /etc/init.d/nagios checkconfigRunning configuration check... OK.[root@nagios-server ~]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfgTotal Warnings: 0Total Errors: 0Things look okay - No serious problems were detected during the pre-flight check
[root@nagios-server ~]# grep 'checkconfig)' -n -A 2 /etc/init.d/nagios181: checkconfig)182- printf "Running configuration check..."183- $NagiosBin -v $NagiosCfgFile > /dev/null 2>&1;# 删除脚本中的 > /dev/null 2>&1[root@nagios-server ~]# vim /etc/init.d/nagios[root@nagios-server ~]# grep 'checkconfig)' -n -A 2 /etc/init.d/nagios181: checkconfig)182- printf "Running configuration check..."183- $NagiosBin -v $NagiosCfgFile;
[root@nagios-server ~]# /etc/init.d/nagios checkconfigTotal Warnings: 0Total Errors: 0Things look okay - No serious problems were detected during the pre-flight checkOK.
[root@nagios-server ~]# /etc/init.d/nagios restartRestarting nagios (via systemctl): Warning: nagios.service changed on disk. Run 'systemctl daemon-reload' to reload units.[ OK ][root@nagios-server ~]# systemctl daemon-reload[root@nagios-server ~]# /etc/init.d/nagios restartRestarting nagios (via systemctl): [ OK ][root@nagios-server ~]# ps -ef|grep nagios|grep -v grepnagios 1408 1 0 15:54 ? 00:00:00 /usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg[root@nagios-server ~]# netstat -lntup|grep nagios# 无输出
(1)准备 2 台服务器或 VM 虚拟机
| HOSTNAME | IP | 说明 |
|---|---|---|
| web001 | 192.168.2.152 | 被监控的客户端服务器 |
| web002 | 192.168.2.144 | 被监控的客户端服务器 |
(2)环境准备和服务器端步骤相同
(1)下载所需软件包
[root@web001 ~]# yum install gcc glibc-common -y
[root@web001 ~]# mkdir /home/ylt/tools/nagios
[root@web001 ~]# cd /home/ylt/tools/nagios
[root@web001 nagios]# wget -O nagios-3.5.1.tar.gz https://sourceforge.net/projects/nagios/files/nagios-3.x/nagios-3.5.1/nagios-3.5.1.tar.gz/download
[root@web001 nagios]# wget https://nagios-plugins.org/download/nagios-plugins-2.2.1.tar.gz
[root@web001 nagios]# wget -O nrpe-2.12.tar.gz https://sourceforge.net/projects/nagios/files/nrpe-2.x/nrpe-2.12/nrpe-2.12.tar.gz/download
[root@web001 nagios]# ll
total 4792
-rw-r--r-- 1 root root 1763584 Aug 31 2013 nagios-3.5.1.tar.gz
-rw-r--r-- 1 root root 2728818 Apr 20 2017 nagios-plugins-2.2.1.tar.gz
-rw-r--r-- 1 root root 405725 Mar 11 2008 nrpe-2.12.tar.gz
(2) 添加 nagios 用户
[root@web001 nagios]# /usr/sbin/useradd nagios -M -s /sbin/nologin[root@web001 nagios]# id nagiosuid=1003(nagios) gid=1003(nagios) groups=1003(nagios)
(3)安装 nagios-plugins 插件
[root@web001 nagios]# yum install perl-devel perl-CPAN openssl-devel -y
[root@web001 nagios]# tar xf nagios-plugins-2.2.1.tar.gz
[root@web001 nagios]# cd nagios-plugins-2.2.1/
[root@web001 nagios-plugins-2.2.1]# ./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules --with-mysql
[root@web001 nagios-plugins-2.2.1]# make
[root@web001 nagios-plugins-2.2.1]# make install
[root@web001 nagios-plugins-2.2.1]# cd ../
[root@web001 nagios]# ls /usr/local/nagios/libexec/|wc -l
62
(4)安装 Nagios 客户端 nrpe 软件
[root@web001 nagios]# tar xf nrpe-2.12.tar.gz[root@web001 nagios]# cd nrpe-2.12/[root@web001 nrpe-2.12]# ./configure[root@web001 nrpe-2.12]# make all[root@web001 nrpe-2.12]# make install-plugin[root@web001 nrpe-2.12]# make install-daemon[root@web001 nrpe-2.12]# make install-daemon-config
(5)配置监控内存、磁盘 I/O 脚本插件
[root@web001 nagios]# wget https://raw.githubusercontent.com/yanglt7/picture/master/check_iostat
[root@web001 nagios]# wget https://raw.githubusercontent.com/yanglt7/picture/master/check_memory.pl
[root@web001 nagios]# yum install dos2unix -y[root@web001 nagios]# /bin/cp /home/ylt/tools/nagios/check_memory.pl /usr/local/nagios/libexec/[root@web001 nagios]# /bin/cp /home/ylt/tools/nagios/check_iostat /usr/local/nagios/libexec/[root@web001 nagios]# chmod 755 /usr/local/nagios/libexec/check_memory.pl[root@web001 nagios]# chmod 755 /usr/local/nagios/libexec/check_iostat[root@web001 nagios]# dos2unix /usr/local/nagios/libexec/check_memory.pldos2unix: converting file /usr/local/nagios/libexec/check_memory.pl to Unix format ...[root@web001 nagios]# dos2unix /usr/local/nagios/libexec/check_iostatdos2unix: converting file /usr/local/nagios/libexec/check_iostat to Unix format ...
[root@web001 nagios]# chmod a+x /usr/local/nagios/libexec/check_iostat[root@web001 nagios]# chmod a+x /usr/local/nagios/libexec/check_memory.pl
[root@web001 nagios]# cd /usr/local/nagios/etc/[root@web001 etc]# sed -n '79p' nrpe.cfgallowed_hosts=127.0.0.1[root@web001 etc]# sed -i 's#allowed_hosts=127.0.0.1#allowed_hosts=127.0.0.1,192.168.2.151#g' nrpe.cfg[root@web001 etc]# sed -n '79p' nrpe.cfgallowed_hosts=127.0.0.1,192.168.2.151
[root@web001 etc]# vim nrpe.cfgcommand[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20command[check_mem]=/usr/local/nagios/libexec/check_memory.pl -w 10 -c 3command[check_disk]=/usr/local/nagios/libexec/check_disk -w 15% -c 7% -p /command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%command[check_iostat]=/usr/local/nagios/libexec/check_iostat -s sda -w 30,200,20 -c 50,250,50
[root@web001 etc]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
[root@web001 etc]# netstat -lntup|grep nrpetcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 3063/nrpe[root@web001 etc]# ps -ef|grep nrpe|grep -v grepnagios 3152 1 0 19:18 ? 00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
[root@web001 etc]# echo "#nagios nrpe process cmd by ylt at 20181014" >>/etc/rc.local[root@web001 etc]# echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >>/etc/rc.local[root@web001 etc]# tail -2 /etc/rc.local#nagios nrpe process cmd by ylt at 20181014/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
(1)nagios 服务器端核心配置文件
[root@nagios-server ~]# cd /usr/local/nagios/etc[root@nagios-server etc]# tree.|-- cgi.cfg|-- htpasswd.users|-- nagios.cfg|-- nrpe.cfg|-- objects| |-- commands.cfg| |-- contacts.cfg| |-- localhost.cfg| |-- printer.cfg| |-- switch.cfg| |-- templates.cfg| |-- timeperiods.cfg| `-- windows.cfg`-- resource.cfg1 directory, 13 files
(2)配置主配置文件 nagios.cfg
[root@nagios-server etc]# vim nagios.cfg34 cfg_file=/usr/local/nagios/etc/objects/hosts.cfg35 cfg_file=/usr/local/nagios/etc/objects/services.cfg36 cfg_dir=/usr/local/nagios/etc/objects/services
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
[root@nagios-server etc]# cd objects/[root@nagios-server objects]# head -51 localhost.cfg >hosts.cfg[root@nagios-server objects]# chown nagios.nagios /usr/local/nagios/etc/objects/hosts.cfg
[root@nagios-server objects]# touch services.cfg[root@nagios-server objects]# chown nagios.nagios services.cfg
[root@nagios-server objects]# mkdir services[root@nagios-server objects]# chown -R nagios.nagios services
[root@nagios-server objects]# ls -lrttotal 56-rw-rw-r-- 1 nagios nagios 10812 Oct 14 14:21 templates.cfg-rw-rw-r-- 1 nagios nagios 7716 Oct 14 14:21 commands.cfg-rw-rw-r-- 1 nagios nagios 3208 Oct 14 14:21 timeperiods.cfg-rw-rw-r-- 1 nagios nagios 5403 Oct 14 14:21 localhost.cfg-rw-rw-r-- 1 nagios nagios 4019 Oct 14 14:21 windows.cfg-rw-rw-r-- 1 nagios nagios 3124 Oct 14 14:21 printer.cfg-rw-rw-r-- 1 nagios nagios 3293 Oct 14 14:21 switch.cfg-rw-rw-r-- 1 nagios nagios 2165 Oct 14 14:37 contacts.cfg-rw-r--r-- 1 nagios nagios 1870 Oct 14 19:35 hosts.cfg-rw-r--r-- 1 nagios nagios 0 Oct 14 19:36 services.cfgdrwxr-xr-x 2 nagios nagios 4096 Oct 14 19:37 services
(1)配置 hosts.cfg,定义要监控的 Nagios 客户端主机
[root@nagios-server objects]# cat hosts.cfg## HOST DEFINITION## Define a host for the local machinedefine host{use linux-server ; Name of host template to use; This host definition will inherit all variables that are defined; in (or inherited by) the linux-server host template definition.host_name web001alias web001address 192.168.2.152}define host{use linux-server ; Name of host template to use; This host definition will inherit all variables that are defined; in (or inherited by) the linux-server host template definition.host_name web002alias web002address 192.168.2.144}## HOST GROUP DEFINITION## Define an optional hostgroup for Linux machinesdefine hostgroup{hostgroup_name linux-servers ; The name of the hostgroupalias Linux Servers ; Long name of the groupmembers web001,web002 ; Comma separated list of hosts that belong to this group}
(2)配置 services.cfg,定义要监控的资源服务
define service {use generic-servicehost_name web001,web002service_description Disk Partitioncheck_command check_nrpe!check_disk}define service {use generic-servicehost_name web001,web002service_description Swap Useagecheck_command check_nrpe!check_swap}define service {use generic-servicehost_name web001,web002service_description MEM Useagecheck_command check_nrpe!check_mem}define service {use generic-servicehost_name web001,web002service_description Current Loadcheck_command check_nrpe!check_load}define service {use generic-servicehost_name web001,web002service_description Disk Iostatcheck_command check_nrpe!check_iostat!5!11}define service {use generic-servicehost_name web001,web002service_description PINGcheck_command check_ping!100.0,20%!500.0,60%}
(3)配置 commands.cfg,加入 check_nrpe 的插件配置
[root@nagios-server objects]# tail -5 commands.cfg# 'check_nrpe' command definitiondefine command{command_name check_nrpecommand_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$}
(4)检查语法
[root@nagios-server objects]# /etc/init.d/nagios checkconfigTotal Warnings: 0Total Errors: 0Things look okay - No serious problems were detected during the pre-flight checkOK.

(5) 添加 http 服务的 URL 地址及端口监控
[root@nagios-server ~]# /usr/local/nagios/libexec/check_http -H 192.168.2.152HTTP OK: HTTP/1.1 200 OK - 258 bytes in 0.001 second response time |time=0.000986s;;;0.000000 size=258B;;;0
下面是对域名 URL 地址 进行监控的配置
编辑 services.cfg 文件
[root@nagios-server objects]# sed -n '37,49p' services.cfg#url examples http://blog.yangyangyang.orgdefine service {use generic-servicehost_name web001service_description blog_urlcheck_command check_weburl!-H blog.yangyangyang.org}define service {use generic-servicehost_name web001service_description blog_url1check_command check_weburl!-H blog.yangyangyang.org -u /ylt.html}# -u 后加域名后面的地址,即检查真正的 URL 地址 http://blog.yangyangyang.org/ylt.html
[root@nagios-server objects]# sed -n '144,154p' commands.cfg# 'check_http' command definitiondefine command{command_name check_httpcommand_line $USER1$/check_http -I $HOSTADDRESS$ $ARG1$}# 'check_weburl' command definitiondefine command{command_name check_weburlcommand_line $USER1$/check_http $ARG1$ -w 10 -c 30}
(6)配置好 URL 后检查 Nagios 语法
在 Nagios 服务器端的 /etc/hosts 中添加域名解析记录:
192.168.2.148 blog.yangyangyang.org
[root@web001 ~]# touch /var/www/html/index.html[root@web001 ~]# touch /var/www/html/ylt.html
[root@nagios-server objects]# /etc/init.d/nagios checkconfig
[root@nagios-server ~]# /etc/init.d/nagios reload
[root@nagios-server objects]# /usr/local/nagios/libexec/check_http -H blog.yangyangyang.orgHTTP OK: HTTP/1.1 200 OK - 258 bytes in 0.001 second response time |time=0.000952s;;;0.000000 size=258B;;;0[root@nagios-server objects]# /usr/local/nagios/libexec/check_http -H blog.yangyangyang.org -u /ylt.htmlHTTP OK: HTTP/1.1 200 OK - 258 bytes in 0.001 second response time |time=0.001255s;;;0.000000 size=258B;;;0
(7)监控任意端口实例
[root@nagios-server ~]# /usr/local/nagios/libexec/check_tcp -H 192.168.2.152 -p 80TCP OK - 0.000 second response time on 192.168.2.152 port 80|time=0.000350s;;;0.000000;10.000000
[root@nagios-server objects]# sed -n '50,60p' services.cfgdefine service {use generic-servicehost_name web001service_description ssh_52017check_command check_tcp!52017}define service {use generic-servicehost_name web001service_description http_80check_command check_tcp!80
[root@nagios-server objects]# tail -4 commands.cfgdefine command{command_name check_memcached_11211command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p 11211 -t 5 -E -s 'stats\r\nquit\r\n' -e 'uptime' -M crit}[root@nagios-server objects]# tail -6 services.cfgdefine service {use generic-servicehost_name web001service_description Memcached_11211check_command check_memcached_11211}
(8)监控 Memcached 服务

(1)检查 Nagios 语法并优化配置 Nagios 启动脚本,见 2.2 安装 Nagios 服务器端(6)配置并启动 Nagios 服务
(2)通过日志排查问题
[root@nagios-server ~]# tail /usr/local/nagios/var/nagios.log
[1539792000] CURRENT SERVICE STATE: web002;Swap Useage;CRITICAL;HARD;3;Connection refused or timed out
[1539837678] Warning: A system time change of 0d 12h 41m 9s (forwards in time) has been detected. Compensating...
[1539837700] HOST NOTIFICATION: nagiosadmin;web002;DOWN;notify-host-by-email;CRITICAL - Host Unreachable (192.168.2.144)
[1539838070] SERVICE ALERT: web001;Disk Iostat;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
[1539838120] SERVICE ALERT: web001;Current Load;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
[1539838190] SERVICE ALERT: web001;Disk Iostat;CRITICAL;SOFT;2;CHECK_NRPE: Error - Could not complete SSL handshake.
[1539838210] SERVICE ALERT: web001;Disk Partition;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
[1539838220] SERVICE ALERT: web001;MEM Useage;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
[1539838240] SERVICE ALERT: web001;Current Load;CRITICAL;SOFT;2;CHECK_NRPE: Error - Could not complete SSL handshake.
[1539838250] SERVICE ALERT: web001;Swap Useage;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
[root@nagios-server ~]# tail /var/log/messages
Oct 18 12:50:20 nagios-server nagios: SERVICE ALERT: web001;MEM Useage;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
Oct 18 12:50:35 nagios-server systemd: Started Session 89 of user ylt.
Oct 18 12:50:35 nagios-server systemd-logind: New session 89 of user ylt.
Oct 18 12:50:35 nagios-server systemd: Starting Session 89 of user ylt.
Oct 18 12:50:35 nagios-server dbus[646]: [system] Activating service name='org.freedesktop.problems' (using servicehelper)
Oct 18 12:50:35 nagios-server dbus[646]: [system] Successfully activated service 'org.freedesktop.problems'
Oct 18 12:50:40 nagios-server nagios: SERVICE ALERT: web001;Current Load;CRITICAL;SOFT;2;CHECK_NRPE: Error - Could not complete SSL handshake.
Oct 18 12:50:50 nagios-server nagios: SERVICE ALERT: web001;Swap Useage;CRITICAL;SOFT;1;CHECK_NRPE: Error - Could not complete SSL handshake.
Oct 18 12:51:10 nagios-server systemd-logind: Removed session 83.
Oct 18 12:51:15 nagios-server su: (to root) ylt on pts/1
(1)PNP 出图基础依赖软件安装
[root@nagios-server ~]# yum install cairo pango zlib zlib-devel freetype freetype-devel gd gd-devel -y
[root@nagios-server ~]# rpm -qa cairo pango zlib zlib-devel freetype freetype-devel gd gd-devel
zlib-1.2.7-17.el7.x86_64
gd-devel-2.0.35-26.el7.x86_64
gd-2.0.35-26.el7.x86_64
freetype-2.4.11-15.el7.x86_64
freetype-devel-2.4.11-15.el7.x86_64
pango-1.40.4-1.el7.x86_64
cairo-1.14.8-2.el7.x86_64
[root@nagios-server ~]# yum install libart_lgpl libart_lgpl-devel -y[root@nagios-server ~]# rpm -qa libart_lgpl libart_lgpl-devellibart_lgpl-2.3.21-10.el7.x86_64libart_lgpl-devel-2.3.21-10.el7.x86_64
[root@nagios-server ~]# yum install rrdtool rrdtool-devel -y[root@nagios-server ~]# rpm -qa rrdtool rrdtool-develrrdtool-1.4.8-9.el7.x86_64rrdtool-devel-1.4.8-9.el7.x86_64[root@nagios-server ~]# which rrdtool/bin/rrdtool
(2)安装出图 Web 界面展示软件 PNP
[root@nagios-server ~]# cd /home/ylt/tools/
[root@nagios-server tools]# wget -O pnp-0.4.14.tar.gz https://sourceforge.net/projects/pnp4nagios/files/PNP/pnp-0.4.14/pnp-0.4.14.tar.gz/download
[root@nagios-server tools]# yum install perl-Time-HiRes -y
[root@nagios-server tools]# tar zxf pnp-0.4.14.tar.gz
[root@nagios-server tools]# cd pnp-0.4.14/
[root@nagios-server pnp-0.4.14]# ./configure --with-rrdtool --with-perfdata-dir=/usr/local/nagios/share/perfdata
[root@nagios-server pnp-0.4.14]# make all
[root@nagios-server pnp-0.4.14]# make install
[root@nagios-server pnp-0.4.14]# make install-config
[root@nagios-server pnp-0.4.14]# make install-init
[root@nagios-server pnp-0.4.14]# ll /usr/local/nagios/libexec/ |grep process
-rwxr-xr-x 1 nagios nagios 31804 Oct 18 18:50 process_perfdata.pl
(3)Nagios 出图相关配置
[root@nagios-server etc]# sed -n '834p' nagios.cfgprocess_performance_data=1
[root@nagios-server etc]# sed -n '846,847p' nagios.cfghost_perfdata_command=process-host-perfdataservice_perfdata_command=process-service-perfdata
[root@nagios-server etc]# sed -n '234,245p' objects/commands.cfg# 'process-host-perfdata' command definitiondefine command{command_name process-host-perfdatacommand_line /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /usr/local/nagios/var/host-perfdata.out}# 'process-service-perfdata' command definitiondefine command{command_name process-service-perfdatacommand_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out}
[root@nagios-server etc]# /etc/init.d/nagios checkconfig[root@nagios-server etc]# /etc/init.d/nagios reload
(1)设置让被监控的主机记录数据
[root@nagios-server ~]# cd /usr/local/nagios/etc/objects/[root@nagios-server objects]# sed -n '23,42p' hosts.cfg# Define a host for the local machinedefine host{use linux-server ; Name of host template to use; This host definition will inherit all variables that are defined; in (or inherited by) the linux-server host template definition.host_name web001alias web001address 192.168.2.152process_perf_data 1 #<==此行表示将记录 web001 主机的状态数据}define host{use linux-server ; Name of host template to use; This host definition will inherit all variables that are defined; in (or inherited by) the linux-server host template definition.host_name web002alias web002address 192.168.2.144process_perf_data 1}
(2)设置让被监控主机对应的服务记录数据
[root@nagios-server objects]# head -7 services.cfgdefine service {use generic-servicehost_name web001,web002service_description Disk Partitioncheck_command check_nrpe!check_diskprocess_perf_data 1 #<==此行表示将记录 web001 主机的剩余磁盘空间状态数据}
[root@nagios-server objects]# sed -n '154,176p' templates.cfg |sed -r 's#(.*);.*$#\1#g'name generic-serviceactive_checks_enabled 1passive_checks_enabled 1parallelize_check 1obsess_over_service 1check_freshness 0notifications_enabled 1event_handler_enabled 1flap_detection_enabled 1failure_prediction_enabled 1process_perf_data 1retain_status_information 1retain_nonstatus_information 1is_volatile 0check_period 24x7max_check_attempts 3normal_check_interval 10retry_check_interval 2contact_groups adminsnotification_options w,u,c,rnotification_interval 60notification_period 24x7register 0
[root@nagios-server etc]# /etc/init.d/nagios checkconfig[root@nagios-server etc]# /etc/init.d/nagios reload

(1)给被监控的所有主机添加超链接图标
[root@nagios-server objects]# sed -n '23,34p' hosts.cfg# Define a host for the local machinedefine host{use linux-server ; Name of host template to use; This host definition will inherit all variables that are defined; in (or inherited by) the linux-server host template definition.host_name web001alias web001address 192.168.2.152process_perf_data 1action_url /nagios/pnp/index.php?host=$HOSTNAME$ #<== 添加超链接图标}
[root@nagios-server etc]# /etc/init.d/nagios checkconfig[root@nagios-server etc]# /etc/init.d/nagios reload

(2)给被监控主机指定的服务添加超链接图标
[root@nagios-server objects]# head -8 services.cfgdefine service {use generic-servicehost_name web001,web002service_description Disk Partitioncheck_command check_nrpe!check_diskprocess_perf_data 1action_url /nagios/pnp/index.php?host=$HOSTNAME$&srv=$SERVICEDESC$ #<== 给具体服务添加超链接图标}
[root@nagios-server objects]# sed -n '177p' templates.cfg |sed -r 's#(.*);.*$#\1#g'action_url /nagios/pnp/index.php?host=$HOSTNAME$&srv=$SERVICEDESC$
[root@nagios-server etc]# /etc/init.d/nagios checkconfig[root@nagios-server etc]# /etc/init.d/nagios reload
