#!/usr/local/bin/perl

=pod

=head1 NAME

check_snmp_procs.pl - check process numbers by process state or by number
of processes by process name

=head1 DESCRIPTION

This script allows you to check the process table on an SNMP agent that
implements the HOST-RESOURCES-MIB. This script lets you monitor processes
by process state or by number of processes running by process name and
number of matching processes.

Activate the state check mode by passing -m state to the script.
Activate process number checking by passing in -m count to the script.

You can specify warning and critical thresholds for each mode in the
following format:

    metric,op,number

Where op is one of:

=over 4

=item * gt - >

=item * lt - <

=item * lte - <=

=item * gte - >=

=item * ne - !=

=item * eq - ==

=back

You can create a string of OR'd conditions by separating each additional
warning or critical threshold by colons. Example:

    --warning 'runnable,gt,50:invalid,gt,50' -c 'invalid,gt,100'

You must specify a critical threshold for at least one metric; specifying
a warning threshold is optional.

=head2 Process State Mode

To activate process state check mode, pass -m state to the script.

The HOST-RESOURCES-MIB defines four process states and we define an
additional metric (total) that you can use in 'state' mode. Total holds
the total number of processes in the process table.

=over 4

=item * Running - processes actively being serviced by the CPU

=item * Runnable - processes waiting for system resources

=item * Not runnable - processes that are in memory but not waiting to run

=item * Invalid - process is idle, a zombie, or other state

=item * Total - total number of processes in the process table

=back

Thresholds in this mode are checked against the following metric names:
C<running>, C<runnable>, C<notRunnable>, C<invalid> and C<total>.

Example threshold specification:

    --warning 'runnable,gt,50:invalid,gt,50' -c 'invalid,gt,100'

In state mode, perfdata will be output with counts of the number of
processes in each state and the total numbers of processes as well as
warning and critical threshold numbers.

Example:

    'runnable'=4;50;0 'running'=100;0;0 'not_runnable'=1;0;0 'invalid'=50;0;100

=head2 Process Count Mode

To activate process count check mode, pass -m 'count' to the script.
In this mode you pass process match specifications to the script that
indicate:

=over 4

=item * The perl regular expression to use to match processes from the
process table.

=item * The friendly name to output for this process type in script output

=back

These process patterns should be passed to the script by appending one or
more -M argument/value pairs to the script in the following format:

    -M 'perl-regular-expression:friendly name'

Example:

    -M 'java.+-Xmx:tomcat' -M 'httpd:apache' -M 'sshd:ssh'

Warning and critical thresholds can then be specified; for example, if
we want to define the rules "Between 1 and 20 Apache processes, exactly
1 java process, and more than 0 MySQL processes" we would pass in this
argument list to the script:

    -M 'java:tomcat' -M 'httpd:apache' -M 'mysqld:mysql'
    -c 'apache,gt,20:apache,lt,1:tomcat,ne,1:mysql,lt,1'

The script will output perfdata for every process definition passed into
the script, regardless of whether you define a process count threshold
for the process or not.

Example:

    'tomcat'=1;0;0 'httpd'=15;0;0 'mysqld'=4;0;0

=cut

use strict;
use warnings;

use FindBin;
use lib "$FindBin::Bin/lib";

use Nagios::Plugin::SNMP;
use Nenm::Utils;

# Entry point: parse the command line, then run either the process-state
# check or the process-count check depending on --mode.
#
# Returns whatever Nenm::Utils::output_multi_results() returns; the caller
# passes that value straight to exit().
sub check_snmp_procs {

    my $LABEL = 'SNMP-PROCS';

    # NOTE(review): the original usage heredoc was lost from the source
    # this file was recovered from; the text below is reconstructed from
    # the options this script declares - confirm against upstream.
    my $USAGE = <<'EOUSAGE';
Usage: %s --mode state|count [--match regex:friendly ...]
       [--warning metric,op,number[:metric,op,number ...]]
       --critical metric,op,number[:metric,op,number ...]
EOUSAGE

    my $plugin = Nagios::Plugin::SNMP->new(
        'shortname' => $LABEL,
        'usage'     => $USAGE
    );

    $plugin->add_arg(
        'spec' => 'mode|m=s',
        'help' => "-m, --mode state|count\n" .
                  " Specify the mode of operation for the script; in \n" .
                  " 'state' mode the script will check the states of\n" .
                  " all processes on the server; in 'count' mode the\n" .
                  " script will check for numbers of critical processes.\n" .
                  " See perldoc in this script for more information.",
        'required' => 1
    );

    # The example carries a ':friendly' suffix because a spec without one
    # is rejected by the matcher validation in count mode.
    $plugin->add_arg(
        'spec' => 'match|M=s@',
        'help' => "-M, --match regex:friendly\n" .
                  " Specify a perl regular expression to match against\n" .
                  " the process table of the remote host, then a colon,\n" .
                  " then the friendly name for the matched processes,\n" .
                  " e.g. 'java.+-server|weblogic:appserver'",
        'required' => 0,
        'default'  => []
    );

    $plugin->getopts;

    $Nenm::Utils::DEBUG = $plugin->opts->get('snmp-debug');

    my $MODE = $plugin->opts->get('mode');

    $plugin->nagios_die(
        "Invalid mode selected, valid modes are 'state' or 'count'")
        unless defined $MODE && $MODE =~ m/^(?:state|count)$/;

    $plugin->nagios_die("Critical threshold required!")
        unless defined($plugin->opts->get('critical'));

    return _check_process_states($plugin, $LABEL) if $MODE eq 'state';
    return _check_process_counts($plugin, $LABEL);
}

# Return the trailing numeric sub-identifier of an OID (the table index),
# or undef when the OID does not end in ".<digits>".
sub _oid_index {
    my ($oid) = @_;

    my ($index) = ($oid =~ m/^.+\.(\d+)$/);
    return $index;
}

# Parse the --warning (optional) and --critical thresholds against the
# given metrics hash, dying through the plugin if either contains errors.
#
# Returns ($warning_thresholds, $critical_thresholds) as produced by
# Nenm::Utils::parse_multi_threshold().
sub _parse_thresholds_or_die {
    my ($plugin, $metrics) = @_;

    my ($warn, $warn_errors) = ([], []);

    if (defined $plugin->opts->warning) {
        ($warn, $warn_errors) = Nenm::Utils::parse_multi_threshold(
            $plugin->opts->warning, $metrics);
    }

    if (scalar(@$warn_errors) > 0) {
        $plugin->nagios_die("Errors found in warning thresholds " .
                            "specified:\n " .
                            join("\n ", @$warn_errors));
    }

    my ($crit, $crit_errors) = Nenm::Utils::parse_multi_threshold(
        $plugin->opts->critical, $metrics);

    if (scalar(@$crit_errors) > 0) {
        $plugin->nagios_die("Errors found in critical thresholds " .
                            "specified:\n " .
                            join("\n ", @$crit_errors));
    }

    return ($warn, $crit);
}

# 'state' mode: walk hrSWRunStatus, count processes per state plus a
# grand total, and evaluate the thresholds against those counters.
sub _check_process_states {
    my ($plugin, $label) = @_;

    my %oids = qw(
        hrSWRunStatus .1.3.6.1.2.1.25.4.2.1.7
    );

    my %states = (
        'running'     => {qw(value 0)},
        'runnable'    => {qw(value 0)},
        'notRunnable' => {qw(value 0)},
        'invalid'     => {qw(value 0)},
        'total'       => {qw(value 0)}
    );

    # hrSWRunStatus integer value => metric name in %states.
    my %states_map = qw(
        1 running
        2 runnable
        3 notRunnable
        4 invalid
    );

    my ($wthr, $cthr) = _parse_thresholds_or_die($plugin, \%states);

    my $snmp_results = $plugin->walk(values %oids);
    my $procs = $snmp_results->{$oids{'hrSWRunStatus'}};

    # Same guard the count mode applies to its tables: without it an
    # unreachable table would silently report zero processes.
    if (! defined $procs) {
        $plugin->nagios_die("Could not retrieve hrSWRunStatus " .
                            "table from remote agent!");
    }

    for my $oid (keys %$procs) {

        my $code = $procs->{$oid};
        my $state_name = defined $code ? $states_map{$code} : undef;

        # A value outside the map used to autovivify a bogus '' metric;
        # count it as 'invalid' (documented above as covering "other
        # state") so the per-state counters still add up to the total.
        $state_name = 'invalid' unless defined $state_name;

        $states{$state_name}->{'value'}++;
        $states{'total'}->{'value'}++;

        my $sid = _oid_index($oid);
        $sid = $oid unless defined $sid;
        Nenm::Utils::debug("Process $sid is $state_name");
    }

    my $results = Nenm::Utils::check_multi_thresholds(
        \%states, $wthr, $cthr, '');

    return Nenm::Utils::output_multi_results($label, $results);
}

# 'count' mode: build one command line per process from hrSWRunPath and
# hrSWRunParameters, count how many match each --match regular
# expression, and evaluate the thresholds against those counters.
sub _check_process_counts {
    my ($plugin, $label) = @_;

    my $specs = $plugin->opts->get('match');
    my @MATCH_SPECS = defined $specs ? @$specs : ();

    $plugin->nagios_die("Need at least one --match specification!")
        unless scalar(@MATCH_SPECS) > 0;

    my %matchers;

    # Compiled patterns live outside %matchers so the hash handed to
    # Nenm::Utils keeps exactly the keys it had before ('value', 'regex').
    my %compiled_for;

    for my $spec (@MATCH_SPECS) {

        Nenm::Utils::debug("Parsing $spec");

        # Greedy first group: the split happens on the LAST colon, so the
        # regular expression itself may contain colons.
        my ($regex, $friendly) = ($spec =~ m/^(.+):(.+)$/);

        $plugin->nagios_die("Invalid format for matcher!  Valid format " .
                            "perl-regular-expression:friendly-name")
            unless defined $friendly;

        # Compile once, up front, so a malformed user pattern is reported
        # as a plugin error instead of a raw Perl failure mid-run.
        my $compiled = eval { qr{$regex}i };
        $plugin->nagios_die("Invalid regular expression '$regex' for " .
                            "matcher '$friendly': $@")
            unless defined $compiled;

        $matchers{$friendly} = {
            'value' => 0,
            'regex' => $regex
        };
        $compiled_for{$friendly} = $compiled;

        Nenm::Utils::debug("Set $friendly to $regex");
    }

    my ($wthr, $cthr) = _parse_thresholds_or_die($plugin, \%matchers);

    my %oids = qw(
        hrSWRunPath       1.3.6.1.2.1.25.4.2.1.4
        hrSWRunParameters 1.3.6.1.2.1.25.4.2.1.5
    );

    my $snmp_results = $plugin->walk(values %oids);

    # Process table index => "path parameters" command line.
    my %processes;

    my $run_paths = $snmp_results->{$oids{'hrSWRunPath'}};

    if (! defined $run_paths) {
        $plugin->nagios_die("Could not retrieve hrSWRunPath " .
                            "table from remote agent!");
    }

    for my $oid (keys %$run_paths) {
        my $idx = _oid_index($oid);
        next unless defined $idx;

        my $path = defined $run_paths->{$oid} ? $run_paths->{$oid} : '';
        $processes{$idx} = $path;
        Nenm::Utils::debug("Process $idx has path $path");
    }

    my $run_params = $snmp_results->{$oids{'hrSWRunParameters'}};

    if (! defined $run_params) {
        $plugin->nagios_die("Could not retrieve hrSWRunParameters " .
                            "table from remote agent!");
    }

    for my $oid (keys %$run_params) {
        my $idx = _oid_index($oid);
        next unless defined $idx;
        next unless defined $run_params->{$oid};

        # The two tables are walked separately, so a process may appear
        # here without a path entry; start from an empty path then.
        $processes{$idx} = '' unless defined $processes{$idx};
        $processes{$idx} .= " $run_params->{$oid}";
        Nenm::Utils::debug("Process $idx has params $run_params->{$oid}");
    }

    # Now check each process against our regexes to determine if we match
    # them or not; if we match, increment the counter for the matcher.
    for my $proc (sort keys %processes) {

        my $cmd_line = $processes{$proc};

        for my $matcher (keys %matchers) {

            next unless $cmd_line =~ $compiled_for{$matcher};

            my $regex = $matchers{$matcher}->{'regex'};
            Nenm::Utils::debug("$matcher: $cmd_line =~ $regex");
            $matchers{$matcher}->{'value'}++;
        }
    }

    my $results = Nenm::Utils::check_multi_thresholds(
        \%matchers, $wthr, $cthr, '');

    return Nenm::Utils::output_multi_results($label, $results);
}

exit check_snmp_procs();