#!/usr/local/bin/perl
# nagios: +epn

=pod

=head1 NAME

check_snmp_tcpconns.pl - Check TCP connection states, numbers, and port
distributions.

=head1 SYNOPSIS

This plugin uses the TCP-MIB tcpConnState table to check the following
metrics:

=over

=item * How many TCP connections are present in and out of a device and
how many unique source and destination IP addresses are present?

=item * What is the connection state distribution (open, established,
timeWait, etc) for the device being checked?

=item * How many connections TO or FROM the device are there by
user-specified protocol (e.g. Web (80/443/8080), SMTP (25/587/465), etc.)?

=back

=head1 THRESHOLDS

In all cases where multiple thresholds are specified, conditions are
OR'd; if any of the conditions present are true, the threshold will be
considered breached.

For 'conn' mode thresholds are colon-separated lists of high limits for
connections inbound and connections outbound.

Example:

    -w 40:20 -c 90:30

This would request 'warn if there are more than 40 connections inbound
or 20 connections outbound; return critical if there are more than 90
connections inbound or 30 connections outbound.'

For 'state' mode this is a colon-separated list of conditionals to test
against one or more of the TCP states.

Example:

    -w 'timeWait,gt,5:established,gt,100'
    -c 'timeWait,gt,15:established,gt,500'

    Where gt == >, lt == <, lte == <=, and gte == >=

This would request 'warn if there are more than 5 connections in time
wait state or more than 100 established connections; return critical
if there are more than 500 established connections or more than 15
connections in time wait state.'

For 'service' mode this is a colon-separated list of conditionals to
test against the service groupings you specified by passing them to the
script using one or more -S arguments. For each service you define, you
can also specify direction for tests by appending "_in" or "_out" to
your service definition, e.g. "http_in,gt,5" would match only if the
number of connections to the http service as you define it is greater
than 5.

Note: passing the special token 'other' in as an -S argument will let
you test against a special bucket that holds connections that don't
match any service definitions you provide, you can then use other,
other_in, or other_out in your warning and critical threshold check
specifications if you wish as well.

Examples:

1. Check mail and FTP services, define mail as ports 25, 465, and 587
and define FTP as ports 20 and 21

    -S 'mail,25,465,587' -S 'ftp,20,21' -w 'mail,gt,5:ftp,gt,10' -c 'mail,gt,20'

2. Check web vs other.

    -S 'web,80,443,8080,8443' -S other -w 'web,gt,50' -c 'web,gt,100:other,gt,100'

In 'service' mode perfdata will include data for each service as a
whole along with _in and _out broken out as well, example:

    ./check_snmp_tcpconns.pl --hostname my.example.com --snmp-version 3 \
        --auth-username myusername \
        --auth-password pass \
        -M service \
        -S other:mail,25,465,26,587 \
        -S http,80:https,443 \
        -S ftp,20,21:ssh,22:ensim,19638:imap,143,993 \
        -S pop3,110,995 -w 'mail,gt,50' -c 'mail,gt,100'

    SNMP-TCP-CONNS OK - mail (12)| 'ensim'=0;0;0 'ensim_in'=0;0;0 'ensim_out'=0;0;0 'ftp'=0;0;0 'ftp_in'=0;0;0 'ftp_out'=0;0;0 'http'=4;0;0 'http_in'=2;0;0 'http_out'=2;0;0 'https'=0;0;0 'https_in'=0;0;0 'https_out'=0;0;0 'imap'=44;0;0 'imap_in'=44;0;0 'imap_out'=0;0;0 'mail'=12;0;0 'mail_in'=8;0;0 'mail_out'=4;0;0 'other'=20;0;0 'other_in'=0;0;0 'other_out'=20;0;0 'pop3'=11;0;0 'pop3_in'=11;0;0 'pop3_out'=0;0;0 'ssh'=1;0;0 'ssh_in'=1;0;0 'ssh_out'=0;0;0

=head1 USAGE

Type ./check_snmp_tcpconns.pl --help

=cut

# Entry point.  The whole plugin lives inside one sub (with its helpers
# nested below the main flow) and the last line of the file exits with
# this sub's return value, which is the Nagios status code.
sub check_snmp_tcp_conns {

    use strict;

    use FindBin;
    use lib "$FindBin::Bin/lib";
    use Nagios::Plugin::SNMP;

    # NOTE(review): the opening lines of this usage text (everything
    # before the "mode 'state'" comment) and every "<op>" placeholder
    # were lost from the copy of the file this was restored from; they
    # have been reconstructed and should be checked against the
    # original wording.
    my $USAGE = <<EOF;
Usage: %s --mode conn|state|service {
    # mode 'conn'
    -w conns_in:conns_out -c conns_in:conns_out
    # mode 'state' where <op> is one of 'lt' (<),
    # 'gt' (>), 'gte' (>=), or 'lte' (<=)
    -w 'state,<op>,N:state,<op>,N:state,<op>,N'
    -c 'state,<op>,N:state,<op>,N:state,<op>,N'
    # mode 'service' where <op> is one of 'lt', 'gt', 'gte', or 'lte'
    -S service_def1 ... -S service_defN [ -S other ]
    -w 'service_name,gte,5:service_two,gte,10'
    -c 'web,gte,50:pop3,gte,15:imap4,gte,15:cpanel,gte,5'
}
EOF

    my $LABEL = 'SNMP-TCP-CONNS';

    # Indexed by the numeric tcpConnState value returned by the walk.
    # Element 0 is only a placeholder so that the remaining names line
    # up with their state numbers; it is never a real state.
    my @CONN_STATES = qw(
        startOfTable
        closed
        listen
        synSent
        synReceived
        established
        finWait1
        finWait2
        closeWait
        lastAck
        closing
        timeWait
        deleteTCB
    );

    my $plugin = Nagios::Plugin::SNMP->new(
        'shortname' => $LABEL,
        'usage'     => $USAGE
    );

    $plugin->add_arg(
        'spec' => 'mode|M=s',
        'help' => "-M, --mode\n" .
                  " Check mode (conn, state, or service), defaults\n" .
                  " to 'conn'.\n" .
                  " In 'conn' mode, the plugin checks the number\n" .
                  " of inbound and outbound connections and outputs\n" .
                  " connections in, connections out, unique destination\n" .
                  " IP addresses and unique source IP addresses\n\n" .
                  " In 'state' mode the script will check the states\n" .
                  " of TCP connections to and from the server.\n\n" .
                  " In 'service' mode the script will check the numbers\n" .
                  " of connections to or from the server based on\n" .
                  " service groups you specify by passing definitions\n" .
                  " to the script using the '-S' switch",
        'default' => 'conn'
    );

    $plugin->add_arg(
        'spec' => 'include-port|P=i@',
        'help' => "-P, --include-port\n" .
                  " Limit results to just connections that have a \n" .
                  " client OR remote port matching the ports passed\n" .
                  " in as options.",
        'required' => 0,
        'default'  => []
    );

    $plugin->add_arg(
        'spec' => 'service|S=s@',
        'help' => "-S, --service\n" .
                  " Specify service groupings to use for service mode\n" .
                  " (-M 'service') check calls. Pass in one or more \n" .
                  " service group definition to the script, separated by\n" .
                  " colons, in the following format:\n" .
                  " 'service_name,port1,port2,range1-range4'\n" .
                  " Example: -S 'mail,25-26,465,587:www,80,443,8080,8443'.",
        'required' => 0,
        'default'  => []
    );

    $plugin->getopts;

    # Package variable rather than a lexical: the named helper subs
    # nested below read it, and a 'my' variable captured by a named
    # sub is only shared with the first invocation of the enclosing
    # sub ("Variable will not stay shared").
    our $DEBUG = $plugin->opts->get('snmp-debug') || 0;

    my $MODE = $plugin->opts->get('mode');

    if ($MODE !~ m/^(conn|state|service)$/) {
        $plugin->nagios_die("Invalid check mode '$MODE', " .
                            "must be 'conn', 'service', or 'state'!");
    }

    my $WARN = $plugin->opts->get('warning');
    my $CRIT = $plugin->opts->get('critical');

    # Treat a missing threshold as empty so that the per-mode checks
    # below report it with their own error message.
    $WARN = '' unless defined $WARN;
    $CRIT = '' unless defined $CRIT;

    my %SERVICES;

    if ($MODE eq 'conn') {

        my @w = split(':', $WARN);
        my @c = split(':', $CRIT);

        if (scalar(@w) != 2) {
            $plugin->nagios_die("Warning option must contain 2 thresholds: " .
                                "conns_in:conns_out");
        }

        if (scalar(@c) != 2) {
            $plugin->nagios_die("Critical option must contain 2 thresholds: " .
                                "conns_in:conns_out");
        }

        # The limits are compared numerically later on, so reject
        # anything that is not a plain non-negative integer here.
        if (grep { $_ !~ m/\A\d+\z/ } @w) {
            $plugin->nagios_die("Warning thresholds must be whole numbers: " .
                                "conns_in:conns_out");
        }

        if (grep { $_ !~ m/\A\d+\z/ } @c) {
            $plugin->nagios_die("Critical thresholds must be whole numbers: " .
                                "conns_in:conns_out");
        }

    } elsif ($MODE eq 'state') {

        my @w = split(':', $WARN);
        my @c = split(':', $CRIT);

        # Valid state names: everything except the index-0 placeholder.
        my @cs = @CONN_STATES;
        shift @cs;

        my $format_help = "Format: state,<op>,number where state is one of " .
                          join(', ', sort @cs) . " and op is one of " .
                          "'gt', 'gte', 'lt', or 'lte'";

        if (scalar(@w) < 1) {
            $plugin->nagios_die("Warning option must contain at least\n" .
                                "one state check, e.g. 'timeWait,gte,44'\n" .
                                $format_help);
        }

        for my $w (@w) {
            my ($lv, $op, $rv) = parse_cond_threshold($w, \@cs);
            if (!(defined($lv) && defined($op) && defined($rv))) {
                $plugin->nagios_die("Invalid warning threshold $w.\n" .
                                    $format_help);
            }
        }

        if (scalar(@c) < 1) {
            $plugin->nagios_die("Critical option must contain at " .
                                "least one state check, e.g. 'timeWait,gt,44'");
        }

        for my $c (@c) {
            my ($lv, $op, $rv) = parse_cond_threshold($c, \@cs);
            if (!(defined($lv) && defined($op) && defined($rv))) {
                $plugin->nagios_die("Invalid critical threshold $c.\n" .
                                    "$format_help");
            }
        }

    } elsif ($MODE eq 'service') {

        my @w = split(':', $WARN);
        my @c = split(':', $CRIT);

        my @svc_rules = @{$plugin->opts->get('service')};

        # NOTE(review): the text of this error message and the code
        # that fills %SERVICES from the -S arguments were missing from
        # the copy of the file this was restored from.  What follows is
        # a reconstruction based on how parse_service_rules() and
        # %SERVICES are used elsewhere in this file; confirm against
        # the original.
        if (scalar(@svc_rules) < 1) {
            $plugin->nagios_die("Service mode requires at least one " .
                                "service definition (-S switch), " .
                                "e.g. -S 'mail,25,465,587'");
        }

        for my $rule (@svc_rules) {
            # parse_service_rules() reports bad definitions with die();
            # turn that into a regular plugin error.
            my %parsed = eval { parse_service_rules($rule) };
            if ($@) {
                $plugin->nagios_die("Invalid service definition " .
                                    "'$rule': $@");
            }
            for my $label (keys %parsed) {
                $SERVICES{$label} = $parsed{$label};
            }
        }

        my $format_help = "Format: service,<op>,number where 'service' " .
                          "is one of " . join(', ', sort keys %SERVICES) .
                          " and op is one of 'gt', 'gte', 'lt', or 'lte'\n";

        if (scalar(@w) < 1) {
            $plugin->nagios_die("Warning option must contain at least\n" .
                                "one service check, e.g. 'mail,gt,22'\n" .
                                $format_help);
        }

        if (scalar(@c) < 1) {
            $plugin->nagios_die("Critical option must contain at least\n" .
                                "one service check, e.g. 'mail,gt,22'\n" .
                                $format_help);
        }

        for my $w (@w) {
            my ($lv, $op, $rv) = parse_cond_threshold($w, [keys %SERVICES]);
            if (!(defined($lv) && defined($op) && defined($rv))) {
                $plugin->nagios_die("Invalid warning threshold $w.\n" .
                                    $format_help);
            }
        }

        for my $c (@c) {
            my ($lv, $op, $rv) = parse_cond_threshold($c, [keys %SERVICES]);
            if (!(defined($lv) && defined($op) && defined($rv))) {
                $plugin->nagios_die("Invalid critical threshold $c.\n" .
                                    $format_help);
            }
        }

    } else {
        # Should not happen
        $plugin->nagios_die("Invalid mode $MODE!");
    }

    my @PORTS = @{$plugin->opts->get('include-port')};
    my %wanted_ports = map { $_ => 1 } @PORTS;

    if (scalar(@PORTS) > 0) {
        debug("Limit to ports: " .
              join(', ', (sort { $a <=> $b } keys %wanted_ports)));
    }

    # Walk the TCP conn table
    # RFC1213-MIB::tcpConnState .1.3.6.1.2.1.6.13.1.1
    my $base_oid = '.1.3.6.1.2.1.6.13.1.1';

    my $result = $plugin->walk($base_oid);

    debug("Retrieved TCP connection table");

    # Close and destroy session
    $plugin->close();

    my %conns;

    my %states = map { $_ => 0 } @CONN_STATES;
    delete $states{'startOfTable'};

    foreach my $idx (keys %{$result->{$base_oid}}) {

        # Each row's OID is the base OID followed by
        # local IP . local port . remote IP . remote port
        my ($local_ip, $local_port, $remote_ip, $remote_port) =
            ($idx =~ m/^
                       \Q${base_oid}\E \.
                       (\d+\.\d+\.\d+\.\d+) \.
                       (\d+) \.
                       (\d+\.\d+\.\d+\.\d+) \.
                       (\d+)
                      /ix);

        # Ignore rows whose index does not have the expected layout;
        # without this the address and port fields would be undefined.
        if (!defined $remote_port) {
            debug("Skipping unparseable table index $idx");
            next;
        }

        # If user requested filtering, only count connections
        # For ports they requested
        if (scalar(@PORTS) > 0) {
            next unless ((exists $wanted_ports{$remote_port}) ||
                         (exists $wanted_ports{$local_port}));
        }

        # Skip listening sockets if in conn mode as conn mode is just
        # interested in incoming and outgoing connections, not listeners
        if ($MODE eq 'conn') {
            next if ($local_ip eq '0.0.0.0' && $remote_ip eq '0.0.0.0');
            next if ($local_ip eq '0.0.0.0' && $local_port eq '0');
            next if ($remote_ip eq '0.0.0.0' && $remote_port eq '0');
        }

        # Ignore rows whose state value is not a known state number;
        # otherwise an undefined state name would be counted.
        my $state_idx = $result->{$base_oid}->{$idx};
        if (!defined($state_idx) || $state_idx !~ m/\A\d+\z/ ||
            $state_idx < 1 || $state_idx > $#CONN_STATES) {
            debug("Skipping $idx: unknown connection state");
            next;
        }

        my $state = $CONN_STATES[$state_idx];
        $states{$state}++;

        # Direction is a guess: the side with the lower port number is
        # taken to be the server end of the connection.
        my $direction = ($local_port < $remote_port) ? 'in' : 'out';
        my $arrow     = ($direction eq 'in') ? "<-" : "->";

        $conns{$idx} = {
            'state'      => $state,
            'localip'    => $local_ip,
            'localport'  => $local_port,
            'remoteip'   => $remote_ip,
            'remoteport' => $remote_port,
            'direction'  => $direction,
        };

        debug("$local_ip:$local_port $arrow $remote_ip:$remote_port")
            if $MODE eq 'conn';
    }

    if ($DEBUG == 1) {
        dump_conns(\%conns)   if $MODE eq 'conn';
        dump_states(\%states) if $MODE eq 'state';
        # Keep debug output on STDERR so that the first line on STDOUT
        # is always the plugin's status line.
        print STDERR "\n";
    }

    my %results = (
        'ok'        => [],
        'warn'      => [],
        'crit'      => [],
        'perf_data' => [],
    );

    my $ret = OK;

    if ($MODE eq 'conn') {
        check_conn_counts(\%conns, $WARN, $CRIT, \%results);
    } elsif ($MODE eq 'state') {
        my @cs = @CONN_STATES;
        shift @cs;
        check_conn_states(\%states, $WARN, $CRIT, \@cs, \%results);
    } elsif ($MODE eq 'service') {
        check_services(\%conns, $WARN, $CRIT, \%SERVICES, \%results);
    }

    my $have_crit = scalar(@{$results{'crit'}}) > 0;
    my $have_warn = scalar(@{$results{'warn'}}) > 0;
    my $have_ok   = scalar(@{$results{'ok'}})   > 0;

    print "$LABEL ";

    if ($have_crit) {
        print "CRITICAL - " . join(', ', @{$results{'crit'}});
        $ret = CRITICAL;
    }

    if ($have_warn) {
        print ', ' if $have_crit;
        print "WARNING - " . join(', ', @{$results{'warn'}});
        $ret = WARNING unless $ret == CRITICAL;
    }

    if ($have_ok) {
        print ', ' if ($have_crit || $have_warn);
        print "OK - " . join(', ', @{$results{'ok'}});
    }

    print "| " . join(' ', @{$results{'perf_data'}}) . "\n";

    # The caller (last line of this file) exits with this status.
    return $ret;

    # Build one performance data item: 'label'=count;warn;crit
    sub perf_data {
        my ($label, $count, $warn, $crit) = @_;
        return "'$label'=$count;$warn;$crit";
    }

    # Debug helper: print every field of every connection to STDERR,
    # one connection per line.
    sub dump_conns {
        my $conns = shift;
        for my $idx (sort { $a cmp $b } keys %$conns) {
            my %info = %{$conns->{$idx}};
            for my $key (sort keys %info) {
                print STDERR "$key:$info{$key} ";
            }
            print STDERR "\n";
        }
    }

    # Debug helper: print the per-state connection counts to STDERR.
    sub dump_states {
        my $states = shift;
        for my $state (sort keys %$states) {
            print STDERR "'$state'=$states->{$state} ";
        }
    }

    # Debug helper: print the per-service connection counts to STDERR.
    sub dump_services {
        my $service_counts = shift;
        for my $svc (sort keys %$service_counts) {
            print STDERR "$svc: $service_counts->{$svc}\n";
        }
    }

    # Summarise a connection table.
    # Takes a reference to the connection hash and returns a list:
    # (connections in, connections out, number of distinct remote IPs
    # on inbound connections, same for outbound connections).
    sub get_conn_stats {

        my $conns = shift;

        my %count      = ('in' => 0,  'out' => 0);
        my %unique_ips = ('in' => {}, 'out' => {});

        # Local connections - XXX - either side is 127.0.0.N

        for my $key (keys %$conns) {
            my $conn = $conns->{$key};
            next unless exists $conn->{'direction'};
            my $direction = ($conn->{'direction'} eq 'in') ? 'in' : 'out';
            $unique_ips{$direction}->{$conn->{'remoteip'}} = 1;
            $count{$direction}++;
        }

        return ($count{'in'},
                $count{'out'},
                scalar(keys %{$unique_ips{'in'}}),
                scalar(keys %{$unique_ips{'out'}}));
    }

    # Print a timestamped message to STDERR when debugging is on.
    sub debug {
        return unless $DEBUG == 1;
        my $msg = shift;
        print STDERR scalar(localtime()) . ": $msg\n";
    }

    # 'conn' mode check.
    # Compares the inbound and outbound connection counts against the
    # "in:out" warning and critical limits and appends messages and
    # performance data to the result hash passed as the last argument.
    # Returns 1.
    sub check_conn_counts {

        my ($conns, $warn_spec, $crit_spec, $info) = @_;

        my ($conns_in, $conns_out, $unique_src, $unique_dst) =
            get_conn_stats($conns);

        my ($wci, $wco) = split(':', $warn_spec);
        my ($cci, $cco) = split(':', $crit_spec);

        if ($conns_in > $cci) {
            push(@{$info->{'crit'}}, "Connections in ($conns_in > $cci)");
        } elsif ($conns_in > $wci) {
            push(@{$info->{'warn'}}, "Connections in ($conns_in > $wci)");
        } else {
            # A count equal to the limit is still ok, hence "<=".
            push(@{$info->{'ok'}}, "Connections in ok ($conns_in <= $wci)");
        }

        if ($conns_out > $cco) {
            push(@{$info->{'crit'}}, "Connections out ($conns_out > $cco)");
        } elsif ($conns_out > $wco) {
            push(@{$info->{'warn'}}, "Connections out ($conns_out > $wco)");
        } else {
            push(@{$info->{'ok'}}, "Connections out ok ($conns_out <= $wco)");
        }

        push(@{$info->{'perf_data'}},
             perf_data('conns_in',   $conns_in,   $wci, $cci),
             perf_data('conns_out',  $conns_out,  $wco, $cco),
             perf_data('unique_src', $unique_src, 0,    0),
             perf_data('unique_dst', $unique_dst, 0,    0));

        return 1;
    }

    # 'service' mode check.
    # Counts connections per service definition, tests the counts
    # against the warning and critical rules and appends messages and
    # performance data to the result hash passed as the last argument.
    #
    #   $conns        - reference to the connection hash
    #   $warn_spec    - colon-separated "service,op,number" rules
    #   $crit_spec    - colon-separated "service,op,number" rules
    #   $service_defs - hash ref: service label => test expression, as
    #                   produced by parse_service_rules()
    #   $info         - result hash ref (ok / warn / crit / perf_data)
    #
    # Returns $info.
    sub check_services {

        my ($conns, $warn_spec, $crit_spec, $service_defs, $info) = @_;

        my %service_counts = map { $_ => 0 } keys %$service_defs;

        if (exists $service_defs->{'other'}) {
            $service_counts{'other'}     = 0;
            $service_counts{'other_in'}  = 0;
            $service_counts{'other_out'} = 0;
        }

        # Total up all 'services' counts.  Iterate over the table that
        # was passed in, not over a variable of the enclosing sub.
        for my $key (keys %$conns) {

            my $conn = $conns->{$key};

            # Port only counts in services totals if it is a server
            # port; for incoming connections that is the local
            # port, for outgoing connections that is the remote port
            my $direction = $conn->{'direction'};
            my $port = ($direction eq 'in') ? $conn->{'localport'}
                                            : $conn->{'remoteport'};

            # See if it matches any services; every definition that
            # matches (the plain one and the _in / _out variants) has
            # its own bucket incremented once.
            my $matched = 0;

            for my $svc (keys %$service_defs) {
                my $check = $service_defs->{$svc};
                $check =~ s#\$port#$port#g;
                $check =~ s#\$direction#$direction#g;
                my $result = eval_expr($check);
                debug("$port vs $svc: $check returned $result");
                next unless $result == 1;
                $service_counts{$svc}++;
                $matched = 1;
            }

            # Magical 'other' bucket catches anything not
            # matched by a user-provided service rule
            if (!$matched && (exists $service_counts{'other'})) {
                debug("Port $port - no matches, incrementing 'other'");
                $service_counts{"other"}++;
                $service_counts{"other_$direction"}++;
            }
        }

        dump_services(\%service_counts) if $DEBUG == 1;

        # Now check thresholds against the warning and critical rules
        my @labels = keys %$service_defs;

        my %caught;           # services already reported as crit or warn
        my %has_threshold;    # services named in at least one rule

        for my $c (split(':', $crit_spec)) {
            my ($service, $op, $value) = parse_cond_threshold($c, \@labels);
            next unless defined($service) && defined($op) && defined($value);
            $has_threshold{$service} = 1;
            my $count = $service_counts{$service};
            if (eval_expr("$count $op $value") == 1) {
                debug("Service CRIT: $service ($count $op $value)");
                push(@{$info->{'crit'}}, "$service ($count $op $value)");
                $caught{$service} = 1;
            }
        }

        for my $w (split(':', $warn_spec)) {
            my ($service, $op, $value) = parse_cond_threshold($w, \@labels);
            next unless defined($service) && defined($op) && defined($value);
            $has_threshold{$service} = 1;
            next if exists $caught{$service};
            my $count = $service_counts{$service};
            if (eval_expr("$count $op $value") == 1) {
                debug("Service WARN: $service ($count $op $value)");
                push(@{$info->{'warn'}}, "$service ($count $op $value)");
                $caught{$service} = 1;
            }
        }

        # Report as OK every service that a rule names and that did not
        # breach.  Exact labels are used so that e.g. 'mail' is not
        # reported just because a rule for 'webmail' exists.
        for my $key (sort keys %$service_defs) {
            next if exists $caught{$key};
            next unless exists $has_threshold{$key};
            push(@{$info->{'ok'}}, "$key ($service_counts{$key})");
        }

        # Create performance data
        for my $key (sort keys %$service_defs) {
            push(@{$info->{'perf_data'}},
                 perf_data($key, $service_counts{$key}, 0, 0));
        }

        return $info;
    }

    # Count connections per server port and direction.
    # Takes a reference to the connection hash and returns a hash
    # ('in' => { port => count }, 'out' => { port => count }).
    # Not called from anywhere else in this file at present.
    sub get_ports {

        my $connections = shift;

        my %ports = ('in' => {}, 'out' => {});

        for my $index (keys %$connections) {
            my $conn      = $connections->{$index};
            my $direction = $conn->{'direction'};
            my $port = ($direction eq 'in') ? $conn->{'localport'}
                                            : $conn->{'remoteport'};
            $ports{$direction}->{$port} = 0
                unless exists $ports{$direction}->{$port};
            $ports{$direction}->{$port}++;
        }

        return %ports;
    }

    # Parse one "label,op,number" threshold rule.
    #
    #   $expr            - the rule text
    #   $valid_label_ref - array ref of the labels that are allowed
    #
    # Returns (label, operator, number).  The label is matched without
    # regard to case but returned exactly as spelled in the list of
    # valid labels, so it can be used directly as a hash key.  The
    # operator is returned as its Perl symbol (>, >=, <, <=).  Any part
    # that is not valid comes back as undef, which is what callers
    # test for.
    sub parse_cond_threshold {

        my ($expr, $valid_label_ref) = @_;

        my %symbol_for = (
            'gt'  => '>',
            'gte' => '>=',
            'lt'  => '<',
            'lte' => '<=',
        );

        my ($lv, $op, $rv) = split(',', $expr);

        return (undef, undef, undef)
            unless (defined($lv) && defined($op) && defined($rv));

        # Prefer an exact match, then fall back to ignoring case.
        my ($label) = grep { $_ eq $lv } @$valid_label_ref;
        if (!defined $label) {
            ($label) = grep { lc($_) eq lc($lv) } sort @$valid_label_ref;
        }

        my $real_op = $symbol_for{lc($op)};

        $rv = undef if $rv !~ m/\A\d+\z/;

        return ($label, $real_op, $rv);
    }

    # Parse a colon-separated list of service definitions, each of the
    # form "label,port,port,low-high,...".
    #
    # Returns a hash of label => test expression.  For every label
    # three entries are created: "label", "label_in" and "label_out".
    # The expressions contain the placeholders $port and $direction,
    # which check_services() fills in before evaluating them.  A label
    # without ports (such as 'other') gets an empty expression.
    #
    # Dies with a message if a definition is not valid.
    sub parse_service_rules {

        my $expr = shift;

        my %services;

        for my $rule (split(':', $expr)) {

            my ($label, @port_specs) = split(',', $rule);
            $label = '' unless defined $label;

            die "Service specs: Invalid label '$label'"
                unless $label =~ m/\w+/;

            # Also look at the derived names so that e.g. 'mail' and
            # 'mail_in' cannot silently overwrite each other.
            for my $name ($label, "${label}_in", "${label}_out") {
                die "Service label '$label' specified twice!"
                    if exists $services{$name};
            }

            my @svc_tests;

            for my $spec (@port_specs) {
                if ($spec =~ m/^(\d+)$/) {
                    push(@svc_tests, "(\$port == $1)");
                } elsif ($spec =~ m/^(\d+)\-(\d+)$/) {
                    push(@svc_tests,
                         "((\$port >= $1) && (\$port <= $2))");
                } else {
                    die "Service specs: '$spec' is not a single port " .
                        "or a range of ports!";
                }
            }

            my $cond = join(' || ', @svc_tests);

            $services{$label}          = $cond;
            $services{"${label}_in"}   = "($cond) && ('\$direction' eq 'in')";
            $services{"${label}_out"}  = "($cond) && ('\$direction' eq 'out')";

            debug("Service rule $label: $cond");
        }

        return %services;
    }

    # 'state' mode check.
    # Tests the per-state connection counts against the warning and
    # critical rules and appends messages and performance data to the
    # result hash passed as the last argument.
    #
    #   $states    - hash ref: state name => connection count
    #   $warn_spec - colon-separated "state,op,number" rules
    #   $crit_spec - colon-separated "state,op,number" rules
    #   $cs_ref    - array ref of valid state names
    #   $info      - result hash ref (ok / warn / crit / perf_data)
    #
    # Returns 1.
    sub check_conn_states {

        my ($states, $warn_spec, $crit_spec, $cs_ref, $info) = @_;

        my %caught;    # states already reported as critical

        for my $cspec (split(':', $crit_spec)) {
            my ($state, $op, $limit) = parse_cond_threshold($cspec, $cs_ref);
            next unless defined($state) && defined($op) && defined($limit);
            my $actual = $states->{$state};
            my $expr   = "$actual $op $limit";
            my $result = eval_expr($expr);
            debug("Critical $cspec: $expr returns $result");
            if ($result == 1) {
                push(@{$info->{'crit'}}, "$state $op $limit ($expr)");
                $caught{$state} = 1;
            }
        }

        for my $wspec (split(':', $warn_spec)) {
            my ($state, $op, $limit) = parse_cond_threshold($wspec, $cs_ref);
            next unless defined($state) && defined($op) && defined($limit);
            next if exists $caught{$state};
            my $actual = $states->{$state};
            my $expr   = "$actual $op $limit";
            my $result = eval_expr($expr);
            debug("Warn $wspec: $expr returns $result");
            if ($result == 1) {
                push(@{$info->{'warn'}}, "$state $op $limit ($expr)");
            } else {
                push(@{$info->{'ok'}}, "$state = $actual");
            }
        }

        for my $state (sort keys %$states) {
            push(@{$info->{'perf_data'}}, "'$state'=$states->{$state}");
        }

        return 1;
    }

    # Evaluate a small Perl expression given as a string and return
    # its value, or 0 if it is undefined, empty, or fails to compile.
    #
    # NOTE(review): this is a string eval.  Every caller in this file
    # builds the expression from digits, the operator symbols produced
    # by parse_cond_threshold() and the tests generated by
    # parse_service_rules(); do not pass unvalidated input to it.
    sub eval_expr {
        my $expr = shift;
        my $result = eval "($expr);";
        $result = 0 if ((! defined $result) or ($result eq ''));
        return $result;
    }
}

exit check_snmp_tcp_conns();