hadoop-ansible/roles/nagios-server/files/nagios-checks/check_hadoop_datanode

74 lines
1.9 KiB
Perl

#!/usr/bin/perl -w
#
# Nagios/NRPE check: scrape the HDFS NameNode web UI (dfshealth.jsp on
# port 50070) and report how many datanodes are alive.
#
# Usage: check_hadoop_datanode <namenode-host> <warn-threshold> <crit-threshold>
#   WARNING  when live nodes < warn-threshold
#   CRITICAL when live nodes < crit-threshold
#
# Run via nrpe on the hdfs master
#
use strict;
use Socket;
use FileHandle;

# Nagios plugin exit codes.
my $ST_OK = 0;
my $ST_WR = 1;
my $ST_CR = 2;
my $ST_UK = 3;

my $host      = $ARGV[0];
my $warn_node = $ARGV[1]; # WARNING when there is less than this number of nodes alive
my $crit_node = $ARGV[2]; # CRITICAL when there is less than this number of nodes alive

# All three arguments are required; without them the threshold
# comparisons below would run against undef.
unless (defined $host && defined $warn_node && defined $crit_node) {
    print "UNKNOWN - usage: $0 <host> <warn_nodes> <crit_nodes>\n";
    exit($ST_UK);
}

my $port = 50070;             # default NameNode HTTP port
my $url  = '/dfshealth.jsp';

# Resolve the host and connect to the NameNode web UI.  A bare die()
# would exit with status 255, which Nagios does not map to CRITICAL;
# print the message on STDOUT and exit 2 explicitly instead.
my $ip = inet_aton($host);
unless ($ip) {
    print "CRITICAL - host($host) not found.\n";
    exit($ST_CR);
}
my $sockaddr = pack_sockaddr_in($port, $ip);
my $sock;
unless (socket($sock, PF_INET, SOCK_STREAM, 0)) {
    print "CRITICAL - socket error.\n";
    exit($ST_CR);
}
unless (connect($sock, $sockaddr)) {
    print "CRITICAL - connect $host $port error.\n";
    exit($ST_CR);
}
$sock->autoflush(1);

# HTTP/1.0 request; header lines must end in CRLF per RFC 1945.
print {$sock} "GET $url HTTP/1.0\r\n\r\n";

my ($live_node, $decommissioned_node, $dead_node);
while (my $buf = <$sock>) {
    # Get the number of "Live Nodes" from the response of the http request.
    # The summary row reads roughly "Live Nodes : N (Decommissioned: D)
    # Dead Nodes : M ... Decommissioning Nodes : K", wrapped in markup.
    if ($buf =~ /Live Nodes.*Decommissioning Nodes/) {
        my $node = $&;
        $node =~ s/\s//g;                  # drop all whitespace
        $node =~ s/\)//g;                  # drop closing parens
        $node =~ s/<[^>]*>//gs;            # strip HTML tags
        $node =~ s/LiveNodes//g;
        $node =~ s/\(Decommissioned//g;
        $node =~ s/DeadNodes//g;
        $node =~ s/DecommissioningNodes//g;
        # What remains is ":live:decommissioned:dead..." - split on colons.
        ($live_node, $decommissioned_node, $dead_node)
            = (split(/:/, $node))[1, 2, 3];
    }
}
close($sock);
#print ("live nodes: $live_node, decommissioned nodes: $decommissioned_node, dead nodes: $dead_node\n");

# If the summary row never matched we have no data; report UNKNOWN
# rather than comparing undef against the thresholds.
unless (defined $live_node) {
    print "UNKNOWN - could not parse live node count from http://$host:$port$url\n";
    exit($ST_UK);
}

if ($live_node < $crit_node) {
    print "CRITICAL - Datanodes up and running: $live_node, dead: $dead_node \n";
    exit($ST_CR);
} elsif ($live_node < $warn_node) {
    print "WARNING - Datanodes up and running: $live_node, dead: $dead_node \n";
    exit($ST_WR);
} else {
    print "OK - Datanodes up and running: $live_node, dead: $dead_node \n";
    exit($ST_OK);
}