#!@@PERL@@
#
# Provenance (patch metadata that preceded this script in the extracted page):
#   From: Stephen L Johnson
#   Date: Thu, 25 Nov 1999 00:57:04 +0000 (+0000)
#   Subject: Initial revision
#   X-Git-Tag: spong-2_6-beta7~10
#   X-Git-Url: http://git.etc.gen.nz/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=372cdcaa1ab473d1f36137c20a52b41ea4314785;p=spong.git
#   diff --git a/src/spong-client.pl b/src/spong-client.pl
#   new file mode 100755
#   index 0000000..f1e7c23
#   --- /dev/null
#   +++ b/src/spong-client.pl
#   @@ -0,0 +1,249 @@
#
# Spong client monitoring script.  This runs various tests locally on a machine
# and sends those results to the Spong server machine where the information
# is summarized and displayed.  This script currently checks the following:
#
#    * disk  (check to make sure that disks are not filling up)
#    * cpu   (check to make sure the load is reasonable)
#    * jobs  (check to make sure certain key processes are running)
#    * msgs  (scan through logs looking for problems)
#
# Usage: spong-client [--debug] [--restart] [--kill] [--nosleep] [--refresh]
#                     [config-file]
#
# History:
# (1) Ported bb-local.sh script to perl. (Ed Hill Feb 26, 1997)

use lib '@@LIBDIR@@';

use Sys::Hostname;
use Socket;

use Spong::Daemon;

srand( time() ^ ($$ + ($$ << 15 )) );

# Command line switches.  They are accepted in any order; parsing stops at the
# first argument that does not start with "--", which (if present) names the
# configuration file.  --refresh sets the same flag as --nosleep.

while( @ARGV && $ARGV[0] =~ /^--/ ) {
    my $opt = shift @ARGV;
    if(    $opt eq "--debug" )   { $debug   = 1; }
    elsif( $opt eq "--restart" ) { $restart = 1; }
    elsif( $opt eq "--kill" )    { $kill    = 1; }
    elsif( $opt eq "--nosleep" ) { $nosleep = 1; }
    elsif( $opt eq "--refresh" ) { $nosleep = 1; }
    else                         { die "Unknown option: $opt\n"; }
}

$me        = "@@BINDIR@@/spong-client";
$conf_file = $ARGV[0] || "@@ETCDIR@@/spong.conf";

# Canonical, lower-cased name of this machine.  If the local name does not
# resolve, fall back to the unresolved name instead of leaving $HOST undefined
# (it is used to pick the host-specific configuration file).

my $local_name = Sys::Hostname::hostname();
($HOST) = gethostbyname( $local_name );
$HOST   = $local_name unless defined $HOST && length $HOST;
$HOST   = lc $HOST;

$CHECKS = "";

&load_config_files(); # Loads the user specified configuration information
Spong::Daemon::Daemonize()  # Daemonize if not signalling or debugging
    unless ($restart || $kill || $nosleep || $debug);
&handle_signals();    # Set up handlers, and signal the current server if asked

%CHECKFUNCS = ();
&load_checks();

# Pretty simple program, check the stuff that we are looking at, report it to
# the server, sleep, and do it all
# again...

while( 1 ) {

    # split ' ' skips leading and repeated whitespace, so a loosely formatted
    # $CHECKS list never yields an empty check name.
    foreach my $check ( split ' ', $CHECKS ) {
        if( ! defined $CHECKFUNCS{$check} ) {
            &error( "Error running check $check: no check function registered" );
            next;
        }
        eval { &{$CHECKFUNCS{$check}}(); };
        if ($@) { &error( "Error running check $check: $@" ); }
    }

    # If we are supposed to stay alive, then sleep about $SPONGSLEEP seconds,
    # we add a little randomness so that things don't get in sync and pound
    # the spong-server.  Otherwise, just exit.

    if( $nosleep ) {
        last;
    } else {
        my $sleep = int($SPONGSLEEP - (.05 * $SPONGSLEEP) +
                        rand(.1 * $SPONGSLEEP));
        $sleep = 1 if $sleep < 1;   # never spin if $SPONGSLEEP is unset or tiny
        &debug( "sleeping for $sleep seconds" );
        sleep $sleep;
    }
}

unlink( "$SPONGTMP/spong-client.pid" ) unless $nosleep;
exit(0);


# ---------------------------------------------------------------------------
# &status( SERVERADDR, HOST, SERVICE, COLOR, SUMMARY, MESSAGE )
#
# This function sends information to the Spong server.  It reports the current
# status of a service, and sends along a string of information that might be
# helpful in diagnosing the problem.  This code is modeled after the bb
# program, but is a little different in that it handles multi-line messages
# and sends over the time as an int, rather than a string.
#
# SERVERADDR may be a dotted quad or a host name.  Dies with "no host: ..." if
# it cannot be resolved.  Any other failure (socket, connect, write, close, or
# the 30 second timeout) is reported on STDERR and the function returns.
# ---------------------------------------------------------------------------

sub status {
    my( $addr, $host, $cat, $color, $summary, $message ) = @_;
    my( $iaddr, $paddr, $proto, $ok );

    # inet_aton() takes either a dotted quad or a host name, so no separate
    # gethostbyname()/unpack step is needed.
    $addr = '' unless defined $addr;
    $addr =~ s/^\s+//;
    $addr =~ s/\s+$//;

    $iaddr = inet_aton( $addr ) || die "no host: $addr\n";
    $paddr = sockaddr_in( $SPONG_UPDATE_PORT, $iaddr );
    $proto = getprotobyname( 'tcp' );

    # Set an alarm so that if we can't connect "immediately" it times out.
    # The handler is localized so the previous ALRM disposition comes back
    # when we return.

    local $SIG{'ALRM'} = sub { die "timed out\n" };

    eval {
        alarm(30);
        socket( SOCK, PF_INET, SOCK_STREAM, $proto ) || die "socket: $!\n";
        connect( SOCK, $paddr )                      || die "connect: $!\n";
        select((select(SOCK), $| = 1)[0]);
        print SOCK "status $host $cat $color ", time(), " $summary\n";
        print SOCK "$message\n";
        close( SOCK )                                || die "close: $!\n";
        $ok = 1;
    };
    my $err = $@;
    alarm(0);

    if( ! $ok ) {
        close( SOCK );   # don't leave a half-open socket behind
        print STDERR scalar(localtime), " can't connect to spong server.\n";
        chomp $err;
        &debug( "status update failed: $err" );
    }
}


# ===========================================================================
# Utility functions, and signal handlers...
# ===========================================================================

# Load our configuration variables, including anything specific to the host
# that we are running on.  The main file is mandatory.  After it, at most one
# host specific file is loaded: "<conf>.<full host name>" if it exists, else
# "<conf>.<short host name>" if that exists.  Dies if a file fails to load.

sub load_config_files {
    eval { require $conf_file; };
    die "Can't load $conf_file: $@" if $@;

    my $short = (split( /\./, $HOST ))[0];   # for lazy typist
    my $extra;
    if(    -f "$conf_file.$HOST" )  { $extra = "$conf_file.$HOST"; }
    elsif( -f "$conf_file.$short" ) { $extra = "$conf_file.$short"; }

    if( defined $extra ) {
        eval { require $extra; };
        die "Can't load $extra: $@" if $@;
    }
    &debug( "configuration file(s) loaded" );
}

# This is part of the set up code, this sets up the signal handlers, and
# handles any command line arguments that are given to us that tell us to
# signal the currently running spong-client program.

sub handle_signals {

    # Set up some signal handlers to handle our death gracefully, and also
    # listen for the HUP signal, and if we see that, we re-exec ourself.

    $SIG{'QUIT'} = \&exit_handler;
    $SIG{'HUP'}  = \&hup_handler;
    $SIG{'USR1'} = \&hup_handler;
    $SIG{'PIPE'} = \&pipe_handler;

    my $pid_file = "$SPONGTMP/spong-client.pid";

    # If the user gives us the --restart or --kill flags, then we signal the
    # currently running spong-client process, and tell it to either die, or
    # re-exec itself (which causes it to re-read its configuration files).

    if( $restart || $kill ) {
        open( PID, "< $pid_file" ) || die "Can't find pid: $!";
        my $pid = <PID>;
        close PID;

        $pid = '' unless defined $pid;
        chomp $pid;

        # An empty or garbage pid would numify to 0, and kill() on pid 0
        # signals our whole process group.
        die "Invalid pid in $pid_file\n" unless $pid =~ /^\d+$/ && $pid > 0;

        if( $restart ) {
            &debug( "telling pid $pid to restart" ); kill( 'HUP', $pid ); }
        if( $kill ) {
            &debug( "telling pid $pid to die" ); kill( 'QUIT', $pid ); }

        exit(0);
    }

    # Write our pid to the spong tmp directory (one-shot runs leave no pid
    # file).  A failure here is logged but is not fatal.

    unless( $nosleep ) {
        if( open( PID, "> $pid_file" ) ) {
            print PID "$$\n";
            close( PID ) || &error( "Can't write $pid_file: $!" );
        } else {
            &error( "Can't write $pid_file: $!" );
        }
    }
}


# Output functions, one for debugging information, the other for errors.

sub debug { print scalar localtime, " ", $_[0], "\n" if $main::debug; }
sub error { warn scalar localtime(), " Error: ", $_[0], "\n"; }


# Fork from the parent and become a daemon.  The parent exits; the child
# detaches its standard handles (unless debugging) and starts a new session.

sub daemonize {

    require POSIX;   # provides setsid(); not loaded anywhere else in this file

    # Try to fork
    my $pid = fork();
    if (! defined $pid) {
        die "ERROR: Could not fork: $!";
    } elsif ($pid) {
        # I'm the parent, so just exit gracefully
        exit(0);
    }

    # I'm the child

    # If we are not debugging, disconnect from the console.  STDERR goes
    # last so that a failure on the other two can still be reported.
    if ( ! $debug ) {
        open( STDIN,  "< /dev/null" ) || die "Can't reopen STDIN: $!";
        open( STDOUT, "> /dev/null" ) || die "Can't reopen STDOUT: $!";
        open( STDERR, "> /dev/null" );
    }

    # Become session group leader
    POSIX::setsid();
}


# Signal handlers...

# QUIT: remove our pid file and exit.
sub exit_handler {
    &debug( "caught QUIT signal, exiting..." );
    unlink "$SPONGTMP/spong-client.pid" if -f "$SPONGTMP/spong-client.pid";
    exit(0);
}

# HUP/USR1: remove our pid file and re-exec ourselves, keeping the --debug
# flag and the configuration file we were started with.
sub hup_handler {
    &debug( "caught HUP signal, restarting..." );
    unlink "$SPONGTMP/spong-client.pid" if -f "$SPONGTMP/spong-client.pid";
    $SIG{$_[0]} = \&hup_handler;

    my @args = $debug ? ( "--debug" ) : ();
    push( @args, $conf_file ) if defined $conf_file && length $conf_file;

    exec( $me, @args ) || &error( "Can't exec $me: $!" );
    exit(1);
}

# PIPE: reap a child, note the signal, and re-install this handler.
sub pipe_handler {
    wait();
    &debug( "caught $_[0] signal." );
    $SIG{$_[0]} = \&pipe_handler;
}

#
# Load client checking functions as defined by the $CHECKS list.
# If $CHECKS is empty, default to 'disk cpu processes logs', plus 'local'
# when a check_local function is already defined.  A plugin that fails to
# load is logged and skipped.

sub load_checks {

    if (! defined $CHECKS || ! $CHECKS ) {
        $CHECKS = 'disk cpu processes logs';
        if (defined &check_local) { $CHECKS .= ' local'; }
    }

    foreach my $check ( split ' ', $CHECKS ) {
        &debug( "Loading client check $check" );
        eval { require "Spong/Client/plugins/check_$check"; };
        if ( $@ ) { &error( "Could not load $check check plugin: $@" ); }
    }
}

# ---------------------------------------------------------------------------
# Patch metadata that separated the two scripts in the extracted page:
#   diff --git a/src/spong-network.pl b/src/spong-network.pl
#   new file mode 100755
#   index 0000000..7faea70
#   --- /dev/null
#   +++ b/src/spong-network.pl
#   @@ -0,0 +1,416 @@
# ---------------------------------------------------------------------------
#!@@PERL@@
#
# Spong network monitoring script.  This runs various tests against IP based
# network services (like nntp, smtp, http, pop, etc...)  If it can not
# connect to a service, then it sends a message to the spong server machine
# about the problems.  This script currently checks the following:
#
#    * ping  (try connecting to it period...)
#    * ftp, pop3, smtp, http, nntp, imap  (a suite of tcp based services)
#    * dns   (if the Net::DNS module is installed)
#
# I'm not currently using the non-core Net:: tools since that would require an
# installation of a separate perl package, but I probably will at some point
#
# History:
# (1) Ported bb-network.sh script to perl.
#     (Ed Hill Feb 27, 1997)
# (2) Converted checks to new plugin mechanism (Stephen Johnson May 28, 1999)
#     Added user-configurable escalation mechanism

use Carp;
use lib "@@LIBDIR@@";

$CRIT_WARN_LEVEL = 1;

use Sys::Hostname;
use Net::Ping;
use Socket;

use Spong::Daemon;

srand( time() ^ ($$ + ($$ << 15 )) );

# Command line switches.  They are accepted in any order; parsing stops at the
# first argument that does not start with "--", which (if present) names the
# configuration file.  --refresh sets the same flag as --nosleep.

while( @ARGV && $ARGV[0] =~ /^--/ ) {
    my $opt = shift @ARGV;
    if(    $opt eq "--debug" )   { $debug   = 1; }
    elsif( $opt eq "--restart" ) { $restart = 1; }
    elsif( $opt eq "--kill" )    { $kill    = 1; }
    elsif( $opt eq "--nosleep" ) { $nosleep = 1; }
    elsif( $opt eq "--refresh" ) { $nosleep = 1; }
    else                         { die "Unknown option: $opt\n"; }
}

$me         = "@@BINDIR@@/spong-network";
$conf_file  = $ARGV[0] || "@@ETCDIR@@/spong.conf";
$hosts_file = "@@ETCDIR@@/spong.hosts";

# Canonical, lower-cased name of this machine.  If the local name does not
# resolve, fall back to the unresolved name instead of leaving $HOST undefined
# (it is used to pick the host-specific configuration file).

my $local_name = Sys::Hostname::hostname();
($HOST) = gethostbyname( $local_name );
$HOST   = $local_name unless defined $HOST && length $HOST;
$HOST   = lc $HOST;

&load_config_files(); # Loads the user specified configuration information
Spong::Daemon::Daemonize()  # Daemonize if not signalling or a one-shot
    unless ($nosleep || $restart || $kill || $debug );
&handle_signals();    # Set up handlers, and signal the current server if asked

%PLUGINS = ();        # service name => check function
&config_funcs();

# Do the various network tests for each host.

while( 1 ) {
    foreach my $host ( @HOSTS_LIST ) {
        my $services = $HOSTS{$host}->{'services'};
        $services = '' unless defined $services;
        &debug( "checking network services ($services) on $host" );

        # 'ping' is always checked first, then whatever the host lists.
        # split ' ' ignores leading and repeated whitespace.
        foreach my $check ( 'ping', split( ' ', $services ) ) {
            &do_check($host,$check);
        }
    }

    # If we are supposed to stay alive, then sleep about $SPONGSLEEP seconds,
    # we add a little randomness so that things don't get in sync and pound
    # the spong-server.  Otherwise, just exit.

    if( $nosleep ) {
        last;
    } else {
        my $sleep = int($SPONGSLEEP - (.05 * $SPONGSLEEP) +
                        rand(.1 * $SPONGSLEEP));
        $sleep = 1 if $sleep < 1;   # never spin if $SPONGSLEEP is unset or tiny
        &debug( "sleeping for $sleep seconds" );
        sleep $sleep;
    }
}

# Remove the pid file that handle_signals() wrote for this program.
unlink( "$SPONGTMP/spong-network.pid" ) unless $nosleep;
exit(0);


# This routine performs the checks and determines what status code to send
# to the spong-server.

# &do_check( HOST, SERVICE )
#
# Runs the check function registered in %PLUGINS for SERVICE against HOST and
# forwards the resulting (status, summary, message) to the spong-server via
# &status().  A missing or failing check function, or a failure to deliver
# the status, is logged with &error() and the routine returns.

sub do_check {
    my( $host, $service ) = @_;

    # Get the host's saved warning count for this service.  The lookup is
    # done step by step so that merely reading it does not create empty
    # 'service' entries inside %HOSTS.
    my $crit_count = 0;
    my $saved = $HOSTS{$host} ? $HOSTS{$host}->{'service'} : undef;
    if( $saved && $saved->{$service}
            && defined $saved->{$service}->{'count'} ) {
        $crit_count = $saved->{$service}->{'count'};
    }

    # Call the check function as referenced by the PLUGINS hash
    my $plugin = $PLUGINS{$service};
    if( ! defined $plugin ) {
        &error("No check function defined for $service");
        return;
    }

    my( $status, $summary, $message ) = eval { &{$plugin}($host) };
    if ($@) {
        &error("Check function for $service failed: $@");
        return;
    }

    my $color;

    # If current status is critical, increment the critical count counter
    # If counter < $CRIT_WARN_LEVEL, reduce status to yellow
    # else pass a critical as a critical
    # If current status is not red, reset the critical level counter.
    # (Escalation is currently disabled: the status is passed through as is.)
#   if ($status eq 'red') {
#       $crit_count += 1;
#       $color = ($crit_count < $CRIT_WARN_LEVEL) ? 'yellow' : 'red';
#       $summary = "($crit_count/$CRIT_WARN_LEVEL) " . $summary
#   } else {
        $crit_count = 0;
        $color = $status;
#   }

    &debug("$status - $crit_count - $CRIT_WARN_LEVEL - $color - $summary");

    # Save the critical counter in the host for the service
#   if ($color ne "green") {
#       $HOSTS{$host}->{'service'}->{$service}->{'count'} = $crit_count;
#   } else {
#       undef $HOSTS{$host}->{'service'}->{$service}->{'count'};
#   }

    # &status() dies when the server address cannot be resolved; one bad
    # update must not take the whole monitoring loop down.
    eval {
        &status( $SPONGSERVER, $host, $service, $color, $summary, $message );
    };
    if ($@) { &error("Could not send $service status for $host: $@"); }
}

# A generic tcp port checking routine.  You give this function a hostname, a
# port, a message to send (can be ""), a return regular expression string to
# check for, and the name of the service.  This will go out connect to that
# port and check to make sure you get back expected results.
#
# Up to three attempts are made.  Returns the list ( COLOR, SUMMARY, MESSAGE )
# where COLOR is "green" as soon as a reply matches the expression, otherwise
# "red"; MESSAGE is whatever the last attempt got back from &check_tcp().

sub check_simple {
    my( $host, $port, $send, $check, $service ) = @_;
    my( $color, $summary ) = ( "red", "" );
    my( $attempt, $start, $message, $diff );

    foreach my $try ( 1..3 ) {
        $start   = time();
        $message = &check_tcp( $host, $port, $send );
        $diff    = time() - $start;

        $attempt = $try;

        # An undefined reply means the connection itself failed; never let
        # that count as a match, however permissive the expression is.
        if( defined $message && $message =~ /$check/ ) {
            $color = "green";
            last;
        }
        &debug("Attempt $try failed.");
    }

    $summary = "$service down" if $color eq "red";
    $summary = "$service ok - $diff second response time" if $color eq "green";
    $summary .= ", attempt $attempt" if ($attempt != 1 && $color eq "green");

    &debug( "$service - $host - $color, $summary" );
    return( $color, $summary, $message );
}



# ---------------------------------------------------------------------------
# &check_tcp( HOST, PORT, DATA )
#
# This function will make a connection to a port on a given host, and send a
# message, it will then return what it gets back to the caller of this
# function.
#
# Returns:
#   -1     if HOST cannot be resolved
#   undef  if the socket could not be created or connected (including a
#          timeout before the connection was established)
#   up to 256 bytes of reply otherwise ("" if nothing arrived before the
#          10 second timeout)
# ---------------------------------------------------------------------------

sub check_tcp {
    my( $addr, $port, $data ) = @_;
    my( $iaddr, $paddr, $proto, $line );

    # inet_aton() takes either a dotted quad or a host name, so no separate
    # gethostbyname()/unpack step is needed.
    $addr = '' unless defined $addr;
    $addr =~ s/^\s+//;
    $addr =~ s/\s+$//;

    $iaddr = inet_aton( $addr ) || return -1;
    $paddr = sockaddr_in( $port, $iaddr );
    $proto = getprotobyname( 'tcp' );

    # Set an alarm so that if we can't connect "immediately" it times out.
    # Poor man's exception handling in perl...  The handler is localized so
    # the previous ALRM disposition comes back when we return.

    local $SIG{'ALRM'} = sub { die "timed out\n" };

    eval {
        alarm(10);
        socket( SOCK, PF_INET, SOCK_STREAM, $proto ) || die "socket: $!\n";
        connect( SOCK, $paddr )                      || die "connect: $!\n";
        $line = '';     # connected: from here on the reply is at least ""
        select((select(SOCK), $| = 1)[0]);
        $data = '' unless defined $data;
        print SOCK "$data";
        recv( SOCK, $line, 256, 0 );  # just grab a chunk from the service.
    };
    my $err = $@;
    alarm(0);
    close( SOCK );      # always release the socket, also after a failure

    if( $err ) {
        chomp $err;
        &debug( "check_tcp $addr:$port failed: $err" );
    }
    return $line;
}


# ---------------------------------------------------------------------------
# &status( SERVERADDR, HOST, SERVICE, COLOR, SUMMARY, MESSAGE )
#
# This function sends information to the Spong server.  It reports the current
# status of a service, and sends along a string of information that might be
# helpful in diagnosing the problem.  This code is modeled after the bb
# program, but is a little different in that it handles multi-line messages
# and sends over the time as an int, rather than a string.
#
# SERVERADDR may be a dotted quad or a host name.  Dies with "no host: ..." if
# it cannot be resolved.  Any other failure (socket, connect, write, close, or
# the 30 second timeout) is reported on STDERR and the function returns.
# ---------------------------------------------------------------------------

sub status {
    my( $addr, $host, $cat, $color, $summary, $message ) = @_;
    my( $iaddr, $paddr, $proto, $ok );

    $addr = '' unless defined $addr;
    $addr =~ s/^\s+//;
    $addr =~ s/\s+$//;

    $iaddr = inet_aton( $addr ) || die "no host: $addr\n";
    $paddr = sockaddr_in( $SPONG_UPDATE_PORT, $iaddr );
    $proto = getprotobyname( 'tcp' );

    # Set an alarm so that if we can't connect "immediately" it times out.

    local $SIG{'ALRM'} = sub { die "timed out\n" };

    eval {
        alarm(30);
        socket( SOCK, PF_INET, SOCK_STREAM, $proto ) || die "socket: $!\n";
        connect( SOCK, $paddr )                      || die "connect: $!\n";
        select((select(SOCK), $| = 1)[0]);
        print SOCK "status $host $cat $color ", time(), " $summary\n";
        print SOCK "$message\n";
        close( SOCK )                                || die "close: $!\n";
        $ok = 1;
    };
    my $err = $@;
    alarm(0);

    if( ! $ok ) {
        close( SOCK );   # don't leave a half-open socket behind
        print STDERR scalar(localtime), " can't connect to spong server.\n";
        chomp $err;
        &debug( "status update failed: $err" );
    }
}



# ===========================================================================
# Utility functions, and signal handlers...
# ===========================================================================

# Load our configuration variables, including anything specific to the host
# that we are running on.
#
# The main file is mandatory.  After it, at most one host specific file is
# loaded: "<conf>.<full host name>" if it exists, else "<conf>.<short host
# name>" if that exists.  Then the hosts file is evaluated (it is expected to
# populate %HOSTS) and @HOSTS_LIST is filled with the host names.  Dies if
# any of the files cannot be loaded.

sub load_config_files {
    eval { require $conf_file; };
    die "Can't load $conf_file: $@" if $@;

    my $short = (split( /\./, $HOST ))[0];   # for lazy typist
    my $extra;
    if(    -f "$conf_file.$HOST" )  { $extra = "$conf_file.$HOST"; }
    elsif( -f "$conf_file.$short" ) { $extra = "$conf_file.$short"; }

    if( defined $extra ) {
        eval { require $extra; };
        die "Can't load $extra: $@" if $@;
    }
    &debug( "configuration file(s) loaded" );

    # Read in the spong.hosts file.  We are a little nasty here in that we do
    # some junk to scan through the file so that we can maintain the order of
    # the hosts as they appear in the file.

    my( $evalme, $inhosts ) = ( '', 0 );

    # Note: the handle is deliberately not called HOSTS and not localized as
    # a glob, so that it can never interfere with the %HOSTS hash.
    open( HOSTS_FH, "< $hosts_file" ) || die "Can't load $hosts_file: $!";
    while( <HOSTS_FH> ) {
        $evalme .= $_;
        if( /^\s*%HOSTS\s*=\s*\(/ ) { $inhosts = 1; }
        if( $inhosts && /^\s*[\'\"]?([^\s\'\"]+)[\'\"]?\s*\=\>\s*\{/ ) {
            push( @HOSTS_LIST, $1 );
        }
    }
    close( HOSTS_FH );

    die "Invalid spong.hosts file: empty\n" unless $evalme =~ /\S/;
    eval $evalme;
    die "Invalid spong.hosts file: $@" if $@;

    # Fallback, if we didn't read things correctly...  The line scan above
    # can pick up names that are not hosts (or miss some); if the scanned
    # names are not exactly the keys of %HOSTS, use the sorted keys instead.

    if( join( "\0", sort @HOSTS_LIST ) ne join( "\0", sort keys %HOSTS ) ) {
        @HOSTS_LIST = sort keys %HOSTS;
    }
    &debug( "host file loaded" );
}

# This is part of the set up code, this sets up the signal handlers, and
# handles any command line arguments that are given to us that tell us to
# signal the currently running spong-network program.

sub handle_signals {

    # Set up some signal handlers to handle our death gracefully, and also
    # listen for the HUP signal, and if we see that, we re-exec ourself.

    $SIG{'QUIT'} = \&exit_handler;
    $SIG{'HUP'}  = \&hup_handler;
    $SIG{'PIPE'} = \&pipe_handler;

    my $pid_file = "$SPONGTMP/spong-network.pid";

    # If the user gives us the --restart or --kill flags, then we signal the
    # currently running spong-network process, and tell it to either die, or
    # re-exec itself (which causes it to re-read its configuration files).

    if( $restart || $kill ) {
        open( PID, "< $pid_file" ) || die "Can't find pid: $!";
        my $pid = <PID>;
        close PID;

        $pid = '' unless defined $pid;
        chomp $pid;

        # An empty or garbage pid would numify to 0, and kill() on pid 0
        # signals our whole process group.
        die "Invalid pid in $pid_file\n" unless $pid =~ /^\d+$/ && $pid > 0;

        if( $restart ) {
            &debug( "telling pid $pid to restart" ); kill( 'HUP', $pid ); }
        if( $kill ) {
            &debug( "telling pid $pid to die" ); kill( 'QUIT', $pid ); }

        exit(0);
    }

    # Write our pid to the spong tmp directory (one-shot runs leave no pid
    # file).  A failure here is logged but is not fatal.

    unless( $nosleep ) {
        if( open( PID, "> $pid_file" ) ) {
            print PID "$$\n";
            close( PID ) || &error( "Can't write $pid_file: $!" );
        } else {
            &error( "Can't write $pid_file: $!" );
        }
    }
}


# Output functions, one for debugging information, the other for errors.

sub debug { print scalar localtime, " ", $_[0], "\n" if $main::debug; }
sub error { warn scalar localtime(), " Error: ", $_[0], "\n"; }


# Handle some signals...

# QUIT: remove our pid file and exit.
sub exit_handler {
    &debug( "caught QUIT signal, exiting..." );
    unlink "$SPONGTMP/spong-network.pid" if -f "$SPONGTMP/spong-network.pid";
    exit(0);
}

# HUP: remove our pid file and re-exec ourselves, keeping the --debug flag
# and the configuration file we were started with.
sub hup_handler {
    &debug( "caught HUP signal, restarting..." );
    unlink "$SPONGTMP/spong-network.pid" if -f "$SPONGTMP/spong-network.pid";

    my @args = $debug ? ( "--debug" ) : ();
    push( @args, $conf_file ) if defined $conf_file && length $conf_file;

    # If the exec fails we have already dropped our pid file, so stop
    # rather than linger as a process that can no longer be signalled.
    exec( $me, @args ) || &error( "Can't exec $me: $!" );
    exit(1);
}

# PIPE: reap a child, note the signal, and re-install this handler.
sub pipe_handler {
    wait();
    &debug( "caught $_[0] signal." );
    $SIG{$_[0]} = \&pipe_handler;
}


# This is a temporary kludge until I create the check function plugins
# mechanism.  It loads the plugin file for 'ping' and for every service
# named in any host's 'services' list; a plugin that fails to load is logged
# and skipped.

sub config_funcs {
#   $PLUGINS{'ping'}  = \&check_ping;
#   $PLUGINS{'ftp'}   = \&check_ftp;
#   $PLUGINS{'pop'}   = \&check_pop;
#   $PLUGINS{'smtp'}  = \&check_smtp;
#   $PLUGINS{'nntp'}  = \&check_nntp;
#   $PLUGINS{'imap'}  = \&check_imap;
#   $PLUGINS{'dns'}   = \&check_dns;
#   $PLUGINS{'http'}  = \&check_http;
#   $PLUGINS{'nfs'}   = \&check_nfs;

    # Consolidate all of the services to be checked into a unique list
    my %checks;
    foreach my $host ( @HOSTS_LIST ) {
        my $services = $HOSTS{$host}->{'services'};
        next unless defined $services;
        foreach my $check ( split( ' ', $services ) ) {
            $checks{$check} = 1;
        }
    }
    delete $checks{'ping'};   # always loaded below; avoid listing it twice

    foreach my $plugin ( 'ping', keys(%checks) ) {
        &debug("Loading $plugin plugin");
        eval { require "Spong/Network/plugins/check_$plugin"; };
        if ( $@ ) { &error("Could not load $plugin plugin: $@"); }
    }
}

# Fork into the background and disconnect our console and become
# the session group leader

sub daemonize {

    require POSIX;   # provides setsid(); not loaded anywhere else in this file

    # Try to fork
    my $pid = fork();
    if (! defined $pid) {
        die "ERROR: Could not fork: $!";
    } elsif ($pid) {
        # I'm the parent, so just exit gracefully
        exit(0);
    }

    # I'm the child

    # Disconnect from the console.  STDERR goes last so that a failure on
    # the other two can still be reported.
    open( STDIN,  "< /dev/null" ) || die "Can't reopen STDIN: $!";
    open( STDOUT, "> /dev/null" ) || die "Can't reopen STDOUT: $!";
    open( STDERR, "> /dev/null" );

    # Become session group leader
    POSIX::setsid();
}