:
#
# aodmon (Online Diagnostics)
#


# check for superuser id
#
# The login name is captured first and compared as a quoted string.  When
# logname prints nothing, an unquoted substitution leaves the test with a
# missing operand; the test then fails as a syntax error and the script
# carries on instead of refusing to run.

login=`logname`
if [ "$login" != "root" ]
then
	echo "Must be super-user to execute aodmon routine"
	exit 1
fi

# preset the usage message
USAGE="aodmon usage: aodmon [-a 'arg'] [-hsv]"

# preset the number of days' error files to use
PAST=3

# start with every optional flag cleared, so that a like-named variable
# inherited from the environment cannot leak into the threshdet call
HEADINGS=
INCLSUM=
VERBOSE=

# parse_options [arg ...]
#	Interpret the aodmon command line options:
#	  -a arg	number of days' error files to use (stored in PAST)
#	  -h		store "-h" in HEADINGS
#	  -s		store "-s" in INCLSUM
#	  -v		store "-v" in VERBOSE
#	On an unknown option, or -a without its argument, print the usage
#	message and exit with status 2.
#
#	getopts is used rather than "set -- `getopt ...`": after that
#	construct $? holds the status of set, not of getopt, so a bad
#	option was never reported.  getopts also hands over the -a argument
#	through OPTARG, so the argument is never re-examined as a flag.
parse_options()
{
	# restart the scan in case getopts has been used before
	OPTIND=1
	while getopts a:hsv opt
	do
		case $opt in
		a)	PAST="$OPTARG";;
		h)	HEADINGS="-h";;
		s)	INCLSUM="-s";;
		v)	VERBOSE="-v";;
		*)	# quoted: unquoted, "[-hsv]" is a filename pattern
			echo "$USAGE"
			exit 2;;
		esac
	done
}

# determine invocation option(s)
parse_options "$@"

# set default working directory to error logger default
#
# Everything that follows uses relative names (the aodnow interlock, the
# removal of error.* and *.callfile files), so stop here rather than let
# those commands run in whatever directory the caller happened to be in.
cd /usr/adm/streams || {
	echo "aodmon: cannot change directory to /usr/adm/streams"
	exit 1
}

# check for simultaneous "aod" processes
#
# "aodnow" is the interlock: a simple file in the working directory that
# holds the PID of the aodmon which created it.
#
# The improper-type test is made first.  Nested under "-f aodnow" it can
# never succeed in a shell whose -f is true only for regular files.
#
# NOTE: an interlock left behind by a run that was killed outright is not
# recognised as stale here; it blocks later runs until removed by hand.
if [ -d aodnow ] || [ -b aodnow ] || [ -c aodnow ] || [ -p aodnow ]
then
	# "aodnow" interlock file is supposed to be a simple file
	# notify terminal user and (via mail) root
	echo "on-line diag invocation interlock is improper type"
	# here-documents keep the mail text independent of how echo
	# treats backslash escapes
	mail root <<EOF
aodmon error: on-line diagnostic invocation interlock file
/usr/adm/streams/aodnow is of an improper type. the aodmon routine is
consequently blocked from execution!
EOF
	exit 1
elif [ -f aodnow ]
then
	# previous invocation running now
	# notify terminal user and (via mail) root
	# contents of aodnow contain the previously running aodmon
	# process' PID number
	oldproc=`awk '{ printf("%s", $1) }' aodnow`
	echo "aodmon process pid:$oldproc already currently running (still)"
	# the PID is handed to awk as a variable, so an empty or odd
	# interlock file cannot produce an awk syntax error
	oldstart1=`ps -fe | awk '/aodmon/ && $2 == pid { print $5 }' pid="$oldproc" -`
	oldstart2=`ps -fe | awk '/aodmon/ && $2 == pid { print $5 }' pid="$$" -`
	mail root <<EOF
Overlapping invocations of aodmon process
pid: $oldproc started at $oldstart1
pid: $$ started at $oldstart2
EOF
	exit 0
else
	# create interlock file to suppress successive (overlapping) invocations
	echo $$ > aodnow
	# remove interlock file on exit and signals
	trap "rm -f aodnow; exit 1" 0 2 3 15
fi


# if any error and callfile files exist, remove them
rm -f error.[01][0-9]-[0-3][0-9]
rm -f *.callfile

# remove temporary error files on exit and interrupts
#
# A trap set here replaces the one installed together with the interlock,
# so it must remove aodnow as well.  Otherwise an exit taken while this
# trap is in force leaves the interlock behind and blocks every later run.
# It is installed before the temporary files are created so that an
# interrupt during the copy is cleaned up too.
trap "rm -f $$.msgs $$.tmpmsg aodnow; exit 1" 0 2 3 15

# create error files in error.mm-dd format

cp /usr/adm/messages $$.tmpmsg
egrep "^WARNING|^NOTICE|^PANIC|^ERROR|^DANGER|^Mon|^Tue|^Wed|^Thu|^Fri|^Sat|^Sun|^status" $$.tmpmsg > $$.msgs

# the selected lines are fed to mparse.awk on its standard input
awk -f /usr/lib/mparse.awk < $$.msgs
rm -f $$.msgs $$.tmpmsg


# the threshold detector "threshdet" must be present as a file and must
# be executable; give up with status 1 otherwise
if [ -f /usr/lib/threshdet ]
then
	if [ ! -x /usr/lib/threshdet ]
	then
		echo "threshold detector routine not executable"
		exit 1
	fi
else
	echo "threshold detector routine not present"
	exit 1
fi

# from here on, an early exit or an interrupt discards the error report
# file, the call file and the interlock
trap "rm -f $$.errpt $$.callfile aodnow; exit 1" 0 2 3 15

# run the threshold detector "threshdet" to summarize today's errors;
# keep its exit status for the test below
/usr/lib/threshdet -o $$ $PAST $HEADINGS $VERBOSE $INCLSUM
detstat=$?

# an exit status of 1 is treated as "excessive errors"

if [ $detstat -ne 1 ]
then
	# no threshold level error activity, so...
	# remove (empty) error reporting file when done
	rm $$.errpt
else
	# notify root via mail of any and all threshold level error activity

	echo "Notifying 'root' of error threshold level activity"
	mail root < $$.errpt

	# lines of the error report whose third field is the call flag "C"
	# ask for the remote to be notified; collect them in the call file

	awk '$3 == "C"' $$.errpt > $$.callfile

	if [ ! -s $$.callfile ]
	then
		# nothing asked for a call
		rm -f $$.callfile
	else
		# lines were written to the call file: try to call the
		# remote system, provided the notifier is usable
		notifier=/usr/lib/aodnotf
		trouble=
		if [ ! -f $notifier ]
		then
			trouble="missing"
		elif [ ! -x $notifier ]
		then
			trouble="not executable"
		fi

		if [ -n "$trouble" ]
		then
			echo "aodmon: remote notifying routine \`aodnotf' $trouble" | tee /dev/tty | mail root
			exit
		fi

		echo "Calling Remote notifier: aodnotf"
		sh $notifier -o $$ -v || {
			echo "aodmon: `date`: error trying to notify remote diagnostic station" | tee /dev/tty | mail root
			rm -f $$.errpt $$.callfile aodnow
			exit
		}
	fi
fi

# sweep every remaining error report file out of the logger directory
find /usr/adm/streams -name "*.errpt" -exec rm '{}' ';'

# release the "current" file interlock, then put exit and the signals
# back to their default handling so the exit below is not intercepted
rm aodnow
trap 0 2 3 15

# all done: report success
exit 0

