:
#
# threshdet (Online Diagnostics)
#

# check for super-user
#
# The login name is taken from logname(1).  Its output is quoted so that
# an empty result (logname could not determine a login name) compares as
# "not root" and the script refuses to run.  Unquoted, an empty result
# leaves a malformed test expression; the test then fails with an error
# and execution would fall through as if the caller were root.

if [ "`logname`" != "root" ]
then
	echo "Must be super-user to execute threshdet"
	exit 1
fi

# preset threshold error return code (to non-error status).
exceeded=0
# preset default number of days to summarize
PAST=3
# preset the usage message
USAGE="threshdet usage: threshdet -o 'arg' [-a 'arg'] [-hsv]"

# determine invocation option(s)
#	-a arg	number of days (PAST)
#	-h	sets HEADINGS="-h"
#	-o arg	report file name base (RPTBASE)
#	-s	sets INCLSUM="on"
#	-v	sets VERBOSE="-v"
#
# getopt's output is captured in a variable first so that the status
# tested below is getopt's own; the status left by `set` is not a
# dependable indication of an option error in every shell.
optlist=`getopt a:ho:sv $*`
if [ $? != 0 ]
then
	# quoted: the usage text contains [..] which must not be
	# treated as a file name pattern
	echo "$USAGE"
	exit 2
fi
set -- $optlist

# Consume the normalized list from the front.  Looking only at $1 keeps
# the word being examined and the positional parameters in step, so an
# option argument that happens to look like a flag (e.g. "-o -s") is
# taken as the argument and never re-interpreted as an option.
while [ $# -gt 0 ]
do
	case $1 in
	-a) PAST=$2; shift; shift;;
	-h) HEADINGS="-h"; shift;;
	-o) RPTBASE=$2; shift; shift;;
	-s) INCLSUM="on"; shift;;
	-v) VERBOSE="-v"; shift;;
	--) shift; break;;
	*) break;;
	esac
done

# use -o argument passed to this routine as error reporting file name base:
# the report file is "<base>.errpt".  The -o option is mandatory; without
# it the script stops with status 3.
if [ "$RPTBASE" ]
then
	errfile="$RPTBASE.errpt"
	# the name is quoted wherever it is used here so that a base
	# containing blanks or wildcard characters is still handled as
	# one file name
	if [ ! -f "$errfile" ]		# if the file hasn't been created
	then				# then create it now (with size 0)
		cp /dev/null "$errfile"
	fi
else
	echo "Error threshold reporting filename must be passed as argument"
	exit 3
fi

# set default working directory to error logger default.
# The temporary files and the search for daily error files below are all
# relative to this directory, so stop (status 6) if it cannot be entered
# rather than creating and removing files wherever we happen to be.
if cd /usr/adm/streams
then
	:
else
	echo "Unable to change directory to /usr/adm/streams; aborting"
	exit 6
fi

# short names for the fixed files used by this script
xfile=/usr/lib/err.xref
tfile=/usr/lib/err.thresh
bbscan=/usr/lib/bbscan.awk

# scratch files; the process id keeps the names unique per run and they
# are created in the current directory
SUMFILE=$$.oldsum
tmpfile=$$.tmpfile
tmpsort=$$.th.srt

# get rid of any stale scratch file left under the same name
test -f $tmpfile && rm $tmpfile
test -f $tmpsort && rm $tmpsort

###############################################################################
# Device/error definitions come from err.xref (the error-description
# cross-reference) and the current threshold settings from err.thresh.

# Make sure the cross-reference file is there; when it is missing have
# aodmkxref build a default one.  If that fails, stop with its exit
# status raised by 8.
if [ -f $xfile ]
then
	:	# cross-reference file already present
else
	/usr/lib/aodmkxref $HEADINGS
	RETVAL=$?
	[ $RETVAL -eq 0 ] || {
		echo "Unable to create cross-reference default file; aborting"
		exit `expr $RETVAL + 8`
	}
fi

# now read the error types for each kind of device being monitored.
# aodrdset is sourced with "." so that it runs in this shell and any
# variables it sets stay visible to the rest of the script.
# NOTE(review): presumably this is where the <device>fields,
# <device>val<n> and <device>str<n> variables used by the evaluation
# loop further down are defined -- confirm against aodrdset.
.   /usr/lib/aodrdset

# preset simple defaults (we want to do this even though they may be
# overwritten later when read from the thresholds settings file because
# there may be omissions in the settings file caused by editing; these
# defaults are the minimal subset we wish to allow).
# Sourced for the same reason as above; it must come before the settings
# file is read so that the file's values take precedence.
.   /usr/lib/aodsetdef

################################################################################

# thd_settings file
#	Writes, for every usable record of the settings file named by $1,
#	two shell assignments:
#		NAME=<field 2>
#		NAMEACT=<field 3>
#	where NAME is field 1.  Records whose first field is not a valid
#	shell variable name (blank lines, commentary, editing debris) are
#	skipped.  Without this filter such a record yields a word like "="
#	that the eval below takes for a command name, and every setting
#	on the joined line is then lost.
thd_settings()
{
	awk '$1 ~ /^[A-Za-z_][A-Za-z0-9_]*$/ {
		printf("%s=%s\n%sACT=%s\n", $1, $2, $1, $3)
	}' "$1"
}

# now check if threshold settings file is present
if [ ! -f /usr/lib/err.thresh ]
then
	# the thresholds settings file doesn't exist; create it.
	.   /usr/lib/aodmkset
else
	# threshold settings file DOES exist, so override the
	# default settings by reading the settings from it.
	if [ "$HEADINGS" ]
	then
		echo "Reading current threshold settings, one moment please...\c"
	fi
	# the settings file is trusted input: its fields become shell
	# assignments executed by eval
	eval `thd_settings /usr/lib/err.thresh`
	if [ "$HEADINGS" ]
	then
		echo "done"
	fi
fi

################################################################################
# Collect the names of the daily error files (error.MM-DD) found below
# the current directory.
# Note: the search is NOT restricted by age -- $PAST only appears in the
# message text below.  find relates to -mtime argument in strange ways;
# BEWARE!
Sdays=`find . -name 'error.[01][0-9]-[0-3][0-9]' -print`

# nothing to check: say so on the terminal, mail the same line to root
# and leave with error status (4)
if [ -z "$Sdays" ]
then
	nologs="No errors were logged for the past $PAST days"
	echo "$nologs"
	echo "$nologs" | mail root
	exit 4
fi

# in case of premature exit (shell exit, or signals 2, 3, 15), remove ALL
# temporary files -- the sort scratch file included -- and leave with
# status 5.  The command is single-quoted so the names are looked up when
# the trap fires.
trap 'rm -f "$tmpfile" "$SUMFILE" "$tmpsort"; exit 5' 0 2 3 15

# summarize error activity in file lists
if [ -f $Sdays ]
then
	# if summary error activity file does not exist, create with
	# size 0 to simplify following awk|sort|awk command line
	#if [ ! -f $SUMFILE ]
	#then
		cp /dev/null $SUMFILE 
	#fi 
	# notify user of progression to this stage
	if [ "$HEADINGS" ]
	then
		echo "Generating error summary, please be patient...\c"
	fi

	cat $Sdays | sort -o $tmpsort
	if [ "$INCLSUM" ]
	then
		cat /usr/adm/streams/Error.summary >> $tmpsort
	fi

	awk -f /usr/lib/segpref.awk $tmpsort | awk -f /usr/lib/segdev.awk | \
	awk -f /usr/lib/segmulti.awk | awk -f /usr/lib/segkernel.awk | \
	awk -f /usr/lib/segmisc.awk > $tmpfile

	mv $tmpfile $tmpsort

	cat $SUMFILE >> $tmpsort
	sort +1 $tmpsort | awk -f /usr/lib/msgcount.awk > $SUMFILE

	if [ "$HEADINGS" ]
	then
		echo "done"; echo
	fi

	rm $tmpsort
	# check error lists for activity in excess of thresholds

	for device in hd fd tp mdc sio krn mis
	do
		if [ "$HEADINGS" ]
		then
			echo "Evaluating device \c"
			case $device in
			hd) echo "Hard Disk\c" ;;
			fd) echo "Floppy Disk\c" ;;
			tp) echo "Tape Drive\c" ;;
			mdc) echo "Multidrop Board\c" ;;
			sio) echo "Serial I/O Board\c" ;;
			krn) echo "Kernel\c" ;;
			mis) echo "Miscellaneous\c" ;;
			esac
			echo " errors"
		fi

		eval fieldlist=$"$device""fields"
		for fieldtype in $fieldlist
		do
			eval fieldname=$""$device"val"$fieldtype""
			eval fullname="$device""$fieldname"
			# using echo in next 2 lines gets rid of trailing blanks
			eval fieldstr=`echo $""$device"str"$fieldtype""`
			fldstr=`echo $fieldstr " "`
			eval dthresh=$"$fullname"
			eval fieldact=$""$fullname"ACT"
			stamp=`date`
			grpcnt=`grep -sc "$fullname" $SUMFILE`
			strc=0
			until [ $strc -gt $grpcnt ]
			do
				if [ $grpcnt -gt 0 -a $strc -eq 0 ]
				then
					strc=1
				fi
				strg=`grep -s "$fullname" $SUMFILE | awk "BEGIN{cnt=0}{if(++cnt==$strc){print \\\$0;exit}}"`
				if [ "$strg" ]
				then
					count=`echo $strg | awk '{print $1}' -`
					devstr=`echo $strg | awk '{print $3}' -`
					unitstr=`echo $strg | awk '{print $4}' -`
					substr=`echo $strg | awk '{print $5}' -`
				else
					count=0
				fi
				# check *.errpt files for previous notification
				prev=`grep -s "$fullname" /usr/adm/streams/*.errpt | awk "BEGIN {cnt=0} {if(\\\$5==\"$unitstr\" && \\\$6==\"$substr\")cnt++} END{print cnt}"`
				if [ $count -ge $dthresh -a $prev -eq 0 ]
				then
					# errors exceed threshold & were not previously
					# reported, do so now to screen & error file
					echo "ERROR $fullname $fieldact	$devstr $unitstr $substr<$count/$dthresh>$fldstr \c" | tee /dev/tty >> $errfile
					echo " "	# <CR> to screen
					# time stamp error file entry
					echo "$stamp" >> $errfile
					exceeded=1
				else
					# no new threshold activity; check if
					# verbose mode was requested
					if [ "$VERBOSE" ]
					then
						# verbose mode WAS requested; list
						# NON-new-excess activty, as well
						if [ $count -eq 0 ]
						then
							# if returned string was empty,
							# then the number of errors is 0
						echo "  -   $fullname $fieldact	 $device - - 	0/$dthresh : $fldstr"
						else
							if [ $count -ge $dthresh ]
							then
								# previously reported
								echo "prev  \c"
							else
								# not excess activity
								echo "  -   \c"
							fi
							echo "$fullname $fieldact	 $devstr $unitstr $substr 	$count/$dthresh : $fldstr"
						fi
					fi
				fi
				# increment string counter for next iteration
				strc=`expr $strc + 1`
			done
		done
	done
	rm $SUMFILE
fi

# exit with code reflecting error/threshold status: $exceeded is preset
# to 0 near the top of the script and set to 1 when a new threshold
# excess has been reported.
# The cleanup handler installed earlier for 0, 2, 3 and 15 is reset first;
# were it still armed, the exit below would run it and the script would
# end with that handler's status (5) instead of $exceeded.
trap 0 2 3 15
exit $exceeded
