#!/bin/sh
#########################################################################
## HP System Monitor - A lightweight System Management Tool            ##
#########################################################################
#
## This script depends on the hp-health RPM package being installed
## for basic system management. For storage management, the
## hp-snmp-agents package needs to be installed and the appropriate
## storage agents enabled to log storage events to the HP Integrated
## Management Log (IML).  This tool relies on changes to the IML to
## report errors.
## 
## Once an error is detected, applications need to determine the level
## of detail required.  Both hplog(8) and hpasmcli(4) provide detailed
## information about key system components such as Fans, Power
## Supplies, Temperature Sensors and DIMM Status.
## 
## The HP Hardware Watchdog Timer (hp-wdt) should also be installed
## and the Linux kernel "NMI Watchdog" timer disabled to take
## advantage of the advanced critical error logging capabilities on
## HP ProLiant 300 - 700 G5 series servers.
##
#########################################################################

#
## This routine will set up the basic management directory structure:
## a "prev" and a "curr" snapshot directory below the base directory.
##
## Arguments:
##   $1 - optional base directory (default: /opt/hp/hpsysmon)
## Returns:
##   The exit status of mkdir: 0 when both directories exist
##   afterwards, non-zero when one of them could not be created.
#
init_dir_structure()
{
	# mkdir -p succeeds silently for directories that already exist, so
	# it is run unconditionally.  This also recreates a missing prev/ or
	# curr/ when the base directory itself is still present.
	mkdir -p "${1:-/opt/hp/hpsysmon}/prev" "${1:-/opt/hp/hpsysmon}/curr"
}

#
## This routine reports any failures recorded in a snapshot.  It is run
## on the initial snapshot and again whenever a later snapshot changes.
##
## Arguments:
##   $1 - snapshot directory name below the base directory ("prev" or
##        "curr")
##   $2 - optional base directory (default: /opt/hp/hpsysmon)
## Globals:
##   CHKDIR, CHK_PATH and CHK_STATUS are overwritten.
## Outputs:
##   For each log that contains a failure keyword, a headline followed
##   by the complete log, on stdout.
## Returns:
##   0 if no log contains a failure keyword, 1 otherwise.  A log that
##   grep cannot read does not count as a failure.
#
check_for_failures()
{
	CHKDIR=$1
	CHK_PATH="${2:-/opt/hp/hpsysmon}/${CHKDIR}"
	CHK_STATUS=0

	# Every log is inspected even after a failure has been found, so a
	# long-standing failure in one subsystem cannot hide a new failure
	# in another.  grep -q keeps the matched lines off stdout; the full
	# log is printed below the headline instead.

	if grep -q -e "Failed" "${CHK_PATH}/fan.log"
	then
		echo "A failed fan has been detected:"
		cat "${CHK_PATH}/fan.log"
		CHK_STATUS=1
	fi

	if grep -q -e "Critical" "${CHK_PATH}/temp.log"
	then
		echo "A critical temperature condition detected:"
		cat "${CHK_PATH}/temp.log"
		CHK_STATUS=1
	fi

	if grep -q -e "Failed" "${CHK_PATH}/power.log"
	then
		echo "A failed power supply detected:"
		cat "${CHK_PATH}/power.log"
		CHK_STATUS=1
	fi

	if grep -q -e "degraded" -e "not match" -e "not supported" \
		-e "bad configuration" "${CHK_PATH}/dimm.log"
	then
		echo "A Memory DIMM error:"
		cat "${CHK_PATH}/dimm.log"
		CHK_STATUS=1
	fi

	return $CHK_STATUS
}

#
## Shared worker for the two snapshot routines below.  It stores the
## output of each management command in its own log file:
##   hpasmcli -s "show dimm" -> dimm.log
##   hplog -v                -> iml.log
##   hplog -t                -> temp.log
##   hplog -f                -> fan.log
##   hplog -p                -> power.log
##
## Arguments:
##   $1 - snapshot directory name ("prev" or "curr")
##   $2 - base directory
## Returns:
##   The exit status of the last command (hplog -p).
#
_take_snapshot()
{
	hpasmcli -s "show dimm" > "$2/$1/dimm.log"
	hplog -v > "$2/$1/iml.log"
	hplog -t > "$2/$1/temp.log"
	hplog -f > "$2/$1/fan.log"
	hplog -p > "$2/$1/power.log"
}

#
## This routine will take the initial snapshot of the current
## management data and store it in the "prev" directory.
##
## Arguments:
##   $1 - optional base directory (default: /opt/hp/hpsysmon)
#
take_initial_snapshot()
{
	_take_snapshot prev "${1:-/opt/hp/hpsysmon}"
}

#
## This routine will take the current snapshot of the
## management data and store it in the "curr" directory.
##
## Arguments:
##   $1 - optional base directory (default: /opt/hp/hpsysmon)
#
take_current_snapshot()
{
	_take_snapshot curr "${1:-/opt/hp/hpsysmon}"
}

#
## Check for errors: take a fresh snapshot, compare every log with the
## previous snapshot, report on any change, then make the fresh
## snapshot the new baseline.
##
## Arguments:
##   $1 - optional base directory (default: /opt/hp/hpsysmon); it is
##        also handed to take_current_snapshot and check_for_failures
## Globals:
##   REPORT_ERRORS, DIFF_BASE and DIFF_LOG are overwritten.
## Outputs:
##   The diff of the IML log when it changed, whatever
##   check_for_failures prints, and a "device repair" note when
##   something changed but check_for_failures found no failure.
## Returns:
##   The exit status of the last cp.
#
check_diffs()
{
	DIFF_BASE=${1:-/opt/hp/hpsysmon}
	REPORT_ERRORS="FALSE"

	take_current_snapshot "$DIFF_BASE"

	# The IML is the only log whose differences are shown; a single
	# diff both prints them and signals the change via its exit status.
	if ! diff "$DIFF_BASE/curr/iml.log" "$DIFF_BASE/prev/iml.log"
	then
		REPORT_ERRORS="TRUE"
	fi

	# For the remaining logs only the fact that they changed matters.
	for DIFF_LOG in temp fan power dimm
	do
		if ! diff "$DIFF_BASE/curr/$DIFF_LOG.log" \
			"$DIFF_BASE/prev/$DIFF_LOG.log" > /dev/null
		then
			REPORT_ERRORS="TRUE"
		fi
	done

	# "=" is the string comparison operator defined for test/[;
	# "==" is a bash extension and fails under a strict /bin/sh.
	if [ "$REPORT_ERRORS" = "TRUE" ]
	then
		# A change without any failure keyword in the current logs is
		# reported as a repair.
		if check_for_failures "curr" "$DIFF_BASE"
		then
			echo "There appears to be a device repair"
		fi
	fi

	# Roll the current snapshot forward so the next pass compares
	# against it.
	for DIFF_LOG in iml fan temp power dimm
	do
		cp "$DIFF_BASE/curr/$DIFF_LOG.log" "$DIFF_BASE/prev/$DIFF_LOG.log"
	done
}


#
## MAIN SCRIPT
##
## Create the snapshot directories, record a baseline snapshot and
## report any failure already present in it, then re-check once a
## minute until the script is terminated.
#

init_dir_structure
take_initial_snapshot
check_for_failures "prev"

# "true" always succeeds, so this loops forever.  The previous
# "($true)" form only worked because the unset variable expanded to an
# empty command inside a subshell.
while true
do
	sleep 60
	check_diffs
done

