#!/bin/sh
VERS="lantracer Version 1.5"
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# aj - 1.0 LAN specific modified from atmtracer version 1.3
#      1.1 modified to trace multiple interfaces and dump routing info
#      1.2 added path checker (ping remote system and stop on failure)
#      1.3 added event checker - lancheck script called from this script
#      1.4 added stats collection routine and extra call to an external script
#      1.5 added truss facility, include daemon in SISPID command
#
# Script to run traces for a set period of time ($TIME seconds) and write the
# output to a sequence ($NUMBER) of files in the specified directory ($DIR).
# The values chosen for TIME and NUMBER will need to be changed according to the
# maximum disk space that the set of trace files can take up and how busy the
# link is in the busiest period.
#
# The combination of TIME and NUMBER needs to be sufficient that the elapsed
# time between the problem occurring and the traces being stopped is less than
# the time period covered by the traces.
# The whole object is to capture in one continuous trace the events before the
# problem and during the problem.
#
# Add an IP address to the REMHOST variable if you want to enable the path
# checker routine. This IP address will be used with ping to determine if the
# path is ok.
#
# Add a script name (default is "lancheck") to the UTIL variable if you want to
# call a utility script to do event checking.
#
# Redirect stdout and stderr when starting lantracer and background it.
# e.g. under ksh : ./lantracer >/dev/null 2>&1 &
#
# To stop the tracing delete the file $DIR/running, or it will stop
# automatically if a REMHOST is defined and a ping to REMHOST fails. The tracing
# will stop after the current $TIME period has completed or if the path to the
# REMHOST goes away.
#
# Alternatively, find the process id of each trace process and issue it a
# kill and then kill the tracer process (not recommended).
#
# DO NOT just kill or control-C the lantracer process
#
# The trace files will be in $DIR and with the default value for NUMBER will be :
# lanout.<interface>.1 -> lanout.<interface>.3, arptable.1-3, iretable.1-3
# and Pathlog.
# Other tracing files (stat1.N, stat2.N, trussout.N) follow the same numbering
# format.
#
# Copy all these files to somewhere safe for sending to Sun.
#
# NOTE(review): this runs under /bin/sh and uses snoop/ndd/truss/kstat, so it is
# presumably meant for the Solaris Bourne shell; backticks and expr are kept on
# purpose instead of $(...) / $((...)).
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#Set up some variables
#+++++++++++++++++++++

# the amount of time in seconds the traces will run before writing to the next
# file
#=========
TIME=600
#=========

# the number of files to write before wrapping around to the first one again.
#=========
NUMBER=3
#=========

# where to store the files (preferably where they will not get deleted at
# reboot)
#===========
DIR=/usr/tmp/lan
#===========

# interval for log output (not referenced anywhere in this script)
#=======
PERIOD=3
#=======

# List of the lan devices to trace
#==================================
ETHDEVICE="eri0 hme0"
#==================================

# get pid for truss trace - leave blank if not required
#=========
# SISPID=`/usr/bin/pgrep in.mpathd`
SISPID=
#=========

# Name of utility script
#=====================
# UTIL=lancheck
UTIL=
#=====================

# Additional arguments for utility script
#=======
SLEEP=120
#=======

# System stats collection
#========
STATS=yes
#STATSCRIPT=sysstat
#STATCOMM="/bin/iostat -xtcn 5 150"
STATCOMM="/usr/bin/kstat -p"
#========

# Network stats (any non-empty value enables the routing/ARP table dumps)
#=========
NETSTAT=
#==========

# where to find the commands
#=================================
SNOOP=/usr/sbin/snoop
NDD="/usr/sbin/ndd /dev/ip ipv4_ire_status"
#NDD="/usr/sbin/ndd /dev/ip ip_ire_status"
ARP="/bin/netstat -npvr"
#=================================

# Extra trailing arguments handed to every snoop command (empty = none)
#=================================
SYSTEM1=
SYSTEM2=
#=================================

# The remote host to ping to check the Path
#=====================
REMHOST=
#=====================

# set up the running variable and create a file to show we are tracing, chmod
# the running file so any user can stop the tracing
SEQ=0
if [ ! -d "$DIR" ]; then
  /bin/mkdir -p "$DIR"
fi
/bin/chmod 777 "$DIR"
/bin/touch "$DIR/running"
/bin/chmod 666 "$DIR/running"

# Validate both optional helper scripts BEFORE launching anything in the
# background, so a failed check cannot leave an orphaned utility process behind.
# Failures exit non-zero so a caller can tell that tracing never started.
if [ -n "$UTIL" ] && [ ! -x "$UTIL" ]; then
  /usr/bin/echo "$UTIL must be executable and be in the same directory as lantracer" >&2
  exit 1
fi
if [ -n "$STATSCRIPT" ] && [ ! -x "$STATSCRIPT" ]; then
  /usr/bin/echo "$STATSCRIPT must be executable and be in the same directory as lantracer" >&2
  exit 1
fi

# Start the event-checking utility, if one is configured
if [ -n "$UTIL" ]; then
  "./$UTIL" "$DIR" "$SLEEP" >/dev/null 2>&1 &
  PIDUTIL=$!
fi

# Initialise the Pathlog
/usr/bin/echo $VERS > "$DIR/Pathlog"
/usr/bin/date >> "$DIR/Pathlog"
/usr/bin/echo Check daemon pid = $PIDUTIL >> "$DIR/Pathlog"
/usr/bin/echo Remote host = $REMHOST >> "$DIR/Pathlog"

# Start the tracing and stop if the file "running" is removed
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
while [ -f "$DIR/running" ]; do
  if [ "$SEQ" -ne "$NUMBER" ]; then
    SEQ=`expr $SEQ + 1`

    if [ -n "$NETSTAT" ]; then
      # Dump routing before snoop (truncates the previous cycle's files)
      /bin/date > "${DIR}/iretable.${SEQ}"
      $NDD >> "${DIR}/iretable.${SEQ}"
      /bin/date > "${DIR}/arptable.${SEQ}"
      $ARP >> "${DIR}/arptable.${SEQ}"
    fi

    # launch snoop for every listed interface and remember each pid
    PIDS=""
    for interface in $ETHDEVICE; do
      # SYSTEM1/SYSTEM2 are deliberately unquoted: when empty they must vanish
      # rather than be passed to snoop as empty arguments.
      $SNOOP -q -d "$interface" -s 208 \
        -o "${DIR}/lanout.${interface}.${SEQ}" \
        $SYSTEM1 $SYSTEM2 >/dev/null 2>&1 &
      PIDS="$PIDS $!"
    done

    # collect the stats
    if [ -n "$STATS" ]; then
      /usr/bin/date > "${DIR}/stat1.${SEQ}"
      # STATCOMM is unquoted on purpose so it splits into command + arguments
      $STATCOMM >> "${DIR}/stat1.${SEQ}" 2>&1 &
      PIDSTAT=$!
    fi

    # truss daemon
    if [ -n "$SISPID" ]; then
      # SISPID is unquoted so that several pids can be listed
      /bin/truss -aefdp -rall -wall -vall -o "${DIR}/trussout.${SEQ}" $SISPID 2>&1 &
      PIDTRUSS=$!
    fi

    # external stats script
    if [ -n "$STATSCRIPT" ]; then
      /usr/bin/date > "${DIR}/stat2.${SEQ}"
      "./$STATSCRIPT" "$DIR" "$SEQ" >> "${DIR}/stat2.${SEQ}" 2>&1 &
      PIDSCRIPT=$!
    fi

    # let the tracers run for one period
    sleep "$TIME"

    # path checker: any non-zero ping status counts as a lost path
    if [ -n "$REMHOST" ]; then
      RSTATE=`/usr/sbin/ping $REMHOST`
      rstatus=$?
      /bin/date >> "$DIR/Pathlog"
      if [ "$rstatus" -ne 0 ]; then
        /usr/bin/echo "Path gone - $RSTATE ($SEQ)" >> "$DIR/Pathlog"
        # -f: the file may already have been removed by an operator
        /bin/rm -f "$DIR/running"
        # sleep a little to drain traces
        sleep 10
      else
        /usr/bin/echo "$RSTATE ($SEQ)" >> "$DIR/Pathlog"
      fi
    fi

    if [ -n "$NETSTAT" ]; then
      # Dump routing after snoop. NDD already names the driver and parameter,
      # so it is run exactly as in the "before" dump.
      /bin/date >> "${DIR}/iretable.${SEQ}"
      $NDD >> "${DIR}/iretable.${SEQ}"
      /bin/date >> "${DIR}/arptable.${SEQ}"
      $ARP >> "${DIR}/arptable.${SEQ}"
    fi

    # stop everything started for this period
    for pid in $PIDS; do
      kill "$pid"
    done
    if [ -n "$SISPID" ]; then
      kill "$PIDTRUSS"
    fi
    if [ -n "$STATS" ]; then
      kill "$PIDSTAT"
    fi
    if [ -n "$STATSCRIPT" ]; then
      kill "$PIDSCRIPT"
    fi

    # check if we need to stop tracing
    if [ ! -f "$DIR/running" ]; then
      exit 0
    fi
  else
    # all NUMBER files written: wrap around and overwrite from file 1
    SEQ=0
  fi
done