Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
#! /bin/bash
#
# Environment bootstrap for the file-collection agent watchdog: pins the SCRAM
# architecture, normalises HOME, and then either evaluates the output of
# `scram runtime -sh` inside the user's prod/dev area or sources a fallback
# environment file.

#export WorkDir=$(dirname $0)
# Recipient of the notification mail sent further down in this script.
YourEmail=sdutta@cern.ch
#source /nfshome0/cmssw2/scripts/setup.sh

export SCRAM_ARCH=slc5_amd64_gcc462

# When HOME is not under /nfshome0, rebuild it from the account name with the
# first "local" removed (e.g. "dqmprolocal" -> /nfshome0/dqmpro).
# NOTE(review): the right-hand side is an unanchored regex, so "dqm*" means
# "dq" followed by any number of "m"; kept exactly as it was.
if [[ ! $HOME =~ /nfshome0/dqm* ]]; then
  HOME=/nfshome0/${USER/local/}
fi

if [[ -d "${HOME}/prod" || -d "${HOME}/dev" ]]; then
  source /nfshome0/dqmpro/bin/setup_cmssw.sh
  # Prefer the prod area and fall back to dev; complain instead of silently
  # evaluating the scram runtime from an unrelated directory.
  { [[ -d "${HOME}/prod" ]] && cd "${HOME}/prod"; } \
    || cd "${HOME}/dev" \
    || printf 'warning: cannot enter %s/prod or %s/dev\n' "${HOME}" "${HOME}" >&2
  eval "$(scram runtime -sh)"
else
  # WorkDir is not assigned anywhere above this point (its definition at the
  # top is commented out), so unless it is inherited from the environment the
  # path used to collapse to "/env3.sh". Fall back to the directory of this
  # script, which is what the commented-out definition used.
  env_dir=${WorkDir:-$(dirname "$0")}
  if [[ -r "${env_dir}/env3.sh" ]]; then
    source "${env_dir}/env3.sh"
  else
    printf 'warning: environment file %s/env3.sh not found\n' "${env_dir}" >&2
  fi
fi
# Agent configuration: the three arrays below are parallel, i.e. index N of
# agents_pnames, agents_executables and agents_parameters describes one agent.
export PYTHONPATH=$XPYTHONPATH:$PYTHONPATH
export HOSTNAME=$HOSTNAME
agents_pnames=("fileCollector" "producerFileCleanner")
agents_executables=("/nfshome0/dqmpro/filecollector/fileCollector2.py" "/nfshome0/dqmpro/filecollector/producerFileCleanner.py")

# Succeeds when the account name in $1 matches the production-account regex.
# The regex lives in a variable and is expanded unquoted on purpose: a quoted
# right-hand side of =~ is compared as a literal string by bash >= 3.2, which
# made the former test ( =~ 'dqmpr.*' ) fail for every real account name.
is_production_account() {
  local -r account_regex='dqmpr.*'
  [[ $1 =~ $account_regex ]]
}

# Each parameter entry is a single string holding the space-separated argument
# list for the agent at the same index.
if is_production_account "$USER"; then
  agents_parameters=("/home/dqmprolocal/output /home/dqmprolocal/done /dqmdata/dqm/uploads" \
                     "/cmsnfshome0/nfshome0/dqmpro/filecollector/RootArchivalAndTransferSystem_cfg.py")
else
  agents_parameters=("/home/dqmdevlocal/output /home/dqmdevlocal/done /dqmdata/dqmintegration/upload" \
                     "/cmsnfshome0/nfshome0/dqmpro/filecollector/RootArchivalAndTransferSystem_cfg.py")
fi
# The working directory is the directory that holds the first agent executable.
WorkDir=$(dirname ${agents_executables[0]})

# A .start marker overrides a leftover .stop marker: drop the latter.
if [[ -e "$WorkDir/.start" && -e "$WorkDir/.stop" ]]; then
  rm $WorkDir/.stop
fi

# A .stop marker that is still present means the agents must stay down.
if [[ -e "$WorkDir/.stop" ]]; then
  echo "Found stop file not starting the agents" && exit 0
fi

# Watchdog pass: for every configured agent, check whether a process with the
# agent's executable path on its command line exists and relaunch the agent
# when none does. One notice per restart is collected in $msg and mailed at
# the end unless a .start marker is present in $WorkDir.
msg=

# Append "$1" to $msg, separating entries with a real newline character.
# (The entries used to be joined with the two characters backslash-n and sent
# through plain `echo`, which does not expand escapes, so the mail showed a
# literal "\n".)
append_notice() {
  [[ -z $msg ]] || msg+=$'\n'
  msg+=$1
}

for (( pos = 0; pos < ${#agents_executables[@]}; pos++ )); do
  agent_name=${agents_pnames[$pos]}
  agent_exe=${agents_executables[$pos]}

  # pgrep -f matches the pattern against full command lines and never reports
  # itself, so no "grep -v grep" filtering or line counting is needed.
  if pgrep -f "(${agent_exe})" > /dev/null; then
    echo "${agent_name} is running"
    continue
  fi

  echo "${agent_name} stopped by unknown reason and restarted now."
  TIMETAG=$(date +"%Y%m%d_%H%M%S")
  LOG=$WorkDir/log/LOG.${agent_name}.$HOSTNAME.$TIMETAG

  # Create/truncate the log once, then let every writer append. Previously the
  # background agent opened the file with truncation while `date` and `echo`
  # appended to it concurrently, so lines could be lost or overwritten.
  : > "$LOG"
  # The parameter entry is a space-separated argument list and must be
  # word-split, hence the deliberately unquoted expansion.
  # shellcheck disable=SC2086
  "$agent_exe" ${agents_parameters[$pos]} >> "$LOG" 2>&1 &
  date >> "$LOG"

  if [[ ! -e "$WorkDir/.start" ]]; then
    echo "${agent_name} stopped by unknown reason and restarted at $HOSTNAME." >> "$LOG"
  else
    echo "${agent_name} Found .start file, starting"
  fi

  append_notice "${agent_name} stopped by unknown reason and restarted now at $HOSTNAME."
done

# Restarts that happen while a .start marker exists are not reported by mail.
if [[ ! -e "$WorkDir/.start" && -n $msg ]]; then
  printf '%s\n' "$msg" | mail -s "File Collection Agents not Running" "$YourEmail"
fi

#######################################
# Remove the .start marker in a directory when this host is the one named
# inside it.
# Arguments:
#   $1 - directory that may contain the .start marker
#   $2 - seconds to wait before reading the marker (default: 10)
#        NOTE(review): the wait presumably gives other hosts time to see the
#        marker before it disappears - confirm.
# Returns:
#   0 when there is no marker or this host is not the one named in it,
#   otherwise the status of rm.
#######################################
release_start_marker() {
  local marker="$1/.start"
  local delay=${2:-10}
  local master

  [[ -e $marker ]] || return 0

  sleep "$delay"
  master=$(< "$marker")
  # The right-hand side is quoted so the marker content is compared literally;
  # unquoted it would be treated as a glob pattern (a marker containing "*"
  # would match every host).
  if [[ $(hostname -s) == "$master" ]]; then
    rm -- "$marker"
  fi
}

release_start_marker "$WorkDir"