SkimLooper.tpl

CMSSW/Alignment/TrackerAlignment/scripts/SkimLooper.tpl

Line Code

Line	Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208	`#! /bin/bash` `### $1: file with list of ALCA types you want to process; remember to put for each of them` `### an ALCARECO style cfg in Alignment/CommonAlignmentProducer/python/` `#` `### $2: number, if greater than 2 it cleans the CASTOR area before starting (optional)` `source /afs/cern.ch/cms/caf/setup.sh` `curdir=$(pwd)` `#CASTOR_OUT="/castor/cern.ch/cms/store/user/bonato/CRAFTReproSkims/Craft09/4T/"` `ALCAFILELIST=$1` `CASTOR_OUT="<CASTOROUT>"` `DQM_OUT="${curdir}/MONITORING/DQM/"` `MAXEVENTS=10000` `#curdir=$( pwd )` `#check if output directory exists` `nsls /castor/cern.ch/cms/$CASTOR_OUT` `if [ $? -ne 0 ]` `then` `echo "Output directory: "` `echo /castor/cern.ch/cms/$CASTOR_OUT` `echo "does not exist. Please check the scripts. Exiting."` `exit 1` `fi` `#check if DQM output directory exists` `ls $DQM_OUT` `if [ $? -ne 0 ]` `then` `echo "DQM directory: "` `echo $DQM_OUT` `echo "does not exist. Please check the scripts. 
Exiting."` `exit 1` `fi` `## Clean output directory` `if [ $# -gt 1 ]` `then` `if [ $2 -gt 2 ]` `then` `if [ $(nsls -l /castor/cern.ch/cms/$CASTOR_OUT \| wc -l ) -gt 1 ] #there is always the dir 'logfiles'` `then` `echo "Cleaning output directory: $CASTOR_OUT"` `for file in $(nsls /castor/cern.ch/cms/$CASTOR_OUT/ \| grep "Skimmed" )` `do` `#echo` `rfrm /castor/cern.ch/cms/$CASTOR_OUT/$file` `done` `for file in $(nsls /castor/cern.ch/cms/$CASTOR_OUT/logfiles/)` `do` `#echo` `rfrm /castor/cern.ch/cms/$CASTOR_OUT/logfiles/$file` `done` `fi` `fi #end if $2 > 2` `fi #end if $# > 2` `#really needed ?` `#export STAGE_SVCCLASS=cmscaf` `for ALCATAG in $( cat $ALCAFILELIST )` `do` `echo` `echo "***************************************"` `echo "* Starting the ALCATAG: ${ALCATAG}"` `echo "***************************************"` `echo` `# DAT_FILE="/afs/cern.ch/cms/CAF/CMSALCA/ALCA_TRACKERALIGN/HIP/bonato/DEVEL/HIPWorkflow/ALCARECOskim/v1.4/data/${ALCATAG}.dat"` `DAT_FILE="${curdir}/../data/${ALCATAG}.dat"` `TPL_FILE="TkAlCaRecoSkimming.${ALCATAG}.tpl"` `TAG=$ALCATAG #"CRAFT"` `JOBTAG="ALCASkim_"$TAG` `BASE_TPL=$(basename "$TPL_FILE" .tpl)` `echo ""` `echo "I am in $curdir"` `INDEX=1` `# echo "****************************"` `# echo "Starting the show"` `# echo` `TOTFILES=0` `for i in $( cat $DAT_FILE )` `do` `#pick the total nr events in this file from the previously produced nevents.out` `let TOTFILES=TOTFILES+1` `TOTEVTS=$(sed -n $TOTFILES'p' ../data/nevents${ALCATAG}.out)` `#echo "The file #$TOTFILES has $TOTEVTS events"` `if [ $TOTEVTS == 0 ]` `then` `continue` `fi` `TOTSPLITS=$(( ( $TOTEVTS / $MAXEVENTS ) +1 ))` `firstev=0` `lastev=-1` `#echo "I will split it into $TOTSPLITS"` `if [ $TOTSPLITS > 1 ]` `then` `nsplits=1` `while [ $nsplits -le $TOTSPLITS ]` `do` `#echo "Splitting the file $TOTFILE : $nsplits"` `firstev=$(( $MAXEVENTS$(( $nsplits-1 ))+1 ))` `lastev=$MAXEVENTS #$(( ($MAXEVENTS$nsplits) ))` `JOB=$JOBTAG"_file"$INDEX` 
`CFG_FILE=$BASE_TPL"."$TAG"_cfg."$INDEX".py"` `sed -e "s\|<JOB>\|${JOB}\|g" -e "s\|<INPATH>\|${i}\|g" -e "s\|<INIEVT>\|${firstev}\|g" -e "s\|<FINEVT>\|${lastev}\|g" -e "s\|<ALCATAG>\|${TAG}\|g" < $TPL_FILE > $CFG_FILE` `let INDEX=INDEX+1` `let nsplits=nsplits+1` `# if [ $INDEX -ge 3 ]` `# then` `# echo "Reached a maximum number of files: $INDEX. Stopping the submission"` `# break` `# fi` `done` `else #file is small and does not contain too many events` `firstev=0` `lastev=-1` `JOB=$JOBTAG"_file"$INDEX` `CFG_FILE=$BASE_TPL"."$TAG"_cfg."$INDEX".py"` `sed -e "s\|<JOB>\|${JOB}\|g" -e "s\|<INPATH>\|${i}\|g" -e "s\|<INIEVT>\|${firstev}\|g" -e "s\|<FINEVT>\|${lastev}\|g" -e "s\|<ALCATAG>\|${TAG}\|g" < $TPL_FILE > $CFG_FILE` `let INDEX=INDEX+1` `# if [ $INDEX -ge 3 ]` `# then` `# echo "Reached a maximum number of files: $INDEX. Stopping the submission"` `# break` `# fi` `fi` `# if [ $INDEX -ge 3 ]` `# then` `# echo "Reached a maximum number of files: $INDEX. Stopping the submission"` `# break` `# fi` `#echo "--- moving to next file. 
At the moment INDEX=$INDEX"` `done` `TOTCFGFILES=$(( $INDEX -1 ))` `#echo "Tot cfg files: $TOTCFGFILES"` `##second loop submitting what we prepared before` `INDEX=1` `while [ $INDEX -le $TOTCFGFILES ]` `do` `JOBNAME="ALCASkim"$TAG"_"$INDEX` `LOGFILE="${JOBNAME}.log"` `CFG_FILE=$BASE_TPL"."$TAG"_cfg."$INDEX".py"` `echo "Submitting $JOBNAME with config file $CFG_FILE"` `REM=0` `let "REM=$INDEX % 300"` `if [ $REM -lt 100 ]` `then` `#echo "dummy A"` `bsub -q cmscaf1nd -J $JOBNAME -oo $LOGFILE skim_exec.sh "$curdir/$CFG_FILE" "$CASTOR_OUT" "$DQM_OUT"` `elif [ $REM -lt 200 ]` `then` `#echo "dummy B"` `bsub -q cmsexpress -J $JOBNAME -oo $LOGFILE skim_exec.sh "$curdir/$CFG_FILE" "$CASTOR_OUT" "$DQM_OUT"` `else` `#echo "dummy C"` `bsub -q cmscaf1nd -J $JOBNAME -oo $LOGFILE skim_exec.sh "$curdir/$CFG_FILE" "$CASTOR_OUT" "$DQM_OUT"` `fi` `##fi #dummy` `let INDEX=INDEX+1` `done` `done #end 'for loop' on ALCA categories` `#cd $curdir` `#mv "$BASE_TPL"".py" $DQM_OUT/../logfiles/`

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208

#! /bin/bash

### $1: file listing the ALCA types to process; each listed type needs
###     an ALCARECO-style cfg in Alignment/CommonAlignmentProducer/python/
#
### $2: (optional) number; if it is greater than 2, the CASTOR area is cleaned before starting

# Pull in the CAF environment definitions.
. /afs/cern.ch/cms/caf/setup.sh

# Directory the script is launched from; later paths are built from it.
curdir="$(pwd)"

# First positional argument: file holding the list of ALCA types to process.
ALCAFILELIST="$1"

# Event count used further down to cut an input file into several jobs.
MAXEVENTS=10000

# Local directory handed to every submitted job as the DQM output location.
DQM_OUT="${curdir}/MONITORING/DQM/"

# CASTOR output location; it is appended to /castor/cern.ch/cms/ where used.
# NOTE(review): "<CASTOROUT>" looks like a template placeholder that gets
# substituted before this script is run -- confirm against the generator.
# A former hard-coded value, kept as an example:
#   /castor/cern.ch/cms/store/user/bonato/CRAFTReproSkims/Craft09/4T/
CASTOR_OUT="<CASTOROUT>"

#curdir=$( pwd )

# Probe the CASTOR output directory with nsls; if the listing fails, report
# the path and stop with exit status 1.
nsls /castor/cern.ch/cms/$CASTOR_OUT || {
  echo "Output directory: "
  echo /castor/cern.ch/cms/$CASTOR_OUT
  echo "does not exist. Please check the scripts. Exiting."
  exit 1
}


# Check that the local DQM output directory exists before any job is prepared.
# The path is quoted so that a launch directory containing whitespace is tested
# as one path, and -d is used so that only an actual directory passes (the
# previous 'ls' probe also accepted a plain file and printed the listing).
if [[ ! -d "$DQM_OUT" ]]
then
  echo "DQM directory: "
  echo "$DQM_OUT"
  echo "does not exist. Please check the scripts. Exiting."
  exit 1
fi


## Optional clean-up of the CASTOR output area.
## It runs only when a second argument is given and that argument is
## greater than 2.
if [ $# -gt 1 ] && [ $2 -gt 2 ]; then

  # The directory 'logfiles' is always there, so more than one listed line
  # means there is something to remove.
  if [ $(nsls -l /castor/cern.ch/cms/$CASTOR_OUT | wc -l ) -gt 1 ]; then
    echo "Cleaning output directory: $CASTOR_OUT"

    # Remove every entry of the output area whose name contains "Skimmed".
    for skim in $(nsls /castor/cern.ch/cms/$CASTOR_OUT/ | grep "Skimmed" ); do
      rfrm  /castor/cern.ch/cms/$CASTOR_OUT/$skim
    done

    # Remove everything listed under logfiles/.
    for log in $(nsls /castor/cern.ch/cms/$CASTOR_OUT/logfiles/); do
      rfrm  /castor/cern.ch/cms/$CASTOR_OUT/logfiles/$log
    done
  fi # end: output area not empty

fi # end: $# > 1 and $2 > 2


#really needed ?
#export STAGE_SVCCLASS=cmscaf

#######################################
# Render one job configuration from the per-tag template and advance INDEX.
# The placeholders <JOB>, <INPATH>, <INIEVT>, <FINEVT> and <ALCATAG> of the
# template are replaced; the result is written to the current directory.
# Globals:   JOBTAG, BASE_TPL, TAG, TPL_FILE (read)
#            INDEX (read, then incremented), CFG_FILE (written)
# Arguments: $1 - input file path put in place of <INPATH>
#            $2 - value put in place of <INIEVT>
#            $3 - value put in place of <FINEVT>
#######################################
write_skim_cfg() {
  local inpath="$1" first="$2" last="$3"
  local job="${JOBTAG}_file${INDEX}"

  CFG_FILE="${BASE_TPL}.${TAG}_cfg.${INDEX}.py"
  sed -e "s|<JOB>|${job}|g" \
      -e "s|<INPATH>|${inpath}|g" \
      -e "s|<INIEVT>|${first}|g" \
      -e "s|<FINEVT>|${last}|g" \
      -e "s|<ALCATAG>|${TAG}|g" \
      < "$TPL_FILE" > "$CFG_FILE"
  INDEX=$(( INDEX + 1 ))
}

#######################################
# Create the configuration file(s) for one input file.
# A file with 0 events produces nothing. A file that needs a single job gets
# one configuration with <INIEVT>=0 and <FINEVT>=-1; otherwise one
# configuration per split is written, each with <FINEVT>=MAXEVENTS.
# Globals:   MAXEVENTS (read); see write_skim_cfg for the rest
# Arguments: $1 - input file path
#            $2 - number of events in that file
# Outputs:   a warning on stderr when $2 is not a non-negative integer; the
#            file is then skipped instead of aborting the whole loop with an
#            arithmetic error
#######################################
make_skim_cfgs() {
  local inpath="$1" totevts="$2"
  local totsplits nsplits

  # Tolerate stray blanks around the number read from the nevents file.
  totevts=${totevts//[[:space:]]/}
  if [[ ! "$totevts" =~ ^[0-9]+$ ]]; then
    echo "WARNING: no valid event count for ${inpath} (got '${2}'); skipping it." >&2
    return 0
  fi
  totevts=$(( 10#$totevts ))   # force base 10 even with leading zeros
  if (( totevts == 0 )); then
    return 0
  fi

  # NOTE(review): an event count that is an exact multiple of MAXEVENTS yields
  # one extra split starting past the last event -- kept as it was; confirm
  # whether that is intended.
  totsplits=$(( totevts / MAXEVENTS + 1 ))

  # Numeric comparison. The former '[ $TOTSPLITS > 1 ]' was a redirection to a
  # file named "1" and therefore always true, so the single-job branch below
  # was never taken.
  if (( totsplits > 1 )); then
    for (( nsplits = 1; nsplits <= totsplits; nsplits++ )); do
      write_skim_cfg "$inpath" "$(( MAXEVENTS * (nsplits - 1) + 1 ))" "$MAXEVENTS"
    done
  else
    # File is small and does not contain too many events.
    write_skim_cfg "$inpath" 0 -1
  fi
}

#######################################
# Print the LSF queue for the job with the given index: indices whose
# remainder modulo 300 lies in [100, 200) go to cmsexpress, all others to
# cmscaf1nd.
# Arguments: $1 - job index
# Outputs:   queue name on stdout
#######################################
skim_queue() {
  local rem=$(( $1 % 300 ))

  if (( rem >= 100 && rem < 200 )); then
    echo "cmsexpress"
  else
    echo "cmscaf1nd"
  fi
}

# Main loop: for every ALCA tag listed in $ALCAFILELIST, first write all job
# configurations, then submit one batch job per configuration.
for ALCATAG in $( cat "$ALCAFILELIST" )
do

  echo
  echo "*****************************************"
  echo "*** Starting the ALCATAG: ${ALCATAG}"
  echo "*****************************************"
  echo
  DAT_FILE="${curdir}/../data/${ALCATAG}.dat"
  TPL_FILE="TkAlCaRecoSkimming.${ALCATAG}.tpl"
  TAG=$ALCATAG
  JOBTAG="ALCASkim_${TAG}"
  BASE_TPL=$(basename "$TPL_FILE" .tpl)
  echo ""
  echo "I am in $curdir"
  INDEX=1

  ## first loop: one pass over the input files listed in the .dat file
  TOTFILES=0
  for i in $( cat "$DAT_FILE" )
  do
    # The N-th input file takes its event count from the N-th line of the
    # previously produced nevents file, so the counter advances for every
    # file, including the ones that end up skipped.
    TOTFILES=$(( TOTFILES + 1 ))
    TOTEVTS=$(sed -n "${TOTFILES}p" "../data/nevents${ALCATAG}.out")
    make_skim_cfgs "$i" "$TOTEVTS"
  done

  TOTCFGFILES=$(( INDEX - 1 ))

  ## second loop: submit what was prepared above
  INDEX=1
  while [ "$INDEX" -le "$TOTCFGFILES" ]
  do
    JOBNAME="ALCASkim${TAG}_${INDEX}"
    LOGFILE="${JOBNAME}.log"
    CFG_FILE="${BASE_TPL}.${TAG}_cfg.${INDEX}.py"

    echo "Submitting $JOBNAME with config file $CFG_FILE"
    bsub -q "$(skim_queue "$INDEX")" -J "$JOBNAME" -oo "$LOGFILE" skim_exec.sh "$curdir/$CFG_FILE" "$CASTOR_OUT" "$DQM_OUT"

    INDEX=$(( INDEX + 1 ))
  done

done #end 'for loop' on ALCA categories


#cd $curdir
#mv "$BASE_TPL"*".py" $DQM_OUT/../logfiles/