scripts/harvesting_tools/Harvesting.sh

0001 #!/bin/zsh
0002
0003 #if [ $SCRAM_ARCH != "slc5_ia32_gcc434" ] ; then
0004 #  echo "wrong platform"
0005 #  exit
0006 #fi
0007
# Timestamp (yymmddHHMMSS) used to tag every file produced by this run.
date=$(date +%y%m%d%H%M%S)

echo
echo "=========================================================="
echo Start Harvesting script at $(date)
echo "=========================================================="
echo

#logfile=/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/harvesting/bin/Harvesting_${date}.log
#touch $logfile

# Single-instance lock.
# - The path is made absolute: the script changes directory many times
#   before the lock is removed, so a relative name would be created in one
#   directory and removed from another.
# - The file is created with noclobber, so checking for and creating the
#   lock is one atomic step (two jobs started together cannot both win).
# - Once the lock is ours it is removed on any exit, including HUP/INT/TERM.
# - When the lock cannot be taken the script exits with status 1.
LOCK=$PWD/harvesting.lock
if ( set -o noclobber; echo $$ > "$LOCK" ) 2>/dev/null; then
  trap 'rm -f "$LOCK"' EXIT
  trap 'exit 1' HUP INT TERM
else
  if [ -e "$LOCK" ]; then
    echo An update is running with pid $(cat "$LOCK")
    echo Remove the lock file $LOCK if the job crashed
  else
    echo "Cannot create the lock file $LOCK" >&2
  fi
  exit 1
fi
0027
# Software environment: CMS defaults, the CMSSW_3_8_6 runtime, grid UI, CRAB,
# the DBS client and the CAF settings, followed by a grid proxy.
. /afs/cern.ch/cms/sw/cmsset_default.sh
cd /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/harvesting/CMSSW_3_8_6/src
source /afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh
# NOTE(review): cmsenv is presumably an alias provided by cmsset_default.sh;
# it is kept as a separate top-level command so that it is looked up only
# after that file has been sourced.
cmsenv
source /afs/cern.ch/cms/ccs/wm/scripts/Crab/CRAB_2_7_5_patch1/crab.sh
# The DBS setup file is sourced from inside its own directory.  It is only
# sourced when that directory could actually be entered; otherwise an
# unrelated setup.sh could be picked up and popd would pop the wrong entry.
if pushd /afs/cern.ch/cms/sw/${SCRAM_ARCH}/cms/dbs-client/DBS_2_0_9_patch_4-cms/lib; then
  source ./setup.sh
  popd
else
  echo "WARNING: DBS client directory not accessible, DBS setup skipped" >&2
fi
# setup for CAF
. /afs/cern.ch/cms/caf/setup.sh


# the following is useless, but does not harm
export VO_CMS_SW_DIR=/afs/cern.ch/cms/sw
export CMS_PATH=/afs/cern.ch/cms/sw
. $CMS_PATH/cmsset_default.sh

voms-proxy-init
0046
####
# Working directories: everything below runs from the bin directory.
basedir=/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/harvesting
bindir=${basedir}/bin
outdir=${bindir}/test
cd "$bindir"
pwd

# Names of the files handled by this run.
#   outfiledate  : dataset list obtained by this run
#   prevfile     : dataset list kept from the previous run
#   todofiledate : datasets selected for harvesting by this run
prevfile=DQM_prev.txt
outfiledate=DQM_${date}.txt
todofiledate=DQM_${date}.todo
#difffiledate=DQM_${date}.log
#datafiledate=DQM_${date}.datasets

####
# remove previous stuff
# (one rm per pattern: a pattern without matches must not prevent the
#  removal of the others)

printf '%s\n' "cleaning previous jobs"
rm -rf "$basedir"/CMSSW*/harvesting_area/*__DQM_*site_*
rm -rf "$basedir"/CMSSW*/harvesting_area/crab*
rm -rf "$basedir"/CMSSW*/harvesting_area/harvesting*
rm -rf "$basedir"/CMSSW*/harvesting_area/multicrab*
printf '%s\n' "cleaning done"

# Start from an empty dataset list and no scratch files.
printf '%s\n' "$outfiledate"
rm -rf "$outfiledate" help1 help2
: > "$outfiledate"
0073
####
# run DBS

# Print the "dataset release tag datatype" lines that are present in the
# current list $1 but absent from the previous list $2 (both sorted).
# Only the "<" side of the diff is kept: lines that exist only in the
# previous list are datasets that disappeared, not new ones, and an empty
# current list (failed query) must not re-queue the whole previous list.
new_dataset_lines() {
  diff "$1" "$2" \
    | awk '$1 == "<" {print $2" "$3" "$4" "$5}' \
    | grep DQM \
    | sort -u
}

echo
echo "###############################################################"
echo Checking DBS for new datasets at $(date)
echo
echo

# NOTE(review): "file.status = valid" sits outside the quoted query and is
# therefore passed to dbs as three separate arguments; it looks as if it was
# meant to be part of the query. Left unchanged - confirm against the DBS
# client before moving it.
dbs search --query="find dataset, release, dataset.tag, datatype where dataset like %/DQM and site = caf.cern.ch" file.status = valid | grep DQM >> "$outfiledate"

####
# sort outfile
sort -u -o "$outfiledate" "$outfiledate"
# a missing previous list (first run) counts as an empty one
touch "$prevfile"
sort -u -o "$prevfile" "$prevfile"
new_dataset_lines "$outfiledate" "$prevfile" > "$todofiledate"

# The previous list is only replaced when this run obtained a non-empty list.
if [ -s "$outfiledate" ]; then
  rm -f "$prevfile"
  cp "$outfiledate" "$prevfile"
fi
0097
###
# Helpers for the processing stage below.  They rely on $basedir, which is
# set earlier in this script.

# Print the lines of todo file $1 whose release (field 2) is exactly $2 and
# whose data type (field 4) is exactly $3, as single-space separated
# "dataset release tag datatype" lines without duplicates.
# Whole fields are compared, so a dataset or tag name that merely contains
# "mc" or "data" cannot put a line into the wrong list.
select_datasets() {
  awk -v rel="$2" -v dtype="$3" \
    '$2 == rel && $4 == dtype {print $1" "$2" "$3" "$4}' "$1" | sort -u
}

# Print the release whose working area is used to harvest datasets that were
# produced with release $1.  Releases not listed here map to themselves.
harvesting_release() {
  case "$1" in
    CMSSW_3_7_0_patch2|CMSSW_3_7_0_patch3) printf '%s\n' CMSSW_3_7_0_patch4 ;;
    CMSSW_3_6_2) printf '%s\n' CMSSW_3_6_3 ;;
    *) printf '%s\n' "$1" ;;
  esac
}

# Create the project area for release $1 under $basedir, check out and build
# the harvesting tools and create the harvesting_area directory with links to
# cmsHarvester.py and check_harvesting.pl.
# Returns 1 as soon as a directory that the following steps depend on cannot
# be entered or created, so that nothing is checked out, built or created in
# an unrelated directory.  Changes the current directory.
setup_release_area() {
  local release=$1

  echo
  echo "####################################################################"
  echo Setting up $release at $(date)
  echo
  echo
  cd "$basedir" || return 1
  scramv1 project CMSSW "$release"
  cd "$release/src" || return 1
  cvs co DQM/Integration/scripts/harvesting_tools
  cvs co -r CMSSW_3_6_1 Configuration/PyReleaseValidation/python/ConfigBuilder.py
  addpkg Configuration/StandardSequences
  cvs update -r 1.9 Configuration/StandardSequences/python/Harvesting_cff.py
  sed -i 's/postValidation\*hltpostvalidation_prod/hltpostvalidation_prod/'  Configuration/StandardSequences/python/Harvesting_cff.py
  scramv1 b
  cd .. || return 1
  mkdir -p harvesting_area || return 1
  cd harvesting_area || return 1
  ln -s ../src/DQM/Integration/scripts/harvesting_tools/cmsHarvester.py .
  ln -s ../src/DQM/Integration/scripts/harvesting_tools/check_harvesting.pl .
  return 0
}

###
# if non-zero then produce todo files and all the rest
if [ -s "$todofiledate" ]; then
  cp "$todofiledate" "$outdir/$todofiledate"

  # Releases found in the todo list (field 2), sorted and unique.
  releases=($(awk '$2 ~ /^CMSSW/ {print $2}' "$todofiledate" | sort -u))

  # Lines that end up in none of the lists are reported instead of being
  # dropped silently.
  awk '$2 !~ /^CMSSW/ || ($4 != "data" && $4 != "mc") {print "WARNING: not harvested (unexpected release or data type): " $0}' \
    "$todofiledate" >&2

  ###
  # One list per data type and release: DQM_<date>.<type>.<release>.
  # Empty lists are removed, the others are printed and copied to output.
  for kind in data mc; do
    for rel in "${releases[@]}"; do
      list=DQM_${date}.${kind}.${rel}
      select_datasets "$todofiledate" "$rel" "$kind" > "$list"
      if [ -s "$list" ]; then
        cat "$list"
        cp "$list" "$outdir/."
      else
        rm -f "$list"
      fi
    done
  done

  #####
  # now submit jobs
  #

  ##-------------------------------------------
  ## loop over all datasets
  ##-------------------------------------------

  for kind in data mc; do
    for rel in "${releases[@]}"; do
      list=$outdir/DQM_${date}.${kind}.${rel}
      [ -s "$list" ] || continue

      # The list is read line by line on file descriptor 3, so the commands
      # started inside the loop keep the script's own standard input.  Every
      # line is one (dataset, release, tag, datatype) combination.
      while read -r dataset cmssw tag dtype <&3; do
        [ -n "$dataset" ] || continue

        if [ "$dtype" = "data" ]; then
          htype="DQMoffline"
        else
          htype="MC"
        fi

        echo "${list##*/}" $dataset $cmssw $tag $dtype $htype

        cmssw=$(harvesting_release "$cmssw")

        ## Set up CMSSW environment, if necessary
        if [ ! -d "$basedir/$cmssw" ]; then
          if ! setup_release_area "$cmssw"; then
            echo "ERROR: could not set up $cmssw, skipping $dataset" >&2
            continue
          fi
        fi

        # Without the release area nothing may be created or submitted.
        if ! cd "$basedir/$cmssw/src"; then
          echo "ERROR: no source area for $cmssw, skipping $dataset" >&2
          continue
        fi
        export VO_CMS_SW_DIR=/afs/cern.ch/cms/sw
        eval $(scramv1 runtime -sh)
        if ! cd ../harvesting_area; then
          echo "ERROR: no harvesting area for $cmssw, skipping $dataset" >&2
          continue
        fi

        echo
        echo "####################################################################"
        echo Running the harvester at $(date) ...
        echo
        echo

        if [ "$dtype" = "data" ]; then
          ./cmsHarvester.py --dataset=$dataset --harvesting_type=$htype \
            --globaltag=$tag --site=CAF --force --Jsonfile=/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/harvesting/bin/JSON.txt
        else
          ./cmsHarvester.py --dataset=$dataset --harvesting_type=$htype \
            --globaltag=$tag --site=CAF --no-ref-hists --force
        fi

        echo
        echo "####################################################################"
        echo Start creating jobs at $(date)
        echo
        echo
        multicrab -create

        echo
        echo "####################################################################"
        echo Start submitting jobs at $(date) ...
        echo
        echo
        multicrab -submit

        rm -fr harvesting_accounting.txt
      done 3< "$list"
    done
  done

  ##---------------------------------------------------
  ## ... end loop
  ##---------------------------------------------------

  # leave the stage in the output directory
  cd "$outdir"

fi
0227
# Back in the bin directory: drop the scratch files and release the lock.
cd "$bindir"
rm -rf help1 help2
rm -f "$LOCK"

# Closing banner with the end time of the run.
printf '\n%s\n' "=========================================================="
echo End Harvesting script at $(date)
printf '%s\n\n' "=========================================================="