Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-25 02:29:50

0001 #!/usr/bin/env python3
0002 # Colin
0003 # batch mode for cmsRun, March 2009
0004 
from builtins import range

import copy
import imp
import importlib.util
import math
import os
import pickle
import pprint
import re
import shutil
import string
import sys
import time
from optparse import OptionParser

# particle flow specific
from PhysicsTools.HeppyCore.utils.batchmanager import BatchManager
import PhysicsTools.HeppyCore.utils.eostools as eostools

# cms specific
import FWCore.ParameterSet.Config as cms
from IOMC.RandomEngine.RandomServiceHelper import RandomNumberServiceHelper
0016 
0017 
def batchScriptCCIN2P3():
   '''Return the PBS batch script used to run one cmsRun job at CCIN2P3.

   The script copies the submission directory to the worker, records the
   environment and hardware in sysinfo.txt, runs cmsRun on run_cfg.py and
   copies the job directory back.

   BUGFIX: the embedded script started with "!/usr/bin/env bash" -- the
   leading '#' of the shebang was missing, so the first line would have
   been executed as a (failing) command instead of selecting bash.
   '''
   script = """#!/usr/bin/env bash
#PBS -l platform=LINUX,u_sps_cmsf,M=2000MB,T=2000000
# sets the queue
#PBS -q T
#PBS -eo
#PBS -me
#PBS -V

source $HOME/.bash_profile

echo '***********************'

ulimit -v 3000000

# coming back to submission dir do setup the env
cd $PBS_O_WORKDIR
eval `scramv1 ru -sh`


# back to the worker
cd -

# copy job dir here
cp -r $PBS_O_WORKDIR .

# go inside
jobdir=`ls`
echo $jobdir

cd $jobdir

cat > sysinfo.sh <<EOF
#! env bash
echo '************** ENVIRONMENT ****************'

env

echo
echo '************** WORKER *********************'
echo

free
cat /proc/cpuinfo 

echo
echo '************** START *********************'
echo
EOF

source sysinfo.sh > sysinfo.txt

cmsRun run_cfg.py

# copy job dir do disk
cd -
cp -r $jobdir $PBS_O_WORKDIR
"""
   return script
0077 
0078 
0079 
def rootfiles_to_eos_script(index, remoteDir):
   '''Return a shell snippet that stages every local root file out to EOS.

   Each file is renamed file.root -> file_<index>.root before the copy so
   that the outputs of different jobs do not clash in remoteDir.

   index:     job index appended to the root file names.
   remoteDir: EOS destination directory (converted to an LFN here).
   '''
   remoteDir = eostools.eosToLFN(remoteDir)
   # Raw string: keeps the sed escapes (\.) intact instead of relying on
   # python's lenient handling of invalid escape sequences.
   # BUGFIX: the original ran 'rm *.root' inside the loop, deleting ALL
   # root files after the first iteration so later copies failed; only
   # the file that was just copied is removed now.
   return r"""
for file in *.root; do
newFileName=`echo $file | sed -r -e 's/\./_{index}\./'`
fullFileName={remoteDir}/$newFileName
{eos} cp $file /eos/cms/$fullFileName
{eos} chmod 755 /eos/cms/$fullFileName
rm $file
done
""".format(index=index, remoteDir=remoteDir, eos=eostools.eos_select)
0091 
0092 
def batchScriptCERN(  remoteDir, index ):
   '''Build the LSF (bsub) version of the batch script.

   remoteDir: EOS destination for the root files; '' disables stage-out.
   index:     job index, used to suffix the staged-out root files.
   '''
   # Shell payload; {prog} is filled in from the module-level 'prog' option.
   # On failure the root files are removed so partial output is never kept.
   pieces = ["""#!/bin/bash
# sets the queue
#BSUB -q 8nm

echo 'environment:'
echo
env
ulimit -v 3000000
echo 'copying job dir to worker'
cd $CMSSW_BASE/src
eval `scramv1 ru -sh`
cd -
cp -rf $LS_SUBCWD .
ls
cd `find . -type d | grep /`
echo 'running'
{prog} run_cfg.py
if [ $? != 0 ]; then
    echo wrong exit code! removing all root files
    rm *.root
    exit 1 
fi
echo 'sending the job directory back'
""".format(prog=prog)]

   # optional stage-out of the root files to EOS
   if remoteDir != '':
      pieces.append(rootfiles_to_eos_script(index, remoteDir))

   # always ship the whole job directory back to the submission directory
   pieces.append('cp -rf * $LS_SUBCWD\n')

   return ''.join(pieces)
0126 
def batchScriptLocal(  remoteDir, index ):
   '''Build the batch script for a local run driven by nohup.

   remoteDir: EOS destination for the root files; '' disables stage-out.
   index:     job index, used to suffix the staged-out root files.
   '''
   # shell payload; {prog} is filled in from the module-level 'prog' option
   chunks = ["""#!/bin/bash
echo 'running'
{prog} run_cfg.py
if [ $? != 0 ]; then
    echo wrong exit code! removing all root files
    rm *.root
    exit 1 
fi
echo 'sending the job directory back'
""".format(prog=prog)]

   # optional stage-out of the root files to EOS
   if remoteDir != '':
      chunks.append(rootfiles_to_eos_script(index, remoteDir))

   return ''.join(chunks)
0145 
0146 
class CmsBatchException( Exception):
   '''Exception raised when this script hits an unrecoverable problem.'''

   def __init__(self, value):
      # keep the payload around so __str__ can render it
      self.value = value

   def __str__(self):
      return str( self.value)
0155 
0156 
class MyBatchManager( BatchManager ):
   '''Batch manager specific to cmsRun processes.''' 

   def PrepareJobUser(self, jobDir, value ):
      '''Prepare one job. This function is called by the base class.

      jobDir: directory created for this job; batchScript.sh and
              run_cfg.py are written there.
      value:  1-based job index; selects the chunk of input files and
              suffixes the output root files on EOS.

      Relies on module globals set by the surrounding script:
      process, fullSource, options, generator, grouping.
      '''
      
      # start every job from a pristine copy of the original source
      process.source = fullSource.clone()
      
      #prepare the batch script
      scriptFileName = jobDir+'/batchScript.sh'
      scriptFile = open(scriptFileName,'w')
      # NOTE(review): strips a castor prefix; presumably a no-op for EOS
      # paths -- confirm against remoteOutputDir_ conventions.
      storeDir = self.remoteOutputDir_.replace('/castor/cern.ch/cms','')
      mode = self.RunningMode(options.batch)
      if mode == 'LXPLUS':
         scriptFile.write( batchScriptCERN( storeDir, value) )    #here is the call to batchScriptCERN, i need to change value
      elif mode == 'LOCAL':
         scriptFile.write( batchScriptLocal( storeDir, value) )   #same as above but for batchScriptLocal
      scriptFile.close()
      # make the script executable for the batch system
      os.system('chmod +x %s' % scriptFileName)

      #prepare the cfg
      # replace the list of fileNames by a chunk of filenames:
      if generator:
         # generator job: no input files, just re-seed the random service
         randSvc = RandomNumberServiceHelper(process.RandomNumberGeneratorService)
         randSvc.populate()
      else:
         # job 'value' processes input files [iFileMin, iFileMax)
         iFileMin = (value-1)*grouping 
         iFileMax = (value)*grouping 
         process.source.fileNames = fullSource.fileNames[iFileMin:iFileMax]
         print(process.source)
      # run_cfg.py imports the shared base_cfg.py and overrides only the
      # source (and, for generator jobs, the random seeds)
      cfgFile = open(jobDir+'/run_cfg.py','w')
      cfgFile.write('import FWCore.ParameterSet.Config as cms\n\n')
      cfgFile.write('import os,sys\n')
      # need to import most of the config from the base directory containing all jobs
      cfgFile.write("sys.path.append('%s')\n" % os.path.dirname(jobDir) )
      cfgFile.write('from base_cfg import *\n')
      cfgFile.write('process.source = ' + process.source.dumpPython() + '\n')
      if generator:
         cfgFile.write('process.RandomNumberGeneratorService = ' + process.RandomNumberGeneratorService.dumpPython() + '\n')
      cfgFile.close()
0197 
0198 
batchManager = MyBatchManager()

# Keep a trace of the exact command line used for this submission.
# BUGFIX: string.join() was removed in Python 3; ' '.join matches the
# python2 default (space) separator. A context manager replaces the
# manual open/close and stops shadowing the 'file' builtin.
with open('cmsBatch.txt', 'w') as cmdLog:
   cmdLog.write(' '.join(sys.argv) + "\n")
0205 
# usage text shown by --help; %prog is substituted by optparse
batchManager.parser_.usage = """
%prog [options] <number of input files per job> <your_cfg.py>.

Submits a number of jobs taking your_cfg.py as a template. your_cfg.py can either read events from input files, or produce them with a generator. In the later case, the seeds are of course updated for each job.

A local output directory is created locally. This directory contains a job directory for each job, and a Logger/ directory containing information on the software you are using. 
By default:
- the name of the output directory is created automatically.
- the output root files end up in the job directories.

Each job directory contains:
- the full python configuration for this job. You can run it interactively by doing:
cmsRun run_cfg.py
- the batch script to run the job. You can submit it again by calling the batch command yourself, see the -b option.
- while running interactively: nohup.out, where the job stderr and stdout are redirected. To check the status of a job running interactively, do:
tail nohup.out
- after running:
  o the full nohup.out (your log) and your root files, in case you ran interactively
  o the LSF directory, in case you ran on LSF

Also see fwBatch.py, which is a layer on top of cmsBatch.py adapted to the organization of our samples on the CMST3. 

Examples:

First do:
cd $CMSSW_BASE/src/CMGTools/Common/test

to run on your local machine:
cmsBatch.py 1 testCMGTools_cfg.py -b 'nohup ./batchScript.sh&' 

to run on LSF (you must be logged on lxplus, not on your interactive machine, so that you have access to LSF)
cmsBatch.py 1 testCMGTools_cfg.py -b 'bsub -q 8nm < ./batchScript.sh' 
"""
# -p: executable run on each job's cfg (defaults to cmsRun)
batchManager.parser_.add_option("-p", "--program", dest="prog",
                                help="program to run on your cfg file",
                                default="cmsRun")
## batchManager.parser_.add_option("-b", "--batch", dest="batch",
##                                 help="batch command. default is: 'bsub -q 8nh < batchScript.sh'. You can also use 'nohup < ./batchScript.sh &' to run locally.",
##                                 default="bsub -q 8nh < .batchScript.sh")
# -c: extra command-line arguments forwarded to the cfg when it is loaded
batchManager.parser_.add_option("-c", "--command-args", dest="cmdargs",
                                help="command line arguments for the job",
                                default=None)
# --notagCVS: store_false, i.e. passing the flag DISABLES the CVS tagging
batchManager.parser_.add_option("--notagCVS", dest="tagPackages",
                                default=True,action="store_false",
                                help="tag the package on CVS (True)")

(options,args) = batchManager.parser_.parse_args()
batchManager.ParseOptions()

prog = options.prog
doCVSTag = options.tagPackages

# exactly two positional args required: <files per job> <cfg file>
if len(args)!=2:
   batchManager.parser_.print_help()
   sys.exit(1)
0261 
# testing that we run a sensible batch command. If not, exit.
runningMode = None
try:
   runningMode = batchManager.RunningMode( options.batch )
except CmsBatchException as err:
   print(err)
   sys.exit(1)

# number of input files per job; for a generator process (no input files)
# this value is reused below as the total number of jobs
grouping = int(args[0])
nJobs = grouping
cfgFileName = args[1]

print('Loading cfg')

# the cfg may parse its own command-line arguments, so sys.argv is
# temporarily replaced while the cfg is loaded (restored further down)
pycfg_params = options.cmdargs
trueArgv = sys.argv
sys.argv = [cfgFileName]
if pycfg_params:
   sys.argv.extend(pycfg_params.split(' '))
print(sys.argv)
0282 
0283 
# load cfg script
# BUGFIX: imp.load_source is deprecated since Python 3.4 and removed in
# 3.12; the importlib.util recipe below is the supported replacement and
# also removes the need to manage an open file handle by hand.
spec = importlib.util.spec_from_file_location("pycfg", cfgFileName)
cfo = importlib.util.module_from_spec(spec)
spec.loader.exec_module(cfo)
# the cms.Process object defined by the user's configuration
process = cfo.process

# Restore original sys.argv
sys.argv = trueArgv
0292 
0293 
# keep track of the original source
fullSource = process.source.clone()
generator = False

try:
   process.source.fileNames
except AttributeError:
   # no fileNames parameter -> events come from a generator
   # NOTE(review): narrowed from a bare 'except:' -- a missing cms
   # parameter raises AttributeError; confirm no other exception type
   # is expected here.
   print('No input file. This is a generator process.')
   generator = True
   # job values are 1-based (1..nJobs)
   listOfValues = [i+1 for i in range( nJobs )]
else:
   print("Number of files in the source:",len(process.source.fileNames), ":")
   pprint.pprint(process.source.fileNames)
   nFiles = len(process.source.fileNames)
   # BUGFIX: '/' is float division in Python 3 and would make range(nJobs)
   # raise TypeError below; use integer division.
   nJobs = nFiles // grouping
   # one extra job for the remainder, and at least one job overall
   if (nJobs!=0 and (nFiles % grouping) > 0) or nJobs==0:
      nJobs = nJobs + 1
      
   print("number of jobs to be created: ", nJobs)
   # job values are 1-based (1..nJobs)
   listOfValues = [i+1 for i in range( nJobs )]

batchManager.PrepareJobs( listOfValues )
0317 
# preparing master cfg file

# base_cfg.py holds the full configuration shared by all jobs; each job's
# run_cfg.py imports it and overrides only the source (and random seeds).
# Use a context manager so the handle is closed even if dumpPython() fails.
with open(batchManager.outputDir_+'/base_cfg.py','w') as cfgFile:
   cfgFile.write( process.dumpPython() + '\n')

# need to wait 5 seconds to give castor some time
# now on EOS, should be ok. reducing to 1 sec
# when running locally with nohup there are never enough processes to
# saturate the storage, so no throttling is needed at all
waitingTime = 0 if runningMode == 'LOCAL' else 1
batchManager.SubmitJobs( waitingTime )
0332 
0333 
# logging

from PhysicsTools.HeppyCore.utils.logger import logger

# the logger is driven from inside the output directory; remember where
# we came from so we can return (and resolve the cfg path) afterwards
oldPwd = os.getcwd()
os.chdir(batchManager.outputDir_)
logDir = 'Logger'
os.system( 'mkdir ' + logDir )
log = logger( logDir )

# record the software setup and the job list alongside the jobs
log.logCMSSW()
log.logJobs(nJobs)
#COLIN not so elegant... but tar is behaving in a strange way.
log.addFile( oldPwd + '/' + cfgFileName )

# unless --negate (dry-run) was given, ship the log to the remote dir
if not batchManager.options_.negate:
   if batchManager.remoteOutputDir_ != "":
      # we don't want to crush an existing log file on castor
      #COLIN could protect the logger against that.
      log.stageOut( batchManager.remoteOutputDir_ )
      
os.chdir( oldPwd )
0356 
0357