Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:23:28

0001 #!/usr/bin/env python3
0002 # Colin
0003 # batch mode for cmsRun, March 2009
0004 
0005 from __future__ import print_function
0006 from builtins import range
0007 import os, sys,  imp, re, pprint, string, time,shutil,copy,pickle,math
0008 from optparse import OptionParser
0009 
0010 # particle flow specific
0011 from PhysicsTools.HeppyCore.utils.batchmanager import BatchManager
0012 import PhysicsTools.HeppyCore.utils.eostools as eostools
0013 
0014 # cms specific
0015 import FWCore.ParameterSet.Config as cms
0016 from IOMC.RandomEngine.RandomServiceHelper import RandomNumberServiceHelper
0017 
0018 
def batchScriptCCIN2P3():
   '''Return the text of the PBS batch script used at CCIN2P3.

   The script takes no parameters: it always runs "cmsRun run_cfg.py" from a
   copy of the submission directory ($PBS_O_WORKDIR) made on the worker, and
   copies the job directory back once cmsRun has finished.
   '''
   # The first line must be a real shebang ("#!"); without the leading '#'
   # the line would be executed as a command instead of selecting bash.
   return """#!/usr/bin/env bash
#PBS -l platform=LINUX,u_sps_cmsf,M=2000MB,T=2000000
# sets the queue
#PBS -q T
#PBS -eo
#PBS -me
#PBS -V

source $HOME/.bash_profile

echo '***********************'

ulimit -v 3000000

# coming back to submission dir do setup the env
cd $PBS_O_WORKDIR
eval `scramv1 ru -sh`


# back to the worker
cd -

# copy job dir here
cp -r $PBS_O_WORKDIR .

# go inside
jobdir=`ls`
echo $jobdir

cd $jobdir

cat > sysinfo.sh <<EOF
#! env bash
echo '************** ENVIRONMENT ****************'

env

echo
echo '************** WORKER *********************'
echo

free
cat /proc/cpuinfo 

echo
echo '************** START *********************'
echo
EOF

source sysinfo.sh > sysinfo.txt

cmsRun run_cfg.py

# copy job dir do disk
cd -
cp -r $jobdir $PBS_O_WORKDIR
"""
0078 
0079 
0080 
def rootfiles_to_eos_script(index, remoteDir):
   '''Return the shell snippet that copies the job's root files to EOS.

   index     -- value inserted into each file name: the first '.' of the
                name is replaced by '_<index>.'.
   remoteDir -- destination directory; it is passed through
                eostools.eosToLFN before being used.

   For every *.root file in the current directory the snippet copies the
   renamed file below /eos/cms/<remoteDir>, makes it 755, and then removes
   the local copy.
   '''
   remoteDir = eostools.eosToLFN(remoteDir)
   # Raw string: the sed expression needs literal backslashes, and '\.' is
   # not a valid escape sequence in a normal Python string literal.
   # Only the file that has just been copied is removed; removing *.root
   # inside the loop would delete the remaining files before their turn.
   return r"""
for file in *.root; do
newFileName=`echo $file | sed -r -e 's/\./_{index}\./'`
fullFileName={remoteDir}/$newFileName
{eos} cp $file /eos/cms/$fullFileName
{eos} chmod 755 /eos/cms/$fullFileName
rm $file
done
""".format(index=index, remoteDir=remoteDir, eos=eostools.eos_select)
0092 
0093 
def batchScriptCERN(remoteDir, index):
   '''Build the LSF flavour of the batch script and return it as a string.

   remoteDir -- remote output directory; when it is not the empty string the
                stage-out snippet from rootfiles_to_eos_script is inserted.
   index     -- job index, forwarded to rootfiles_to_eos_script.

   The program that gets run on run_cfg.py is the module-level "prog".
   '''
   template = """#!/bin/bash
# sets the queue
#BSUB -q 8nm

echo 'environment:'
echo
env
ulimit -v 3000000
echo 'copying job dir to worker'
cd $CMSSW_BASE/src
eval `scramv1 ru -sh`
cd -
cp -rf $LS_SUBCWD .
ls
cd `find . -type d | grep /`
echo 'running'
{prog} run_cfg.py
if [ $? != 0 ]; then
    echo wrong exit code! removing all root files
    rm *.root
    exit 1 
fi
echo 'sending the job directory back'
"""
   # assemble the script section by section, in execution order
   sections = [template.format(prog=prog)]
   if remoteDir != '':
      sections.append(rootfiles_to_eos_script(index, remoteDir))
   # whatever is left in the job directory goes back to the submission dir
   sections.append('cp -rf * $LS_SUBCWD\n')
   return ''.join(sections)
0127 
def batchScriptLocal(remoteDir, index):
   '''Build the local flavour of the batch script (meant to be run with
   nohup) and return it as a string.

   remoteDir -- remote output directory; when it is not the empty string the
                stage-out snippet from rootfiles_to_eos_script is appended.
   index     -- job index, forwarded to rootfiles_to_eos_script.

   The program that gets run on run_cfg.py is the module-level "prog".
   '''
   template = """#!/bin/bash
echo 'running'
{prog} run_cfg.py
if [ $? != 0 ]; then
    echo wrong exit code! removing all root files
    rm *.root
    exit 1 
fi
echo 'sending the job directory back'
"""
   sections = [template.format(prog=prog)]
   if remoteDir != '':
      sections.append(rootfiles_to_eos_script(index, remoteDir))
   return ''.join(sections)
0146 
0147 
class CmsBatchException( Exception):
   '''Exception class for this script.

   The object given at construction is kept in the "value" attribute and
   its string form is what str() returns for the exception.
   '''

   def __init__(self, value):
      # Initialise the base class as well so that "args" (and therefore
      # repr() and copying of the exception) carry the value too.
      super(CmsBatchException, self).__init__(value)
      self.value = value

   def __str__(self):
      return str( self.value)
0156 
0157 
class MyBatchManager( BatchManager ):
   '''Batch manager specific to cmsRun processes.'''

   def PrepareJobUser(self, jobDir, value ):
      '''Prepare one job. This function is called by the base class.

      Two files are written into jobDir:
      - batchScript.sh, the script to submit (made executable);
      - run_cfg.py, which imports everything from base_cfg and then
        overrides the settings specific to this job.

      jobDir -- directory of the job being prepared.
      value  -- job number, starting at 1. It selects the chunk of input
                files for this job and is passed on as the index to the
                batch script builders.

      Relies on the module-level names process, fullSource, options,
      generator and grouping.
      '''

      # start every job from an untouched copy of the original source
      process.source = fullSource.clone()

      # prepare the batch script
      scriptFileName = jobDir+'/batchScript.sh'
      storeDir = self.remoteOutputDir_.replace('/castor/cern.ch/cms','')
      mode = self.RunningMode(options.batch)
      # "with" guarantees the file is closed even if building the script fails
      with open(scriptFileName,'w') as scriptFile:
         if mode == 'LXPLUS':
            scriptFile.write( batchScriptCERN( storeDir, value) )
         elif mode == 'LOCAL':
            scriptFile.write( batchScriptLocal( storeDir, value) )
         # for any other mode the script file is left empty
      os.system('chmod +x %s' % scriptFileName)

      # prepare the cfg
      if generator:
         # generator job: no input files, give this job its own random seeds
         randSvc = RandomNumberServiceHelper(process.RandomNumberGeneratorService)
         randSvc.populate()
      else:
         # replace the list of fileNames by a chunk of filenames:
         # job number "value" gets files [(value-1)*grouping, value*grouping)
         iFileMin = (value-1)*grouping
         iFileMax = (value)*grouping
         process.source.fileNames = fullSource.fileNames[iFileMin:iFileMax]
         print(process.source)

      with open(jobDir+'/run_cfg.py','w') as cfgFile:
         cfgFile.write('import FWCore.ParameterSet.Config as cms\n\n')
         cfgFile.write('import os,sys\n')
         # need to import most of the config from the base directory containing all jobs
         cfgFile.write("sys.path.append('%s')\n" % os.path.dirname(jobDir) )
         cfgFile.write('from base_cfg import *\n')
         cfgFile.write('process.source = ' + process.source.dumpPython() + '\n')
         if generator:
            cfgFile.write('process.RandomNumberGeneratorService = ' + process.RandomNumberGeneratorService.dumpPython() + '\n')
0198 
0199 
batchManager = MyBatchManager()


# Keep a record of the exact command line in cmsBatch.txt, in the directory
# the script is launched from. str.join is used because the string.join
# function no longer exists in Python 3.
with open('cmsBatch.txt', 'w') as commandLog:
   commandLog.write(' '.join(sys.argv) + "\n")
0206 
# Usage text shown by the option parser's help. The two positional arguments
# it describes are read further down as args[0] (number of input files per
# job) and args[1] (the cfg file).
# NOTE(review): '%prog' looks like the optparse placeholder for the program
# name -- confirm that parser_ is an optparse.OptionParser.
batchManager.parser_.usage = """
%prog [options] <number of input files per job> <your_cfg.py>.

Submits a number of jobs taking your_cfg.py as a template. your_cfg.py can either read events from input files, or produce them with a generator. In the later case, the seeds are of course updated for each job.

A local output directory is created locally. This directory contains a job directory for each job, and a Logger/ directory containing information on the software you are using. 
By default:
- the name of the output directory is created automatically.
- the output root files end up in the job directories.

Each job directory contains:
- the full python configuration for this job. You can run it interactively by doing:
cmsRun run_cfg.py
- the batch script to run the job. You can submit it again by calling the batch command yourself, see the -b option.
- while running interactively: nohup.out, where the job stderr and stdout are redirected. To check the status of a job running interactively, do:
tail nohup.out
- after running:
  o the full nohup.out (your log) and your root files, in case you ran interactively
  o the LSF directory, in case you ran on LSF

Also see fwBatch.py, which is a layer on top of cmsBatch.py adapted to the organization of our samples on the CMST3. 

Examples:

First do:
cd $CMSSW_BASE/src/CMGTools/Common/test

to run on your local machine:
cmsBatch.py 1 testCMGTools_cfg.py -b 'nohup ./batchScript.sh&' 

to run on LSF (you must be logged on lxplus, not on your interactive machine, so that you have access to LSF)
cmsBatch.py 1 testCMGTools_cfg.py -b 'bsub -q 8nm < ./batchScript.sh' 
"""
# Options specific to this script, added to the parser owned by the manager.
# -p/--program: the executable run on run_cfg.py inside the batch scripts.
batchManager.parser_.add_option("-p", "--program", dest="prog",
                                help="program to run on your cfg file",
                                default="cmsRun")
# The -b/--batch option is not declared here (the old declaration below is
# disabled), yet options.batch is read later in this script.
# NOTE(review): presumably it is declared by the BatchManager base class -- confirm.
## batchManager.parser_.add_option("-b", "--batch", dest="batch",
##                                 help="batch command. default is: 'bsub -q 8nh < batchScript.sh'. You can also use 'nohup < ./batchScript.sh &' to run locally.",
##                                 default="bsub -q 8nh < .batchScript.sh")
# -c/--command-args: a single string, later split on spaces and appended to
# sys.argv while the cfg file is being loaded.
batchManager.parser_.add_option("-c", "--command-args", dest="cmdargs",
                                help="command line arguments for the job",
                                default=None)
# --notagCVS is a store_false flag: tagPackages is True unless it is given.
batchManager.parser_.add_option("--notagCVS", dest="tagPackages",
                                default=True,action="store_false",
                                help="tag the package on CVS (True)")

(options,args) = batchManager.parser_.parse_args()
batchManager.ParseOptions()

prog = options.prog
# doCVSTag is not read again in the visible part of this script
doCVSTag = options.tagPackages

# exactly two positional arguments are required: <files per job> <cfg file>
if len(args)!=2:
   batchManager.parser_.print_help()
   sys.exit(1)
0262 
# Make sure the batch command is one we know how to handle; if the manager
# rejects it, report the reason and stop.
try:
   runningMode = batchManager.RunningMode(options.batch)
except CmsBatchException as badBatchCommand:
   print(badBatchCommand)
   sys.exit(1)

# positional arguments: <number of input files per job> <cfg file>
cfgFileName = args[1]
grouping = int(args[0])
# default number of jobs; recomputed later when the cfg has input files
nJobs = grouping
0274 
print('Loading cfg')

# While the cfg file is being loaded, sys.argv is replaced by the cfg file
# name followed by the arguments given with -c/--command-args, so that a cfg
# reading sys.argv sees those instead of the cmsBatch.py command line.
pycfg_params = options.cmdargs
trueArgv = sys.argv
sys.argv = [cfgFileName]
if pycfg_params:
   sys.argv.extend(pycfg_params.split(' '))
print(sys.argv)


# load cfg script
# NOTE(review): the imp module is deprecated and was removed in Python 3.12;
# importlib is the replacement, but the file-level "import imp" would have
# to change together with this call.
try:
   # the context manager closes the cfg file even if loading it fails
   with open(cfgFileName, 'r') as handle:
      cfo = imp.load_source("pycfg", cfgFileName, handle)
   process = cfo.process
finally:
   # Restore original sys.argv, whether or not the cfg could be loaded
   sys.argv = trueArgv
0293 
0294 
# keep track of the original source
fullSource = process.source.clone()
generator = False

try:
   process.source.fileNames
except Exception:
   # The source has no usable fileNames: nothing to split, nJobs keeps the
   # value taken from the first command line argument.
   print('No input file. This is a generator process.')
   generator = True
else:
   nFiles = len(process.source.fileNames)
   print("Number of files in the source:",nFiles, ":")
   pprint.pprint(process.source.fileNames)
   # One job per group of "grouping" files, rounding up so that a last,
   # partially filled group still gets a job, and never fewer than one job.
   # Integer arithmetic only: "/" is true division in Python 3 and would
   # give a float, which range() does not accept.
   nJobs = max(1, -(-nFiles // grouping))

   print("number of jobs to be created: ", nJobs)

# job values run from 1 to nJobs (not 0 to nJobs-1)
listOfValues = list(range(1, nJobs + 1))

batchManager.PrepareJobs( listOfValues )
0318 
# preparing master cfg file
# Every run_cfg.py does "from base_cfg import *", so the complete process is
# dumped once into the output directory shared by all jobs.
with open(batchManager.outputDir_+'/base_cfg.py','w') as cfgFile:
   cfgFile.write( process.dumpPython() + '\n')

# need to wait 5 seconds to give castor some time
# now on EOS, should be ok. reducing to 1 sec
# of course, not the case when running with nohup
# because we will never have enough processes to saturate castor.
waitingTime = 0 if runningMode == 'LOCAL' else 1
batchManager.SubmitJobs( waitingTime )
0333 
0334 
# logging
# Record what was run in a Logger directory inside the output directory.

from PhysicsTools.HeppyCore.utils.logger import logger

# the logger is driven from inside the output directory; remember where we
# came from so the working directory can be restored at the end
oldPwd = os.getcwd()
os.chdir(batchManager.outputDir_)
logDir = 'Logger'
os.system( 'mkdir ' + logDir )
log = logger( logDir )

log.logCMSSW()
log.logJobs(nJobs)
#COLIN not so elegant... but tar is behaving in a strange way.
# NOTE(review): prefixing oldPwd assumes cfgFileName was given relative to
# the launch directory -- confirm behaviour for absolute paths.
log.addFile( oldPwd + '/' + cfgFileName )

# stage the log out only when the "negate" option is off and a remote output
# directory has been set
if not batchManager.options_.negate:
   if batchManager.remoteOutputDir_ != "":
      # we don't want to crush an existing log file on castor
      #COLIN could protect the logger against that.
      log.stageOut( batchManager.remoteOutputDir_ )

# back to the directory the script was launched from
os.chdir( oldPwd )
0357 
0358