OfflineValidation/test/submitAllJobs.py

0001 #!/usr/bin/env python3
0002
0003 '''Script that submits CMS Tracker Alignment Primary Vertex Validation workflows
0004 '''
0005
0006 from builtins import range
0007 __author__ = 'Marco Musich'
0008 __copyright__ = 'Copyright 2015, CERN CMS'
0009 __credits__ = ['Ernesto Migliore', 'Salvatore Di Guida', 'Javier Duarte']
0010 __license__ = 'Unknown'
0011 __maintainer__ = 'Marco Musich'
0012 __email__ = 'marco.musich@cern.ch'
0013 __version__ = 1
0014
0015 import datetime,time
0016 import os,sys
0017 import copy
0018 import string, re
0019 import configparser as ConfigParser, json
0020 from optparse import OptionParser
0021 from subprocess import Popen, PIPE
0022
# Banner printed when the script starts and prepended to every generated cfg file.
CopyRights = (
    '##################################\n'
    '#      submitAllJobs Script      #\n'
    '#      marco.musich@cern.ch      #\n'
    '#         December 2015          #\n'
    '##################################\n'
)
0028
0029 ##############################################
def drawProgressBar(percent, barLen=40):
    """Redraw a one-line text progress bar on stdout.

    Arguments:
    - `percent`: completed fraction; it is multiplied by 100 for display.
    - `barLen`: number of character cells between the brackets.
    """
    # carriage return: overwrite the bar drawn by the previous call
    sys.stdout.write("\r")
    # number of "=" cells, clamped so the bar is always barLen cells wide
    filled = min(max(int(barLen * percent), 0), barLen)
    progress = "=" * filled + " " * (barLen - filled)
    sys.stdout.write("[ %s ] %.2f%%" % (progress, percent * 100))
    sys.stdout.flush()
0041
0042 ##############################################
def getCommandOutput(command):
    """Run `command` through the shell and return everything it wrote to stdout.

    Arguments:
    - `command`: shell command line to execute.

    A message is printed when closing the pipe reports a non-zero status;
    the captured output is returned in either case.
    """
    pipe = os.popen(command)
    output = pipe.read()
    # close() gives None on success, otherwise the status of the command
    status = pipe.close()
    if status:
        print('%s failed w/ exit code %d' % (command, status))
    return output
0055
0056 ##############################################
def to_bool(value):
    """
       Interpret `value` as a boolean, comparing its lower-cased str() form
       against the accepted spellings. Raises Exception for anything else.
           True  spellings: "yes", "y", "true", "t", "1"
           False spellings: "no", "n", "false", "f", "0", "0.0", "", "none", "[]", "{}"
    """
    text = str(value).lower()
    spellings = (
        (True,  ("yes", "y", "true",  "t", "1")),
        (False, ("no",  "n", "false", "f", "0", "0.0", "", "none", "[]", "{}")),
    )
    for verdict, accepted in spellings:
        if text in accepted:
            return verdict
    raise Exception('Invalid value for boolean conversion: ' + str(value))
0067
0068 ####################--- Classes ---############################
class BetterConfigParser(ConfigParser.ConfigParser):
    """ConfigParser with case-preserving option names and "local" section overrides.

    For a section named ``X`` the companion section ``local`` + ``X.title()``
    (e.g. ``localJob`` for ``Job``) is consulted as a fallback / override by
    getResultingSection().
    """

    ##############################################
    def optionxform(self, optionstr):
        """Return the option name unchanged, so option names keep their case."""
        return optionstr

    ##############################################
    def exists( self, section, option):
        """Return True if `option` is one of the items of `section`.

        A missing section yields False instead of raising.
        """
        try:
            items = self.items(section)
        except ConfigParser.NoSectionError:
            return False
        return any(name == option for name, _ in items)

    ##############################################
    def __updateDict( self, dictionary, section ):
        """Merge the options of `section` into `dictionary` (modified in place).

        Options of the companion "local" section, when that section exists,
        are applied afterwards and therefore win. If `section` itself does not
        exist the dictionary is returned untouched.
        """
        try:
            for option in self.options( section ):
                dictionary[option] = self.get( section, option )
            localSection = "local"+section.title()
            if localSection in self.sections():
                for option in self.options( localSection ):
                    dictionary[option] = self.get( localSection, option )
        except ConfigParser.NoSectionError:
            # Deliberately tolerated: a missing section just contributes nothing.
            pass
        return dictionary

    ##############################################
    def getResultingSection( self, section, defaultDict = None, demandPars = None ):
        """Build a dict of the options of `section`.

        Arguments:
        - `section`: name of the section to read.
        - `defaultDict`: starting values; it is deep-copied, never modified.
        - `demandPars`: option names looked up first in `section` and, when
          absent there, in the companion "local" section.

        A demanded option that is found in neither place is silently left out
        (mandatory-option enforcement is not active in this script).
        configparser.NoSectionError propagates when a demanded option is
        requested from a section that does not exist.
        """
        result = {} if defaultDict is None else copy.deepcopy(defaultDict)
        for option in (demandPars or ()):
            try:
                result[option] = self.get( section, option )
            except ConfigParser.NoOptionError:
                # "name:suffix" maps to "localName:suffix"; only the first
                # suffix after the colon is kept.
                nameParts = section.split( ":" )
                localSection = "local"+nameParts[0].title()
                if len( nameParts ) > 1:
                    localSection += ":"+nameParts[1]
                if self.has_section( localSection ):
                    try:
                        result[option] = self.get( localSection, option )
                    except ConfigParser.NoOptionError:
                        # Not in the local section either: tolerated.
                        pass
        result = self.__updateDict( result, section )
        return result
0131
0132 ##### method to parse the input file ################################
def ConfigSectionMap(config, section):
    """Return the options of `section` as a dict of option name -> value.

    Arguments:
    - `config`: parser object providing options(section) and get(section, option).
    - `section`: name of the section to read.

    An option whose value cannot be retrieved is reported on stdout and
    stored as None, so one bad entry does not abort the whole section.
    """
    the_dict = {}
    for option in config.options(section):
        try:
            the_dict[option] = config.get(section, option)
        except Exception:
            # best effort: keep going, but never swallow KeyboardInterrupt/SystemExit
            print("exception on %s!" % option)
            the_dict[option] = None
    return the_dict
0145
0146 ###### method to create recursively directories on EOS #############
def mkdir_eos(out_path):
    """Create `out_path` on EOS one directory level at a time, then verify it.

    `eos mkdir` is issued only for the levels whose path contains 'test_out',
    so nothing is attempted near the top of the tree. Afterwards `eos ls` is
    run on the full path and its output is printed if it fails.
    """
    newpath='/'
    for component in out_path.split('/'):
        newpath=os.path.join(newpath,component)
        # do not issue mkdir from very top of the tree
        if newpath.find('test_out') > 0:
            # Popen/PIPE are the names this file imports from subprocess
            p = Popen(["eos", "mkdir", newpath], stdout=PIPE, stderr=PIPE)
            # communicate() already waits for the process to finish
            p.communicate()

    # now check that the directory exists
    p = Popen(["eos", "ls", out_path], stdout=PIPE, stderr=PIPE)
    (out, err) = p.communicate()
    if p.returncode !=0:
        print(out)
0161     if p.returncode !=0:
0162         print(out)
0163
def split(sequence, size):
    """Generate consecutive slices of `sequence`, each at most `size` items long.

    The last slice holds whatever is left over, so it may be shorter.
    """
    ##########################
    # aux generator function to split lists
    # based on http://sandrotosi.blogspot.com/2011/04/python-group-list-in-sub-lists-of-n.html
    # about generators see also http://stackoverflow.com/questions/231767/the-python-yield-keyword-explained
    ##########################
    starts = range(0, len(sequence), size)
    yield from (sequence[start:start + size] for start in starts)
0172
0173 #############
class Job:
    """One primary-vertex validation batch job.

    Stores the job settings (all of them are substituted as text into the
    cmsRun template, so they are expected to be strings) and knows how to
    write the job's cmsRun configuration, write its LSF script, and submit
    that script with bsub.
    """

    def __init__(self, job_id, job_name, isDA, isMC, applyBOWS, applyEXTRACOND, extraconditions, runboundary, lumilist, maxevents, gt, allFromGT, alignmentDB, alignmentTAG, apeDB, apeTAG, bowDB, bowTAG, vertextype, tracktype, applyruncontrol, ptcut, CMSSW_dir ,the_dir):
        """Store the job settings; nothing is written to disk here.

        - `job_id`, `job_name`: combined into the base name of every output.
        - `extraconditions`: mapping of record name to "connect,tag[,label]".
        - `CMSSW_dir`: CMSSW release area; its "src" directory is where the
          LSF script sets up the runtime environment.
        - `the_dir`: directory holding the cfg template; the "cfg", "LSF"
          and "log" sub-directories are created below it.
        The remaining arguments are the values substituted into the template.
        """
        self.job_id=job_id
        self.batch_job_id = None          # raw bsub output, set by submit()
        self.job_name=job_name

        self.isDA              = isDA
        self.isMC              = isMC
        self.applyBOWS         = applyBOWS
        self.applyEXTRACOND    = applyEXTRACOND
        self.extraCondVect     = extraconditions
        self.runboundary       = runboundary
        self.lumilist          = lumilist
        self.maxevents         = maxevents
        self.gt                = gt
        self.allFromGT         = allFromGT
        self.alignmentDB       = alignmentDB
        self.alignmentTAG      = alignmentTAG
        self.apeDB             = apeDB
        self.apeTAG            = apeTAG
        self.bowDB             = bowDB
        self.bowTAG            = bowTAG
        self.vertextype        = vertextype
        self.tracktype         = tracktype
        self.applyruncontrol   = applyruncontrol
        self.ptcut             = ptcut

        self.the_dir=the_dir
        self.CMSSW_dir=CMSSW_dir

        self.output_full_name=self.getOutputBaseName()+"_"+str(self.job_id)

        # set by createTheCfgFile()
        self.cfg_dir=None
        self.outputCfgName=None

        # LSF variables, set by createTheLSFFile()
        self.LSF_dir=None
        self.output_LSF_name=None

        self.lfn_list=list()

        # NOTE: self.OUTDIR only exists after setEOSout() has been called;
        # createTheLSFFile() and getOutputFileName() depend on it.

    def __del__(self):
        """Drop the list of input files when the job object goes away."""
        del self.lfn_list

    def setEOSout(self,theEOSdir):
        """Set the EOS directory that receives the job output."""
        self.OUTDIR = theEOSdir

    def getOutputBaseName(self):
        """Return the base name shared by all outputs of this workflow."""
        return "PVValidation_"+self.job_name

    def _extraConditionsText(self):
        """Return the cfg snippet declaring one PoolDBESSource per extra condition.

        Only a mapping of record name -> "connect,tag[,label]" is understood,
        and only keys containing "Rcd" are used; anything else gives "".
        Raises ValueError when an entry does not provide connect and tag.
        """
        if not isinstance(self.extraCondVect, dict):
            return ""
        chunks = []
        for element, spec in self.extraCondVect.items():
            if "Rcd" not in element:
                continue
            params = [item.strip() for item in spec.split(',')]
            if len(params) < 2:
                raise ValueError("extra condition '%s' must be given as 'connect,tag[,label]'" % element)
            chunks.append(" \n")
            chunks.append("     process.conditionsIn"+element+"= CalibTracker.Configuration.Common.PoolDBESSource_cfi.poolDBESSource.clone( \n")
            chunks.append("          connect = cms.string('"+params[0]+"'), \n")
            chunks.append("          toGet = cms.VPSet(cms.PSet(record = cms.string('"+element+"'), \n")
            chunks.append("                                     tag = cms.string('"+params[1]+"'), \n")
            if (len(params)>2):
                chunks.append("                                     label = cms.string('"+params[2]+"') \n")
            chunks.append("                                     ) \n")
            chunks.append("                            ) \n")
            chunks.append("          ) \n")
            # the ESPrefer must name the very source declared above
            chunks.append("     process.prefer_conditionsIn"+element+" = cms.ESPrefer(\"PoolDBESSource\", \"conditionsIn"+element+"\") \n \n")
        return "".join(chunks)

    def createTheCfgFile(self,lfn):
        """Write the cmsRun configuration of this job into <the_dir>/cfg.

        The template PVValidation_TEMPL_cfg.py found in `the_dir` is read, the
        module-level CopyRights banner is prepended, every *TEMPLATE
        placeholder is replaced by the matching job setting, and the
        extra-condition sources are inserted right before the template line
        containing 'END OF EXTRA CONDITIONS'.

        - `lfn`: iterable of input file names for FILESOURCETEMPLATE.
        """
        # write the cfg file
        self.cfg_dir = os.path.join(self.the_dir,"cfg")
        os.makedirs(self.cfg_dir, exist_ok=True)

        self.outputCfgName=self.output_full_name+"_cfg.py"

        # data and MC currently share one template
        template_cfg_file = os.path.join(self.the_dir,"PVValidation_TEMPL_cfg.py")
        with open(template_cfg_file) as fin:
            config_txt = '\n\n' + CopyRights + '\n\n' + fin.read()

        # applied in this order, one placeholder after the other
        substitutions = (
            ("ISDATEMPLATE",self.isDA),
            ("ISMCTEMPLATE",self.isMC),
            ("APPLYBOWSTEMPLATE",self.applyBOWS),
            ("EXTRACONDTEMPLATE",self.applyEXTRACOND),
            ("USEFILELISTTEMPLATE","True"),
            ("RUNBOUNDARYTEMPLATE",self.runboundary),
            ("LUMILISTTEMPLATE",self.lumilist),
            ("MAXEVENTSTEMPLATE",self.maxevents),
            ("GLOBALTAGTEMPLATE",self.gt),
            ("ALLFROMGTTEMPLATE",self.allFromGT),
            ("ALIGNOBJTEMPLATE",self.alignmentDB),
            ("GEOMTAGTEMPLATE",self.alignmentTAG),
            ("APEOBJTEMPLATE",self.apeDB),
            ("ERRORTAGTEMPLATE",self.apeTAG),
            ("BOWSOBJECTTEMPLATE",self.bowDB),
            ("BOWSTAGTEMPLATE",self.bowTAG),
            ("VERTEXTYPETEMPLATE",self.vertextype),
            ("TRACKTYPETEMPLATE",self.tracktype),
            ("PTCUTTEMPLATE",self.ptcut),
            ("RUNCONTROLTEMPLATE",self.applyruncontrol),
            ("FILESOURCETEMPLATE","["+",".join("\'"+name+"\'" for name in lfn)+"]"),
            ("OUTFILETEMPLATE",self.output_full_name+".root"),
        )
        for placeholder, value in substitutions:
            config_txt=config_txt.replace(placeholder,value)

        extra_txt = self._extraConditionsText()

        # text mode: config_txt is a str
        with open(os.path.join(self.cfg_dir,self.outputCfgName),'w') as fout:
            for line in config_txt.splitlines(True):
                if extra_txt and 'END OF EXTRA CONDITIONS' in line:
                    fout.write(extra_txt)
                fout.write(line)

    def createTheLSFFile(self):
        """Write the LSF script of this job into <the_dir>/LSF.

        The script sets up the CMSSW runtime with scram, runs cmsRun on the
        cfg written by createTheCfgFile(), and copies every *root and *txt
        file it finds to the EOS output directory with xrdcp. The batch log
        goes to <the_dir>/log. Requires setEOSout() and createTheCfgFile()
        to have been called first.
        """
        # directory to store the LSF to be submitted
        self.LSF_dir = os.path.join(self.the_dir,"LSF")
        os.makedirs(self.LSF_dir, exist_ok=True)

        self.output_LSF_name=self.output_full_name+".lsf"

        job_name = self.output_full_name

        log_dir = os.path.join(self.the_dir,"log")
        os.makedirs(log_dir, exist_ok=True)

        with open(os.path.join(self.LSF_dir,self.output_LSF_name),'w') as fout:
            fout.write("#!/bin/sh \n")
            fout.write("#BSUB -L /bin/sh\n")
            fout.write("#BSUB -J "+job_name+"\n")
            fout.write("#BSUB -o "+os.path.join(log_dir,job_name+".log")+"\n")
            fout.write("#BSUB -q cmscaf1nd \n")
            fout.write("JobName="+job_name+" \n")
            fout.write("OUT_DIR="+self.OUTDIR+" \n")
            fout.write("LXBATCH_DIR=`pwd` \n")
            fout.write("cd "+os.path.join(self.CMSSW_dir,"src")+" \n")
            fout.write("eval `scram runtime -sh` \n")
            fout.write("cd $LXBATCH_DIR \n")
            fout.write("cmsRun "+os.path.join(self.cfg_dir,self.outputCfgName)+" \n")
            fout.write("ls -lh . \n")
            fout.write("for RootOutputFile in $(ls *root ); do xrdcp -f ${RootOutputFile}  root://eoscms//eos/cms${OUT_DIR}/${RootOutputFile} ; done \n")
            fout.write("for TxtOutputFile in $(ls *txt ); do xrdcp -f ${TxtOutputFile}  root://eoscms//eos/cms${OUT_DIR}/${TxtOutputFile} ; done \n")

    def getOutputFileName(self):
        """Return the path of this job's ROOT file inside the EOS output directory."""
        return os.path.join(self.OUTDIR,self.output_full_name+".root")

    def submit(self):
        """Make the LSF script executable and submit it with bsub.

        The text printed by bsub is kept in self.batch_job_id.
        """
        print("submit job", self.job_id)
        lsf_script = os.path.join(self.LSF_dir,self.output_LSF_name)
        os.system("chmod u+x " + lsf_script)
        self.batch_job_id = getCommandOutput("bsub < "+lsf_script)

    def getBatchjobId(self):
        """Return the text between the first "<" and the next ">" of the bsub output.

        Only meaningful after submit(); it fails if the stored bsub output
        does not contain a "<...>" field.
        """
        return self.batch_job_id.split("<")[1].split(">")[0]
0358
0359 ##############################################
def _startDASQuery(query):
    """Start `dasgoclient -query <query>` in the background and return the process.

    The arguments are passed as a list (no shell involved) and the pipes are
    opened in text mode, so the output comes back as str.
    """
    return Popen(["dasgoclient", "-query", query], stdout=PIPE, stderr=PIPE, universal_newlines=True)


def _collectDASLines(process):
    """Wait for a query started by _startDASQuery and return its non-empty output lines."""
    out, _ = process.communicate()
    return [line for line in out.splitlines() if line.strip()]


##############################################
def main():
##############################################
    """Prepare, and with --submit also submit, the validation jobs and the harvesting job.

    Steps, for each configured workflow:
    1. read the settings from the file given with --input, or use the
       built-in defaults;
    2. ask DAS (dasgoclient) for the input files of the dataset given with
       --dataset, grouped in chunks of 10 files (MC), for one run (data), or
       run by run (data with --doRunBased);
    3. write one cfg file and one LSF script per group through Job;
    4. write a harvesting script that fetches the job outputs, merges them
       with hadd and copies the merged file to EOS; with --submit it is sent
       to the batch system with a dependency on all submitted jobs.

    A summary of the settings is written to a log file in "submissions".
    Raises RuntimeError if CMSSW_BASE or USER is not set in the environment.
    """

    print('\n'+CopyRights)

    # CMSSW section
    input_CMSSW_BASE = os.environ.get('CMSSW_BASE')
    if input_CMSSW_BASE is None:
        raise RuntimeError("CMSSW_BASE is not set: set up the CMSSW environment first")
    AnalysisStep_dir = os.path.join(input_CMSSW_BASE,"src/Alignment/OfflineValidation/test")
    lib_path = os.path.abspath(AnalysisStep_dir)
    sys.path.append(lib_path)

    desc="""This is a description of %prog."""
    parser = OptionParser(description=desc,version='%prog version 0.1')
    parser.add_option('-s','--submit',    help='job submitted',    dest='submit',     action='store_true',  default=False)
    parser.add_option('-j','--jobname',   help='task name',        dest='taskname',   action='store',       default='')
    parser.add_option('-D','--dataset',   help='selected dataset', dest='data',       action='store'      , default='')
    parser.add_option('-r','--doRunBased',help='selected dataset', dest='doRunBased', action='store_true' , default=False)
    parser.add_option('-i','--input',     help='set input configuration (overrides default)', dest='inputconfig',action='store',default=None)

    (opts, args) = parser.parse_args()

    now = datetime.datetime.now()
    t = now.strftime("test_%Y_%m_%d_%H_%M_%S_DATA_")
    t+=opts.taskname

    USER = os.environ.get('USER')
    if USER is None:
        raise RuntimeError("USER is not set: cannot build the EOS output path")
    eosdir=os.path.join("/store/caf/user",USER,"test_out",t)
    #mkdir_eos(eosdir)

    doRunBased = opts.doRunBased
    ConfigFile = opts.inputconfig

    # please notice: since in principle one wants to run on several different samples simultaneously,
    # all these inputs are vectors (one entry per workflow)

    if ConfigFile is not None:

        print("********************************************************")
        print("* Parsing from input file:", ConfigFile," ")

        config = BetterConfigParser()
        config.read(ConfigFile)

        # each section is read once and then picked apart
        jobSection       = ConfigSectionMap(config,"Job")
        condSection      = ConfigSectionMap(config,"Conditions")
        typeSection      = ConfigSectionMap(config,"Type")
        selSection       = ConfigSectionMap(config,"Selection")

        jobName          = [jobSection['jobname']]
        isDA             = [jobSection['isda']]
        isMC             = [jobSection['ismc']]
        maxevents        = [jobSection['maxevents']]

        gt               = [condSection['gt']]
        allFromGT        = [condSection['allFromGT']]
        applyEXTRACOND   = [condSection['applyextracond']]
        conditions       = [config.getResultingSection("ExtraConditions")]

        alignmentDB      = [condSection['alignmentdb']]
        alignmentTAG     = [condSection['alignmenttag']]
        apeDB            = [condSection['apedb']]
        apeTAG           = [condSection['apetag']]
        applyBOWS        = [condSection['applybows']]
        bowDB            = [condSection['bowdb']]
        bowTAG           = [condSection['bowtag']]

        vertextype       = [typeSection['vertextype']]
        tracktype        = [typeSection['tracktype']]

        applyruncontrol  = [selSection['applyruncontrol']]
        ptcut            = [selSection['ptcut']]
        runboundary      = [selSection['runboundary']]
        lumilist         = [selSection['lumilist']]

    else :

        print("********************************************************")
        print("* Parsing from command line                            *")
        print("********************************************************")

        jobName         = ['MinBiasQCD_CSA14Ali_CSA14APE']
        isDA            = ['True']
        isMC            = ['True']
        maxevents       = ['10000']

        gt              = ['START53_V7A::All']
        allFromGT       = ['False']
        applyEXTRACOND  = ['False']
        # same shape as the [ExtraConditions] section: record -> "connect,tag"
        conditions      = [{'SiPixelTemplateDBObjectRcd':'frontier://FrontierProd/CMS_COND_31X_PIXEL,SiPixelTemplates38T_2010_2011_mc',
                            'SiPixelQualityFromDBRcd':'frontier://FrontierProd/CMS_COND_31X_PIXEL,SiPixelQuality_v20_mc'}]
        alignmentDB     = ['sqlite_file:/afs/cern.ch/cms/CAF/CMSALCA/ALCA_TRACKERALIGN/PayLoads/TkAl-14-02_CSA14/Alignments_CSA14_v1.db']
        alignmentTAG    = ['TrackerCSA14Scenario']
        apeDB           = ['sqlite_file:/afs/cern.ch/cms/CAF/CMSALCA/ALCA_TRACKERALIGN/PayLoads/TkAl-14-02_CSA14/AlignmentErrors_CSA14_v1.db']
        apeTAG          = ['TrackerCSA14ScenarioErrors']
        applyBOWS       = ['True']
        bowDB           = ['frontier://FrontierProd/CMS_COND_310X_ALIGN']
        bowTAG          = ['TrackerSurfaceDeformations_2011Realistic_v2_mc']

        vertextype      = ['offlinePrimaryVertices']
        tracktype       = ['ALCARECOTkAlMinBias']

        applyruncontrol = ['False']
        ptcut           = ['3']
        runboundary     = ['1']
        lumilist        = ['']

    # print some of the configuration

    print("********************************************************")
    print("* Configuration info *")
    print("********************************************************")
    print("- submitted   : ",opts.submit)
    print("- Jobname     : ",jobName)
    print("- use DA      : ",isDA)
    print("- is MC       : ",isMC)
    print("- is run-based: ",doRunBased)
    print("- evts/job    : ",maxevents)
    print("- GlobatTag   : ",gt)
    print("- allFromGT?  : ",allFromGT)
    print("- extraCond?  : ",applyEXTRACOND)
    print("- extraCond   : ",conditions)
    print("- Align db    : ",alignmentDB)
    print("- Align tag   : ",alignmentTAG)
    print("- APE db      : ",apeDB)
    print("- APE tag     : ",apeTAG)
    print("- use bows?   : ",applyBOWS)
    print("- K&B db      : ",bowDB)
    print("- K&B tag     : ",bowTAG)
    print("- VertexColl  : ",vertextype)
    print("- TrackColl   : ",tracktype)
    print("- RunControl? : ",applyruncontrol)
    print("- Pt>           ",ptcut)
    print("- run=          ",runboundary)
    print("- JSON        : ",lumilist)
    print("********************************************************")

    sublogging_dir = os.path.join(AnalysisStep_dir,"submissions")
    if not os.path.exists(sublogging_dir):
        os.makedirs(sublogging_dir)
    submission_log_file = os.path.join(sublogging_dir,"sub"+t+".log")
    # drop the leading "test_" only (str.strip would also eat matching
    # characters at the end of the task name)
    timestamp = t[len("test_"):]
    with open(submission_log_file,'w') as log_fout:
        for iConf in range(len(jobName)):
            log_fout.write("============================================================ \n")
            log_fout.write("- timestamp   : "+timestamp+"\n")
            log_fout.write("- submitted   : "+str(opts.submit)+"\n")
            log_fout.write("- Jobname     : "+jobName[iConf]+"\n")
            log_fout.write("- use DA      : "+isDA[iConf]+"\n")
            log_fout.write("- is MC       : "+isMC[iConf]+"\n")
            log_fout.write("- is run-based: "+str(doRunBased)+"\n")
            log_fout.write("- evts/job    : "+maxevents[iConf]+"\n")
            log_fout.write("- GlobatTag   : "+gt[iConf]+"\n")
            log_fout.write("- allFromGT?  : "+allFromGT[iConf]+"\n")
            log_fout.write("- extraCond?  : "+applyEXTRACOND[iConf]+"\n")
            for attribute,value in conditions[iConf].items():
                log_fout.write('   - {} : {}'.format(attribute, value)+"\n")
            log_fout.write("- Align db    : "+alignmentDB[iConf]+"\n")
            log_fout.write("- Align tag   : "+alignmentTAG[iConf]+"\n")
            log_fout.write("- APE db      : "+apeDB[iConf]+"\n")
            log_fout.write("- APE tag     : "+apeTAG[iConf]+"\n")
            log_fout.write("- use bows?   : "+applyBOWS[iConf]+"\n")
            log_fout.write("- K&B db      : "+bowDB[iConf]+"\n")
            log_fout.write("- K&B tag     : "+bowTAG[iConf]+"\n")
            log_fout.write("- VertexColl  : "+vertextype[iConf]+"\n")
            log_fout.write("- TrackColl   : "+tracktype[iConf]+"\n")
            log_fout.write("- RunControl? : "+applyruncontrol[iConf]+"\n")
            log_fout.write("- Pt>           "+ptcut[iConf]+"\n")
            log_fout.write("- run=          "+runboundary[iConf]+"\n")
            log_fout.write("- JSON        : "+lumilist[iConf]+"\n")
            log_fout.write("- output EOS  : "+eosdir+"\n")

    print("Will run on ",len(jobName),"workflows")

    for iConf in range(len(jobName)):
        print("Preparing",iConf," configurtion to run")

        # for hadd script
        scripts_dir = os.path.join(AnalysisStep_dir,"scripts")
        if not os.path.exists(scripts_dir):
            os.makedirs(scripts_dir)
        hadd_script_file = os.path.join(scripts_dir,jobName[iConf]+".sh")

        output_file_list1=list()            # one xrdcp line per job output
        output_file_list2=list()            # pieces of the single hadd command
        output_file_list2.append("hadd ")
        mergedFile = ""                     # hadd target, known once the first job exists

        inputFiles = []                     # one list of files per job
        myRuns = []                         # run label of each job, parallel to inputFiles

        if (to_bool(isMC[iConf]) or (not to_bool(doRunBased))):
            if(to_bool(isMC[iConf])):
                print("this is MC")
                mylist = _collectDASLines(_startDASQuery('file dataset='+opts.data))

                # MC: one job per chunk of 10 files
                for files in split(mylist,10):
                    inputFiles.append(files)
                    myRuns.append(str(1))
            else:
                print("this is DATA (not doing full run-based selection)")
                mylist = _collectDASLines(_startDASQuery('file dataset='+opts.data+' run='+runboundary[iConf]))
                print("mylist:",mylist)
                inputFiles.append(mylist)
                myRuns.append(str(runboundary[iConf]))

        else:
            print("this is Data")
            print("doing run based selection")
            listOfRuns = _collectDASLines(_startDASQuery('run dataset='+opts.data))
            listOfRuns.sort()
            myRuns = listOfRuns
            print("Will run on ",len(listOfRuns), " runs")
            print(listOfRuns)

            # start all per-run queries first, then collect them in order
            procs = [_startDASQuery('file run='+run+' dataset='+opts.data) for run in listOfRuns]

            print("********************************************************")
            print(" Retrieving run info")

            for i,p in enumerate(procs):
                inputFiles.append(_collectDASLines(p))
                # i+1 so that the bar reaches 100% on the last run
                drawProgressBar(float(i+1)/len(procs))

            sys.stdout.write("\n")

        # ids of every job submitted for this workflow
        batchJobIds = []

        for jobN,theSrcFiles in enumerate(inputFiles):
            print(jobN,"run",myRuns[jobN],theSrcFiles)

            # MC jobs are numbered, data jobs are labelled by their run
            if(to_bool(isMC[iConf])):
                thejobIndex=jobN
            else:
                thejobIndex=myRuns[jobN]

            aJob = Job(thejobIndex,
                       jobName[iConf],isDA[iConf],isMC[iConf],
                       applyBOWS[iConf],applyEXTRACOND[iConf],conditions[iConf],
                       myRuns[jobN], lumilist[iConf], maxevents[iConf],
                       gt[iConf],allFromGT[iConf],
                       alignmentDB[iConf], alignmentTAG[iConf],
                       apeDB[iConf], apeTAG[iConf],
                       bowDB[iConf], bowTAG[iConf],
                       vertextype[iConf], tracktype[iConf],
                       applyruncontrol[iConf],
                       ptcut[iConf],input_CMSSW_BASE,AnalysisStep_dir)

            aJob.setEOSout(eosdir)
            aJob.createTheCfgFile(theSrcFiles)
            aJob.createTheLSFFile()

            output_file_list1.append("xrdcp root://eoscms//eos/cms"+aJob.getOutputFileName()+" /tmp/$USER/"+opts.taskname+" \n")
            if jobN == 0:
                # first argument of hadd is the merged output file
                mergedFile = "/tmp/$USER/"+opts.taskname+"/"+aJob.getOutputBaseName()+".root"
                output_file_list2.append(mergedFile+" ")
            output_file_list2.append("/tmp/$USER/"+opts.taskname+"/"+os.path.split(aJob.getOutputFileName())[1]+" ")

            if opts.submit:
                aJob.submit()
                batchJobIds.append(aJob.getBatchjobId())

        if opts.submit:
            print("********************************************************")
            for theBatchJobId in batchJobIds:
                print("theBatchJobId is: ",theBatchJobId)

        # the script is fully written and closed before it is made executable
        # and handed to bsub
        with open(hadd_script_file,'w') as fout:
            fout.write("#!/bin/bash \n")
            # shell assignments must not have blanks around "="
            fout.write("MAIL=$USER@mail.cern.ch \n")
            fout.write("OUT_DIR="+eosdir+ "\n")
            fout.write("FILE="+mergedFile+"\n")
            fout.write("echo $HOST | mail -s \"Harvesting job started\" $USER@mail.cern.ch \n")
            fout.write("cd "+os.path.join(input_CMSSW_BASE,"src")+"\n")
            fout.write("eval `scram r -sh` \n")
            fout.write("mkdir -p /tmp/$USER/"+opts.taskname+" \n")
            fout.writelines(output_file_list1)
            fout.writelines(output_file_list2)
            fout.write("\n")
            fout.write("echo \"xrdcp -f $FILE root://eoscms//eos/cms$OUT_DIR\" \n")
            # copy the merged file to EOS, exactly as announced by the echo above
            fout.write("xrdcp -f $FILE root://eoscms//eos/cms$OUT_DIR \n")
            fout.write("echo \"Harvesting for complete; please find output at $OUT_DIR \" | mail -s \"Harvesting for" +opts.taskname +" compled\" $MAIL \n")

        os.system("chmod u+x "+hadd_script_file)

        # run the harvesting only after every submitted job has ended
        dependency = '"' + " && ".join(["ended(" + jobId + ")" for jobId in batchJobIds]) + '"'
        print(dependency)
        lastJobCommand = "bsub -o harvester"+opts.taskname+".tmp -q 1nh -w "+dependency+" "+hadd_script_file
        print(lastJobCommand)
        if opts.submit:
            lastJobOutput = getCommandOutput(lastJobCommand)
            print(lastJobOutput)
0723         del output_file_list1
0724
# Run the submission only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()