Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-05-13 01:31:53

0001 from __future__ import print_function
0002 import sys, os
0003 
0004 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
0005 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
0006 
0007 # ================================================================================
0008 
class MatrixException(Exception):
    """Error type raised by the matrix reader.

    The message is kept on ``self.msg`` and is also handed to the base
    ``Exception`` so that ``args``, ``repr()`` and pickling work as for any
    other exception.
    """

    def __init__(self, msg):
        # forward the message: without this call ``self.args`` stays empty
        super(MatrixException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        # __str__ has to return a string even if msg is some other object
        return str(self.msg)
0014         
0015 # ================================================================================
0016 
0017 class MatrixReader(object):
0018 
0019     def __init__(self, opt):
0020 
0021         self.reset(opt.what)
0022 
0023         self.wm=opt.wmcontrol
0024         self.revertDqmio=opt.revertDqmio
0025         self.addCommand=opt.command
0026         self.apply=opt.apply
0027         self.commandLineWf=opt.workflow
0028         self.overWrite=opt.overWrite
0029 
0030         self.noRun = opt.noRun
0031         return
0032 
0033     def reset(self, what='all'):
0034 
0035         self.what = what
0036 
0037         #a bunch of information, but not yet the WorkFlow object
0038         self.workFlowSteps = {}
0039         #the actual WorkFlow objects
0040         self.workFlows = []
0041         self.nameList  = {}
0042         
0043         self.filesPrefMap = {'relval_standard' : 'std-' ,
0044                              'relval_highstats': 'hi-'  ,
0045                              'relval_pileup': 'PU-'  ,
0046                              'relval_generator': 'gen-',
0047                              'relval_extendedgen': 'genExt-',
0048                              'relval_production': 'prod-'  ,
0049                              'relval_ged': 'ged-',
0050                              'relval_upgrade':'upg-',
0051                              'relval_cleanedupgrade':'clnupg-',
0052                              'relval_gpu':'gpu-',
0053                              'relval_2017':'2017-',
0054                              'relval_2026':'2026-',
0055                              'relval_identity':'id-',
0056                              'relval_machine': 'mach-',
0057                              'relval_premix': 'premix-'
0058                              }
0059 
0060         self.files = ['relval_standard' ,
0061                       'relval_highstats',
0062                       'relval_pileup',
0063                       'relval_generator',
0064                       'relval_extendedgen',
0065                       'relval_production',
0066                       'relval_ged',
0067                       'relval_upgrade',
0068                       'relval_cleanedupgrade',
0069                       'relval_gpu',
0070                       'relval_2017',
0071                       'relval_2026',
0072                       'relval_identity',
0073                       'relval_machine',
0074                       'relval_premix'
0075                       ]
0076         self.filesDefault = {'relval_standard':True ,
0077                              'relval_highstats':True ,
0078                              'relval_pileup':True,
0079                              'relval_generator':True,
0080                              'relval_extendedgen':True,
0081                              'relval_production':True,
0082                              'relval_ged':True,
0083                              'relval_upgrade':False,
0084                              'relval_cleanedupgrade':False,
0085                              'relval_gpu':False,
0086                              'relval_2017':True,
0087                              'relval_2026':True,
0088                              'relval_identity':False,
0089                              'relval_machine':True,
0090                              'relval_premix':True
0091                              }
0092 
0093         self.relvalModule = None
0094         
0095         return
0096 
0097     def makeCmd(self, step):
0098 
0099         cmd = ''
0100         cfg = None
0101         input = None
0102         for k,v in step.items():
0103             if 'no_exec' in k : continue  # we want to really run it ...
0104             if k.lower() == 'cfg':
0105                 cfg = v
0106                 continue # do not append to cmd, return separately
0107             if k.lower() == 'input':
0108                 input = v 
0109                 continue # do not append to cmd, return separately
0110             
0111             #chain the configs
0112             #if k.lower() == '--python':
0113             #    v = 'step%d_%s'%(index,v)
0114             cmd += ' ' + k + ' ' + str(v)
0115         return cfg, input, cmd
0116     
0117     def makeStep(self,step,overrides):
0118         from Configuration.PyReleaseValidation.relval_steps import merge
0119         if len(overrides) > 0:
0120             copyStep=merge([overrides]+[step])
0121             return copyStep
0122         else:    
0123             return step
0124 
0125     def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
0126         
0127         prefix = self.filesPrefMap[fileNameIn]
0128         
0129         print("processing", fileNameIn)
0130         
0131         try:
0132             _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
0133             self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
0134         except Exception as e:
0135             print("ERROR importing file ", fileNameIn, str(e))
0136             return
0137 
0138         if useInput is not None:
0139             print("request for INPUT for ", useInput)
0140 
0141         
0142         fromInput={}
0143         
0144         if useInput:
0145             for i in useInput:
0146                 if ':' in i:
0147                     (ik,il)=i.split(':')
0148                     if ik=='all':
0149                         for k in self.relvalModule.workflows.keys():
0150                             fromInput[float(k)]=int(il)
0151                     else:
0152                         fromInput[float(ik)]=int(il)
0153                 else:
0154                     if i=='all':
0155                         for k in self.relvalModule.workflows.keys():
0156                             fromInput[float(k)]=0
0157                     else:
0158                         fromInput[float(i)]=0
0159                 
0160         if fromScratch:
0161             fromScratch=map(float,fromScratch)
0162             for num in fromScratch:
0163                 if num in fromInput:
0164                     fromInput.pop(num)
0165         #overwrite steps
0166         if self.overWrite:
0167             for p in self.overWrite:
0168                 self.relvalModule.steps.overwrite(p)
0169         
0170         #change the origin of dataset on the fly
0171         if refRel:
0172             if ',' in refRel:
0173                 refRels=refRel.split(',')
0174                 if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
0175                     return
0176                 self.relvalModule.changeRefRelease(
0177                     self.relvalModule.steps,
0178                     list(zip(self.relvalModule.baseDataSetRelease,refRels))
0179                     )
0180             else:
0181                 self.relvalModule.changeRefRelease(
0182                     self.relvalModule.steps,
0183                     [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
0184                     )
0185             
0186 
0187         for num, wfInfo in self.relvalModule.workflows.items():
0188             commands=[]
0189             wfName = wfInfo[0]
0190             stepList = wfInfo[1]
0191             stepOverrides=wfInfo.overrides
0192             # upgrade case: workflow has basic name, key[, suffix (only special workflows)]
0193             wfKey = ""
0194             wfSuffix = ""
0195             if isinstance(wfName, list) and len(wfName)>1:
0196                 if len(wfName)>2: wfSuffix = wfName[2]
0197                 wfKey = wfName[1]
0198                 wfName = wfName[0]
0199             # if no explicit name given for the workflow, use the name of step1
0200             if wfName.strip() == '': wfName = stepList[0]
0201             # option to specialize the wf as the third item in the WF list
0202             addTo=None
0203             addCom=None
0204             if len(wfInfo)>=3:
0205                 addCom=wfInfo[2]
0206                 if not isinstance(addCom, list):   addCom=[addCom]
0207                 #print 'added dict',addCom
0208                 if len(wfInfo)>=4:
0209                     addTo=wfInfo[3]
0210                     #pad with 0
0211                     while len(addTo)!=len(stepList):
0212                         addTo.append(0)
0213 
0214             name=wfName
0215             # separate suffixes by + because show() excludes first part of name
0216             if len(wfKey)>0:
0217                 name = name+'+'+wfKey
0218                 if len(wfSuffix)>0: name = name+wfSuffix
0219             stepIndex=0
0220             ranStepList=[]
0221 
0222             #first resolve INPUT possibilities
0223             if num in fromInput:
0224                 ilevel=fromInput[num]
0225                 #print num,ilevel
0226                 for (stepIr,step) in enumerate(reversed(stepList)):
0227                     stepName=step
0228                     stepI=(len(stepList)-stepIr)-1
0229                     #print stepIr,step,stepI,ilevel                    
0230                     if stepI>ilevel:
0231                         #print "ignoring"
0232                         continue
0233                     if stepI!=0:
0234                         testName='__'.join(stepList[0:stepI+1])+'INPUT'
0235                     else:
0236                         testName=step+'INPUT'
0237                     #print "JR",stepI,stepIr,testName,stepList
0238                     if testName in self.relvalModule.steps:
0239                         #print "JR",stepI,stepIr
0240                         stepList[stepI]=testName
0241                         #pop the rest in the list
0242                         #print "\tmod prepop",stepList
0243                         for p in range(stepI):
0244                             stepList.pop(0)
0245                         #print "\t\tmod",stepList
0246                         break
0247                                                         
0248                                                     
0249             for (stepI,step) in enumerate(stepList):
0250                 stepName=step
0251                 if self.relvalModule.steps[stepName] is None:
0252                     continue
0253                 if self.wm:
0254                     #cannot put a certain number of things in wm
0255                     if stepName in ['SKIMD','SKIMCOSD','SKIMDreHLT']:
0256                         continue
0257                     
0258                 #replace stepName is needed
0259                 #if stepName in self.replaceStep
0260                 if len(name) > 0 : name += '+'
0261                 #any step can be mirrored with INPUT
0262                 ## maybe we want too level deep input
0263                 """
0264                 if num in fromInput:
0265                     if step+'INPUT' in self.relvalModule.steps.keys():
0266                         stepName = step+"INPUT"
0267                         stepList.remove(step)
0268                         stepList.insert(stepIndex,stepName)
0269                 """
0270                 stepNameTmp = stepName
0271                 if len(wfKey)>0: stepNameTmp = stepNameTmp.replace('_'+wfKey,"")
0272                 if len(wfSuffix)>0: stepNameTmp = stepNameTmp.replace(wfSuffix,"")
0273                 name += stepNameTmp
0274                 if addCom and (not addTo or addTo[stepIndex]==1):
0275                     from Configuration.PyReleaseValidation.relval_steps import merge
0276                     copyStep=merge(addCom+[self.makeStep(self.relvalModule.steps[stepName],stepOverrides)])
0277                     cfg, input, opts = self.makeCmd(copyStep)
0278                 else:
0279                     cfg, input, opts = self.makeCmd(self.makeStep(self.relvalModule.steps[stepName],stepOverrides))
0280 
0281                 if input and cfg :
0282                     msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
0283                     raise MatrixException(msg)
0284 
0285                 if input:
0286                     cmd = input
0287                     if self.noRun:
0288                         cmd.run=[]
0289                 else:
0290                     if cfg:
0291                         cmd  = 'cmsDriver.py '+cfg+' '+opts
0292                     else:
0293                         cmd  = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
0294                     if self.wm:
0295                         cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
0296                     if self.addCommand:
0297                         if self.apply:
0298                             if stepIndex in self.apply or stepName in self.apply:
0299                                 cmd +=' '+self.addCommand
0300                         else:
0301                           cmd +=' '+self.addCommand
0302                     if self.wm and self.revertDqmio=='yes':
0303                         cmd=cmd.replace('DQMIO','DQM')
0304                         cmd=cmd.replace('--filetype DQM','')
0305                 commands.append(cmd)
0306                 ranStepList.append(stepName)
0307                 stepIndex+=1
0308                 
0309             self.workFlowSteps[(num,prefix)] = (num, name, commands, ranStepList)
0310         
0311         return
0312 
0313 
0314     def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
0315 
0316         if selected:
0317             selected=map(float,selected)
0318         for matrixFile in self.files:
0319 
0320             self.reset(what)
0321 
0322             if self.what != 'all' and not any('_'+el in matrixFile for el in self.what.split(",")):
0323                 print("ignoring non-requested file",matrixFile)
0324                 continue
0325 
0326             if self.what == 'all' and not self.filesDefault[matrixFile]:
0327                 print("ignoring file not used by default (enable with -w)",matrixFile)
0328                 continue
0329 
0330             try:
0331                 self.readMatrix(matrixFile, useInput, refRel, fromScratch)
0332             except Exception as e:
0333                 print("ERROR reading file:", matrixFile, str(e))
0334                 raise
0335 
0336             if not self.workFlowSteps: continue
0337 
0338             dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
0339             outFile = open(dataFileName,'w')
0340 
0341             print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
0342             ids = sorted(self.workFlowSteps.keys())
0343             indexAndSteps=[]
0344 
0345             writtenWF=0
0346             for key in ids:
0347                 if selected and not (key[0] in selected):
0348                     continue
0349                 #trick to skip the HImix IB test
0350                 if key[0]==203.1 or key[0]==204.1 or key[0]==205.1 or key[0]==4.51 or key[0]==4.52: continue
0351                 num, name, commands, stepList = self.workFlowSteps[key]
0352                 
0353                 wfName,stepNames= name.split('+',1)
0354                 
0355                 stepNames=stepNames.replace('+SKIMCOSD','')
0356                 stepNames=stepNames.replace('+SKIMD','')
0357                 if 'HARVEST' in stepNames:
0358                     #find out automatically what to remove
0359                     exactb=stepNames.index('+HARVEST')
0360                     exacte=stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
0361                     stepNames=stepNames.replace(stepNames[exactb:exacte],'')
0362                 otherSteps = None
0363                 if '+' in stepNames:
0364                     step1,otherSteps = stepNames.split('+',1)
0365                 
0366                 line = str(num) + ' ++ '+ wfName 
0367                 if otherSteps and not step1Only:
0368                     line += ' ++ ' +otherSteps.replace('+',',')
0369                 else:
0370                     line += ' ++ none'
0371                 inputInfo=None
0372                 if not isinstance(commands[0],str):
0373                     inputInfo=commands[0]
0374                 if otherSteps:
0375                     for (i,c) in enumerate(otherSteps.split('+')):
0376                         #pad with set
0377                         for p in range(len(indexAndSteps),i+2):
0378                             indexAndSteps.append(set())
0379                         indexAndSteps[i+1].add((c,commands[i+1]))
0380 
0381                 if inputInfo :
0382                     #skip the samples from INPUT when step1Only is on
0383                     if step1Only: continue
0384                     line += ' ++ REALDATA: '+inputInfo.dataSet
0385                     if inputInfo.run!=[]: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
0386                     line += ', FILES: ' +str(inputInfo.files)
0387                     line += ', EVENTS: '+str(inputInfo.events)
0388                     if inputInfo.label!='':
0389                         line += ', LABEL: ' +inputInfo.label
0390                     line += ', LOCATION:'+inputInfo.location
0391                     line += ' @@@'
0392                 else:
0393                     line += ' @@@ '+commands[0]
0394                 if self.revertDqmio=='yes':
0395                     line=line.replace('DQMIO','DQM')
0396                 writtenWF+=1
0397                 outFile.write(line+'\n')
0398 
0399 
0400             outFile.write('\n'+'\n')
0401             if step1Only: continue
0402 
0403             for (index,s) in enumerate(indexAndSteps):
0404                 for (stepName,cmd) in s:
0405                     stepIndex=index+1
0406                     if 'dasquery.log' in cmd: continue
0407                     line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
0408                     if self.revertDqmio=='yes':
0409                         line=line.replace('DQMIO','DQM')
0410                     outFile.write(line+'\n')
0411                 outFile.write('\n'+'\n')
0412             outFile.close()
0413             print("wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name)
0414         return 
0415 
0416     def workFlowsByLocation(self, cafVeto=True):
0417         # Check if we are on CAF
0418         onCAF = False
0419         if 'cms/caf/cms' in os.environ['CMS_PATH']:
0420             onCAF = True
0421 
0422         workflows = []
0423         for workflow in self.workFlows:
0424             if isinstance(workflow.cmds[0], InputInfo):
0425                 if cafVeto and (workflow.cmds[0].location == 'CAF' and not onCAF):
0426                     continue
0427             workflows.append(workflow)
0428 
0429         return workflows
0430 
0431     def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
0432         if selected: selected = list(map(float,selected))
0433         wfs = self.workFlowsByLocation(cafVeto)
0434         maxLen = 100 # for summary, limit width of output
0435         fmt1   = "%-6s %-35s [1]: %s ..."
0436         fmt2   = "       %35s [%d]: %s ..."
0437         print("\nfound a total of ", len(wfs), ' workflows:')
0438         if selected:
0439             print("      of which the following", len(selected), 'were selected:')
0440         #-ap for now:
0441         maxLen = -1  # for individual listing, no limit on width
0442         fmt1   = "%-6s %-35s [1]: %s " 
0443         fmt2   = "       %35s [%d]: %s"
0444 
0445         N=[]
0446         for wf in wfs:
0447             if selected and float(wf.numId) not in selected: continue
0448             if extended: print('')
0449             #pad with zeros
0450             for i in range(len(N),len(wf.cmds)):                N.append(0)
0451             N[len(wf.cmds)-1]+=1
0452             wfName, stepNames = wf.nameId.split('+',1)
0453             for i,s in enumerate(wf.cmds):
0454                 if extended:
0455                     if i==0:
0456                         print(fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen]))
0457                     else:
0458                         print(fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen]))
0459                 else:
0460                     print("%-6s %-35s "% (wf.numId, stepNames))
0461                     break
0462         print('')
0463         for i,n in enumerate(N):
0464             if n:            print(n,'workflows with',i+1,'steps')
0465 
0466         return
0467     
0468     def createWorkFlows(self, fileNameIn):
0469 
0470         prefixIn = self.filesPrefMap[fileNameIn]
0471 
0472         # get through the list of items and update the requested workflows only
0473         keyList = self.workFlowSteps.keys()
0474         ids = []
0475         for item in keyList:
0476             id, pref = item
0477             if pref != prefixIn : continue
0478             ids.append(id)
0479         ids.sort()
0480         for key in ids:
0481             val = self.workFlowSteps[(key,prefixIn)]
0482             num, name, commands, stepList = val
0483             nameId = str(num)+'_'+name
0484             if nameId in self.nameList:
0485                 print("==> duplicate name found for ", nameId)
0486                 print('    keeping  : ', self.nameList[nameId])
0487                 print('    ignoring : ', val)
0488             else:
0489                 self.nameList[nameId] = val
0490 
0491             self.workFlows.append(WorkFlow(num, name, commands=commands))
0492 
0493         return
0494 
0495     def prepare(self, useInput=None, refRel='', fromScratch=None):
0496         
0497         for matrixFile in self.files:
0498             if self.what != 'all' and not any('_'+el in matrixFile for el in self.what.split(",")):
0499                 print("ignoring non-requested file",matrixFile)
0500                 continue
0501             if self.what == 'all' and not self.filesDefault[matrixFile]:
0502                 print("ignoring",matrixFile,"from default matrix")
0503                 continue
0504             
0505             try:
0506                 self.readMatrix(matrixFile, useInput, refRel, fromScratch)
0507             except Exception as e:
0508                 print("ERROR reading file:", matrixFile, str(e))
0509                 raise
0510             
0511             try:
0512                 self.createWorkFlows(matrixFile)
0513             except Exception as e:
0514                 print("ERROR creating workflows :", str(e))
0515                 raise
0516             
0517                 
0518     def show(self, selected=None, extended=True, cafVeto=True):
0519 
0520         self.showWorkFlows(selected, extended, cafVeto)
0521         print('\n','-'*80,'\n')
0522 
0523 
0524     def updateDB(self):
0525 
0526         import pickle
0527         pickle.dump(self.workFlows, open('theMatrix.pkl', 'w') )
0528 
0529         return
0530