Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-03-26 01:50:59

0001 import sys, os
0002 
0003 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
0004 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
0005 from Configuration.PyReleaseValidation.upgradeWorkflowComponents import defaultDataSets,undefInput
0006 # ================================================================================
0007 
class MatrixException(Exception):
    """Error raised for fatal inconsistencies found while reading a matrix.

    The message stays available as ``msg`` and is also handed to
    ``Exception`` so that ``args``, ``repr()`` and pickling carry it.
    """

    def __init__(self, msg):
        # forward the message: without this ``args`` would stay empty
        super().__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg
0013         
0014 # ================================================================================
0015 
0016 class MatrixReader(object):
0017 
0018     def __init__(self, opt):
0019 
0020         self.reset(opt.what)
0021 
0022         self.wm=opt.wmcontrol
0023         self.revertDqmio=opt.revertDqmio
0024         self.addCommand=opt.command
0025         self.apply=opt.apply
0026         self.commandLineWf=opt.workflow
0027         self.overWrite=opt.overWrite
0028         
0029         self.noRun = opt.noRun
0030         self.checkInputs = opt.checkInputs
0031         return
0032 
0033     def reset(self, what='all'):
0034 
0035         self.what = what
0036 
0037         #a bunch of information, but not yet the WorkFlow object
0038         self.workFlowSteps = {}
0039         #the actual WorkFlow objects
0040         self.workFlows = []
0041         self.nameList  = {}
0042         
0043         self.filesPrefMap = {'relval_standard' : 'std-' ,
0044                              'relval_highstats': 'hi-'  ,
0045                              'relval_pileup': 'PU-'  ,
0046                              'relval_generator': 'gen-',
0047                              'relval_extendedgen': 'genExt-',
0048                              'relval_production': 'prod-'  ,
0049                              'relval_ged': 'ged-',
0050                              'relval_upgrade':'upg-',
0051                              'relval_cleanedupgrade':'clnupg-',
0052                              'relval_gpu':'gpu-',
0053                              'relval_2017':'2017-',
0054                              'relval_Run4':'Run4-',
0055                              'relval_identity':'id-',
0056                              'relval_machine': 'mach-',
0057                              'relval_premix': 'premix-',
0058                              'relval_nano':'nano-',
0059                              'relval_data_highstats':'data-'
0060                              }
0061 
0062         self.files = ['relval_standard' ,
0063                       'relval_highstats',
0064                       'relval_pileup',
0065                       'relval_generator',
0066                       'relval_extendedgen',
0067                       'relval_production',
0068                       'relval_ged',
0069                       'relval_upgrade',
0070                       'relval_cleanedupgrade',
0071                       'relval_gpu',
0072                       'relval_2017',
0073                       'relval_Run4',
0074                       'relval_identity',
0075                       'relval_machine',
0076                       'relval_premix',
0077                       'relval_nano',
0078                       'relval_data_highstats'
0079                       ]
0080         self.filesDefault = {'relval_standard':True ,
0081                              'relval_highstats':True ,
0082                              'relval_pileup':True,
0083                              'relval_generator':True,
0084                              'relval_extendedgen':True,
0085                              'relval_production':True,
0086                              'relval_ged':True,
0087                              'relval_upgrade':False,
0088                              'relval_cleanedupgrade':False,
0089                              'relval_gpu':False,
0090                              'relval_2017':True,
0091                              'relval_Run4':True,
0092                              'relval_identity':False,
0093                              'relval_machine':True,
0094                              'relval_premix':True,
0095                              'relval_nano':True,
0096                              'relval_data_highstats':False
0097                              }
0098 
0099         self.relvalModule = None
0100         
0101         return
0102 
0103     def makeCmd(self, step):
0104 
0105         cmd = ''
0106         cfg = None
0107         input = None
0108         for k,v in step.items():
0109             if 'no_exec' in k : continue  # we want to really run it ...
0110             if k.lower() == 'cfg':
0111                 cfg = v
0112                 continue # do not append to cmd, return separately
0113             if k.lower() == 'input':
0114                 input = v 
0115                 continue # do not append to cmd, return separately
0116             
0117             #chain the configs
0118             #if k.lower() == '--python':
0119             #    v = 'step%d_%s'%(index,v)
0120             cmd += ' ' + k + ' ' + str(v)
0121         return cfg, input, cmd
0122     
0123     def makeStep(self,step,overrides):
0124         from Configuration.PyReleaseValidation.relval_steps import merge
0125         if len(overrides) > 0:
0126             copyStep=merge([overrides]+[step])
0127             return copyStep
0128         else:    
0129             return step
0130 
0131     def verifyDefaultInputs(self):
0132         for wf in self.workFlowSteps.values():
0133             undefs = [driver for driver in wf[2] if isinstance(driver,str) and undefInput in driver ]
0134             if len(undefs)>0:
0135                 raise ValueError("""in MatrixReader.py:{0}
0136 =============================================================================
0137 For wf {1}(*) the default dataset not defined in defaultDataSets dictionary.
0138 With --checkInputs option this throws an error.
0139                                  
0140 (*)
0141 {2}
0142 
0143 =============================================================================
0144                              """.format(sys._getframe(1).f_lineno - 1,wf[0],wf))    
0145 
0146     def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
0147         
0148         prefix = self.filesPrefMap[fileNameIn]
0149         
0150         print("processing", fileNameIn)
0151         
0152         try:
0153             _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
0154             self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
0155         except Exception as e:
0156             print("ERROR importing file ", fileNameIn, str(e))
0157             return
0158 
0159         if useInput is not None:
0160             print("request for INPUT for ", useInput)
0161 
0162         
0163         fromInput={}
0164         
0165         if useInput:
0166             for i in useInput:
0167                 if ':' in i:
0168                     (ik,il)=i.split(':')
0169                     if ik=='all':
0170                         for k in self.relvalModule.workflows.keys():
0171                             fromInput[float(k)]=int(il)
0172                     else:
0173                         fromInput[float(ik)]=int(il)
0174                 else:
0175                     if i=='all':
0176                         for k in self.relvalModule.workflows.keys():
0177                             fromInput[float(k)]=0
0178                     else:
0179                         fromInput[float(i)]=0
0180                 
0181         if fromScratch:
0182             fromScratch=map(float,fromScratch)
0183             for num in fromScratch:
0184                 if num in fromInput:
0185                     fromInput.pop(num)
0186         #overwrite steps
0187         if self.overWrite:
0188             for p in self.overWrite:
0189                 self.relvalModule.steps.overwrite(p)
0190         
0191         #change the origin of dataset on the fly
0192         if refRel:
0193             if ',' in refRel:
0194                 refRels=refRel.split(',')
0195                 if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
0196                     return
0197                 self.relvalModule.changeRefRelease(
0198                     self.relvalModule.steps,
0199                     list(zip(self.relvalModule.baseDataSetRelease,refRels))
0200                     )
0201             else:
0202                 self.relvalModule.changeRefRelease(
0203                     self.relvalModule.steps,
0204                     [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
0205                     )
0206 
0207         for num, wfInfo in self.relvalModule.workflows.items():
0208             commands=[]
0209             wfName = wfInfo[0]
0210             stepList = wfInfo[1]
0211             stepOverrides=wfInfo.overrides
0212             # upgrade case: workflow has basic name, key[, suffix (only special workflows)]
0213             wfKey = ""
0214             wfSuffix = ""
0215             if isinstance(wfName, list) and len(wfName)>1:
0216                 if len(wfName)>2: wfSuffix = wfName[2]
0217                 wfKey = wfName[1]
0218                 wfName = wfName[0]
0219             # if no explicit name given for the workflow, use the name of step1
0220             if wfName.strip() == '': wfName = stepList[0]
0221             # option to specialize the wf as the third item in the WF list
0222             addTo=None
0223             addCom=None
0224             if len(wfInfo)>=3:
0225                 addCom=wfInfo[2]
0226                 if not isinstance(addCom, list):   addCom=[addCom]
0227                 #print 'added dict',addCom
0228                 if len(wfInfo)>=4:
0229                     addTo=wfInfo[3]
0230                     #pad with 0
0231                     while len(addTo)!=len(stepList):
0232                         addTo.append(0)
0233 
0234             name=wfName
0235             # separate suffixes by + because show() excludes first part of name
0236             if len(wfKey)>0:
0237                 name = name+'+'+wfKey
0238                 if len(wfSuffix)>0: name = name+wfSuffix
0239             stepIndex=0
0240             ranStepList=[]
0241             name_for_workflow = name
0242 
0243             #first resolve INPUT possibilities
0244             if num in fromInput:
0245                 ilevel=fromInput[num]
0246                 #print num,ilevel
0247                 for (stepIr,step) in enumerate(reversed(stepList)):
0248                     stepName=step
0249                     stepI=(len(stepList)-stepIr)-1
0250                     #print stepIr,step,stepI,ilevel                    
0251                     if stepI>ilevel:
0252                         #print "ignoring"
0253                         continue
0254                     if stepI!=0:
0255                         testName='__'.join(stepList[0:stepI+1])+'INPUT'
0256                     else:
0257                         testName=step+'INPUT'
0258                     #print "JR",stepI,stepIr,testName,stepList
0259                     if testName in self.relvalModule.steps:
0260                         #print "JR",stepI,stepIr
0261                         stepList[stepI]=testName
0262                         #pop the rest in the list
0263                         #print "\tmod prepop",stepList
0264                         for p in range(stepI):
0265                             stepList.pop(0)
0266                         #print "\t\tmod",stepList
0267                         break
0268 
0269             for (stepI,step) in enumerate(stepList):
0270                 stepName=step
0271                 if self.relvalModule.steps[stepName] is None:
0272                     continue
0273                 if self.wm:
0274                     #cannot put a certain number of things in wm
0275                     if stepName in ['SKIMD','SKIMCOSD','SKIMDreHLT']:
0276                         continue
0277 
0278                 #replace stepName is needed
0279                 #if stepName in self.replaceStep
0280                 if len(name) > 0 : name += '+'
0281                 #any step can be mirrored with INPUT
0282                 ## maybe we want too level deep input
0283                 """
0284                 if num in fromInput:
0285                     if step+'INPUT' in self.relvalModule.steps.keys():
0286                         stepName = step+"INPUT"
0287                         stepList.remove(step)
0288                         stepList.insert(stepIndex,stepName)
0289                 """
0290                 stepNameTmp = stepName
0291                 if len(wfKey)>0: stepNameTmp = stepNameTmp.replace('_'+wfKey,"")
0292                 if len(wfSuffix)>0: stepNameTmp = stepNameTmp.replace(wfSuffix,"")
0293                 name += stepNameTmp
0294                 if addCom and (not addTo or addTo[stepIndex]==1):
0295                     from Configuration.PyReleaseValidation.relval_steps import merge
0296                     copyStep=merge(addCom+[self.makeStep(self.relvalModule.steps[stepName],stepOverrides)])
0297                     cfg, input, opts = self.makeCmd(copyStep)
0298                 else:
0299                     cfg, input, opts = self.makeCmd(self.makeStep(self.relvalModule.steps[stepName],stepOverrides))
0300 
0301                 if input and cfg :
0302                     msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
0303                     raise MatrixException(msg)
0304 
0305                 if input:
0306                     cmd = input
0307                     if self.noRun:
0308                         cmd.run=[]
0309                 else:
0310                     if cfg:
0311                         cmd  = 'cmsDriver.py '+cfg+' '+opts
0312                     else:
0313                         cmd  = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
0314                     if self.wm:
0315                         cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
0316                     if self.addCommand:
0317                         if self.apply:
0318                             if stepIndex in self.apply or stepName in self.apply:
0319                                 cmd +=' '+self.addCommand
0320                         else:
0321                           cmd +=' '+self.addCommand
0322                     if self.wm and self.revertDqmio=='yes':
0323                         cmd=cmd.replace('DQMIO','DQM')
0324                         cmd=cmd.replace('--filetype DQM','')
0325                 commands.append(cmd)
0326                 ranStepList.append(stepName)
0327                 stepIndex+=1
0328             self.workFlowSteps[(num,prefix)] = (num, name_for_workflow, commands, ranStepList)
0329         
0330         return
0331 
0332 
0333     def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
0334 
0335         if selected:
0336             selected=map(float,selected)
0337         for matrixFile in self.files:
0338 
0339             self.reset(what)
0340 
0341             if self.what != 'all' and not any('_'+el in matrixFile for el in self.what.split(",")):
0342                 print("ignoring non-requested file",matrixFile)
0343                 continue
0344 
0345             if self.what == 'all' and not self.filesDefault[matrixFile]:
0346                 print("ignoring file not used by default (enable with -w)",matrixFile)
0347                 continue
0348 
0349             try:
0350                 self.readMatrix(matrixFile, useInput, refRel, fromScratch)
0351                 if self.checkInputs:
0352                     self.verifyDefaultInputs()
0353             except Exception as e:
0354                 print("ERROR reading file:", matrixFile, str(e))
0355                 raise
0356 
0357             if not self.workFlowSteps: continue
0358 
0359             dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
0360             outFile = open(dataFileName,'w')
0361 
0362             print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
0363             ids = sorted(self.workFlowSteps.keys())
0364             indexAndSteps=[]
0365 
0366             writtenWF=0
0367             for key in ids:
0368                 if selected and not (key[0] in selected):
0369                     continue
0370                 #trick to skip the HImix IB test
0371                 if key[0]==203.1 or key[0]==204.1 or key[0]==205.1 or key[0]==4.51 or key[0]==4.52: continue
0372                 num, name, commands, stepList = self.workFlowSteps[key]
0373                 wfName,stepNames= name.split('+',1)
0374                 
0375                 stepNames=stepNames.replace('+SKIMCOSD','')
0376                 stepNames=stepNames.replace('+SKIMD','')
0377                 if 'HARVEST' in stepNames:
0378                     #find out automatically what to remove
0379                     exactb=stepNames.index('+HARVEST')
0380                     exacte=stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
0381                     stepNames=stepNames.replace(stepNames[exactb:exacte],'')
0382                 otherSteps = None
0383                 if '+' in stepNames:
0384                     step1,otherSteps = stepNames.split('+',1)
0385                 
0386                 line = str(num) + ' ++ '+ wfName 
0387                 if otherSteps and not step1Only:
0388                     line += ' ++ ' +otherSteps.replace('+',',')
0389                 else:
0390                     line += ' ++ none'
0391                 inputInfo=None
0392                 if not isinstance(commands[0],str):
0393                     inputInfo=commands[0]
0394                 if otherSteps:
0395                     for (i,c) in enumerate(otherSteps.split('+')):
0396                         #pad with set
0397                         for p in range(len(indexAndSteps),i+2):
0398                             indexAndSteps.append(set())
0399                         indexAndSteps[i+1].add((c,commands[i+1]))
0400 
0401                 if inputInfo :
0402                     #skip the samples from INPUT when step1Only is on
0403                     if step1Only: continue
0404                     line += ' ++ REALDATA: '+inputInfo.dataSet
0405                     if inputInfo.run!=[]: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
0406                     line += ', FILES: ' +str(inputInfo.files)
0407                     line += ', EVENTS: '+str(inputInfo.events)
0408                     if inputInfo.label!='':
0409                         line += ', LABEL: ' +inputInfo.label
0410                     line += ', LOCATION:'+inputInfo.location
0411                     line += ' @@@'
0412                 else:
0413                     line += ' @@@ '+commands[0]
0414                 if self.revertDqmio=='yes':
0415                     line=line.replace('DQMIO','DQM')
0416                 writtenWF+=1
0417                 outFile.write(line+'\n')
0418 
0419 
0420             outFile.write('\n'+'\n')
0421             if step1Only: continue
0422 
0423             for (index,s) in enumerate(indexAndSteps):
0424                 for (stepName,cmd) in s:
0425                     stepIndex=index+1
0426                     if 'dasquery.log' in cmd: continue
0427                     line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
0428                     if self.revertDqmio=='yes':
0429                         line=line.replace('DQMIO','DQM')
0430                     outFile.write(line+'\n')
0431                 outFile.write('\n'+'\n')
0432             outFile.close()
0433             print("wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name)
0434         return 
0435 
0436     def workFlowsByLocation(self, cafVeto=True):
0437         # Check if we are on CAF
0438         onCAF = False
0439         if 'cms/caf/cms' in os.environ['CMS_PATH']:
0440             onCAF = True
0441 
0442         workflows = []
0443         for workflow in self.workFlows:
0444             if isinstance(workflow.cmds[0], InputInfo):
0445                 if cafVeto and (workflow.cmds[0].location == 'CAF' and not onCAF):
0446                     continue
0447             workflows.append(workflow)
0448 
0449         return workflows
0450 
0451     def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
0452         if selected: selected = list(map(float,selected))
0453         wfs = self.workFlowsByLocation(cafVeto)
0454         maxLen = 100 # for summary, limit width of output
0455         fmt1   = "%-6s %-35s [1]: %s ..."
0456         fmt2   = "       %35s [%d]: %s ..."
0457         print("\nfound a total of ", len(wfs), ' workflows:')
0458         if selected:
0459             print("      of which the following", len(selected), 'were selected:')
0460         #-ap for now:
0461         maxLen = -1  # for individual listing, no limit on width
0462         fmt1   = "%-6s %-35s [1]: %s " 
0463         fmt2   = "       %35s [%d]: %s"
0464 
0465         N=[]
0466         for wf in wfs:
0467             if selected and float(wf.numId) not in selected: continue
0468             if extended: print('')
0469             #pad with zeros
0470             for i in range(len(N),len(wf.cmds)):                N.append(0)
0471             N[len(wf.cmds)-1]+=1
0472             wfName = wf.nameId
0473             stepNames = '+'.join(wf.stepList)
0474             for i,s in enumerate(wf.cmds):
0475                 if extended:
0476                     if i==0:
0477                         print(fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen]))
0478                     else:
0479                         print(fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen]))
0480                 else:
0481                     print("%-6s %-35s "% (wf.numId, stepNames))
0482                     break
0483         print('')
0484         for i,n in enumerate(N):
0485             if n:            print(n,'workflows with',i+1,'steps')
0486 
0487         return
0488     
0489     def createWorkFlows(self, fileNameIn):
0490 
0491         prefixIn = self.filesPrefMap[fileNameIn]
0492 
0493         # get through the list of items and update the requested workflows only
0494         keyList = self.workFlowSteps.keys()
0495         ids = []
0496         for item in keyList:
0497             id, pref = item
0498             if pref != prefixIn : continue
0499             ids.append(id)
0500         ids.sort()
0501         for key in ids:
0502             val = self.workFlowSteps[(key,prefixIn)]
0503             num, name, commands, stepList = val
0504             nameId = str(num)+'_'+name
0505             if nameId in self.nameList:
0506                 print("==> duplicate name found for ", nameId)
0507                 print('    keeping  : ', self.nameList[nameId])
0508                 print('    ignoring : ', val)
0509             else:
0510                 self.nameList[nameId] = val
0511 
0512             self.workFlows.append(WorkFlow(num, name, commands=commands, stepList=stepList))
0513 
0514         return
0515 
0516     def prepare(self, useInput=None, refRel='', fromScratch=None):
0517         
0518         for matrixFile in self.files:
0519             if self.what != 'all' and not any('_'+el in matrixFile for el in self.what.split(",")):
0520                 print("ignoring non-requested file",matrixFile)
0521                 continue
0522             if self.what == 'all' and not self.filesDefault[matrixFile]:
0523                 print("ignoring",matrixFile,"from default matrix")
0524                 continue
0525             
0526             try:
0527                 self.readMatrix(matrixFile, useInput, refRel, fromScratch)
0528                 if self.checkInputs:
0529                     self.verifyDefaultInputs()
0530             except Exception as e:
0531                 print("ERROR reading file:", matrixFile, str(e))
0532                 raise
0533             
0534             try:
0535                 self.createWorkFlows(matrixFile)
0536             except Exception as e:
0537                 print("ERROR creating workflows :", str(e))
0538                 raise
0539             
0540                 
0541     def show(self, selected=None, extended=True, cafVeto=True):
0542 
0543         self.showWorkFlows(selected, extended, cafVeto)
0544         print('\n','-'*80,'\n')
0545 
0546 
0547     def updateDB(self):
0548 
0549         import pickle
0550         pickle.dump(self.workFlows, open('theMatrix.pkl', 'w') )
0551 
0552         return
0553