File indexing completed on 2025-03-26 01:50:59
0001 import sys, os
0002
0003 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
0004 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
0005 from Configuration.PyReleaseValidation.upgradeWorkflowComponents import defaultDataSets,undefInput
0006
0007
class MatrixException(Exception):
    """Error raised while turning workflow definitions into commands.

    The human-readable message is kept in ``msg`` and is also forwarded to
    the ``Exception`` base class so that ``args``, ``repr()`` and pickling
    behave like for any standard exception.
    """

    def __init__(self, msg):
        # Forward the message to the base class; without this ``args`` stays empty.
        super(MatrixException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg
0013
0014
0015
class MatrixReader(object):
    """Read the ``relval_*`` workflow modules and build the per-step commands.

    The reader imports ``Configuration.PyReleaseValidation.<file>`` for each
    entry of ``self.files``, converts every workflow found there into a list
    of commands (``cmsDriver.py ...`` strings or input-description objects)
    stored in ``self.workFlowSteps``, and can then create ``WorkFlow``
    objects, list them, or dump them to text / pickle files.
    """

    def __init__(self, opt):
        """Set up the reader from an options object.

        ``opt`` must provide the attributes read below: ``what``,
        ``wmcontrol``, ``revertDqmio``, ``command``, ``apply``, ``workflow``,
        ``overWrite``, ``noRun`` and ``checkInputs``.
        """
        self.reset(opt.what)

        self.wm = opt.wmcontrol
        self.revertDqmio = opt.revertDqmio
        self.addCommand = opt.command
        self.apply = opt.apply
        self.commandLineWf = opt.workflow
        self.overWrite = opt.overWrite

        self.noRun = opt.noRun
        self.checkInputs = opt.checkInputs
        return

    def reset(self, what='all'):
        """Clear all collected workflows and (re)define the known matrix files.

        ``what`` is either ``'all'`` or a comma-separated list of file name
        fragments; it is only stored here and interpreted by ``prepare`` and
        ``showRaw``.
        """
        self.what = what

        # (workflow number, file prefix) -> (number, name, commands, step names)
        self.workFlowSteps = {}

        self.workFlows = []
        self.nameList = {}

        # Short prefix associated with every matrix file.
        self.filesPrefMap = {'relval_standard': 'std-',
                             'relval_highstats': 'hi-',
                             'relval_pileup': 'PU-',
                             'relval_generator': 'gen-',
                             'relval_extendedgen': 'genExt-',
                             'relval_production': 'prod-',
                             'relval_ged': 'ged-',
                             'relval_upgrade': 'upg-',
                             'relval_cleanedupgrade': 'clnupg-',
                             'relval_gpu': 'gpu-',
                             'relval_2017': '2017-',
                             'relval_Run4': 'Run4-',
                             'relval_identity': 'id-',
                             'relval_machine': 'mach-',
                             'relval_premix': 'premix-',
                             'relval_nano': 'nano-',
                             'relval_data_highstats': 'data-'
                             }

        # Matrix files in the order in which they are processed.
        self.files = ['relval_standard',
                      'relval_highstats',
                      'relval_pileup',
                      'relval_generator',
                      'relval_extendedgen',
                      'relval_production',
                      'relval_ged',
                      'relval_upgrade',
                      'relval_cleanedupgrade',
                      'relval_gpu',
                      'relval_2017',
                      'relval_Run4',
                      'relval_identity',
                      'relval_machine',
                      'relval_premix',
                      'relval_nano',
                      'relval_data_highstats'
                      ]

        # Whether a file is read when ``what == 'all'``.
        self.filesDefault = {'relval_standard': True,
                             'relval_highstats': True,
                             'relval_pileup': True,
                             'relval_generator': True,
                             'relval_extendedgen': True,
                             'relval_production': True,
                             'relval_ged': True,
                             'relval_upgrade': False,
                             'relval_cleanedupgrade': False,
                             'relval_gpu': False,
                             'relval_2017': True,
                             'relval_Run4': True,
                             'relval_identity': False,
                             'relval_machine': True,
                             'relval_premix': True,
                             'relval_nano': True,
                             'relval_data_highstats': False
                             }

        # Module object of the matrix file read last (set by readMatrix).
        self.relvalModule = None

        return

    def makeCmd(self, step):
        """Split a step dictionary into ``(cfg, input, options string)``.

        Keys containing ``no_exec`` are dropped. The values stored under the
        (case-insensitive) keys ``cfg`` and ``input`` are returned separately
        (``None`` when absent); every other entry is appended to the options
        string as ``' <key> <value>'`` in dictionary order.
        """
        cfg = None
        inputInfo = None
        options = []
        for key, value in step.items():
            if 'no_exec' in key:
                continue
            lowered = key.lower()
            if lowered == 'cfg':
                cfg = value
            elif lowered == 'input':
                inputInfo = value
            else:
                options.append(' ' + key + ' ' + str(value))
        return cfg, inputInfo, ''.join(options)

    def makeStep(self, step, overrides):
        """Return ``step`` with ``overrides`` merged on top of it.

        When ``overrides`` is empty (or ``None``) the original ``step`` object
        is returned untouched; otherwise the result of ``merge`` from
        ``relval_steps`` is returned.
        """
        if not overrides:
            return step
        # Imported lazily: only needed when there is something to merge.
        from Configuration.PyReleaseValidation.relval_steps import merge
        return merge([overrides] + [step])

    def verifyDefaultInputs(self):
        """Raise ``ValueError`` if any stored command still contains ``undefInput``.

        Only string commands are inspected; the first offending workflow
        found is reported.
        """
        for wf in self.workFlowSteps.values():
            # wf[2] is the list of commands of the workflow.
            hasUndefined = any(isinstance(driver, str) and undefInput in driver
                               for driver in wf[2])
            if hasUndefined:
                raise ValueError("""in MatrixReader.py:{0}
=============================================================================
For wf {1}(*) the default dataset not defined in defaultDataSets dictionary.
With --checkInputs option this throws an error.

(*)
{2}

=============================================================================
""".format(sys._getframe(1).f_lineno - 1, wf[0], wf))

    def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
        """Import one matrix file and fill ``self.workFlowSteps`` from it.

        fileNameIn  -- key of ``self.filesPrefMap``; the module
                       ``Configuration.PyReleaseValidation.<fileNameIn>`` is imported.
        useInput    -- optional list of ``'<wf>'``, ``'<wf>:<level>'``, ``'all'``
                       or ``'all:<level>'`` strings selecting workflows whose
                       first steps may be replaced by an ``...INPUT`` step.
        refRel      -- optional reference release, or a comma-separated list
                       with one entry per ``baseDataSetRelease`` of the module.
        fromScratch -- optional list of workflow numbers removed again from
                       the ``useInput`` selection.

        An import failure is reported on stdout and the method returns
        without touching ``self.workFlowSteps``. A ``MatrixException`` is
        raised when a step defines both ``cfg`` and ``input``.
        """
        prefix = self.filesPrefMap[fileNameIn]

        print("processing", fileNameIn)

        moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
        try:
            __import__(moduleName)
            self.relvalModule = sys.modules[moduleName]
        except Exception as e:
            print("ERROR importing file ", fileNameIn, str(e))
            return

        if useInput is not None:
            print("request for INPUT for ", useInput)

        # workflow number -> highest step index that may be taken from INPUT
        fromInput = {}

        if useInput:
            for i in useInput:
                if ':' in i:
                    (ik, il) = i.split(':')
                    level = int(il)
                else:
                    ik = i
                    level = 0
                if ik == 'all':
                    for k in self.relvalModule.workflows.keys():
                        fromInput[float(k)] = level
                else:
                    fromInput[float(ik)] = level

        if fromScratch:
            for num in map(float, fromScratch):
                fromInput.pop(num, None)

        if self.overWrite:
            for p in self.overWrite:
                self.relvalModule.steps.overwrite(p)

        # Change the release of the reference input samples if requested.
        if refRel:
            if ',' in refRel:
                refRels = refRel.split(',')
                if len(refRels) != len(self.relvalModule.baseDataSetRelease):
                    print("ERROR: got", len(refRels), "reference releases but",
                          len(self.relvalModule.baseDataSetRelease),
                          "are expected; skipping", fileNameIn)
                    return
                self.relvalModule.changeRefRelease(
                    self.relvalModule.steps,
                    list(zip(self.relvalModule.baseDataSetRelease, refRels))
                )
            else:
                self.relvalModule.changeRefRelease(
                    self.relvalModule.steps,
                    [(x, refRel) for x in self.relvalModule.baseDataSetRelease]
                )

        for num, wfInfo in self.relvalModule.workflows.items():
            commands = []
            wfName = wfInfo[0]
            stepList = wfInfo[1]
            stepOverrides = wfInfo.overrides

            # The name may be given as [name, key] or [name, key, suffix].
            wfKey = ""
            wfSuffix = ""
            if isinstance(wfName, list) and len(wfName) > 1:
                if len(wfName) > 2:
                    wfSuffix = wfName[2]
                wfKey = wfName[1]
                wfName = wfName[0]

            # An empty name falls back to the name of the first step.
            if wfName.strip() == '':
                wfName = stepList[0]

            addTo = None
            addCom = None
            if len(wfInfo) >= 3:
                addCom = wfInfo[2]
                if not isinstance(addCom, list):
                    addCom = [addCom]

                if len(wfInfo) >= 4:
                    addTo = wfInfo[3]
                    # Pad with 0 up to the number of steps. '<' rather than
                    # '!=' so a longer list cannot loop forever.
                    while len(addTo) < len(stepList):
                        addTo.append(0)

            name_for_workflow = wfName
            if len(wfKey) > 0:
                name_for_workflow = name_for_workflow + '+' + wfKey
                # NOTE(review): suffix is only appended when a key is present;
                # nesting reconstructed from an unindented listing - confirm.
                if len(wfSuffix) > 0:
                    name_for_workflow = name_for_workflow + wfSuffix
            stepIndex = 0
            ranStepList = []

            # First resolve INPUT possibilities: starting from the deepest
            # allowed step, look for a '<step0>__..__<stepI>INPUT' definition
            # and, if found, let it replace steps 0..stepI (in place).
            if num in fromInput:
                ilevel = fromInput[num]
                for stepI in range(min(ilevel, len(stepList) - 1), -1, -1):
                    testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                    if testName in self.relvalModule.steps:
                        stepList[stepI] = testName
                        del stepList[:stepI]
                        break

            for stepName in stepList:
                if self.relvalModule.steps[stepName] is None:
                    continue
                # These steps are skipped when self.wm is set.
                if self.wm and stepName in ['SKIMD', 'SKIMCOSD', 'SKIMDreHLT']:
                    continue

                if addCom and (not addTo or addTo[stepIndex] == 1):
                    from Configuration.PyReleaseValidation.relval_steps import merge
                    copyStep = merge(addCom + [self.makeStep(self.relvalModule.steps[stepName], stepOverrides)])
                    cfg, inputInfo, opts = self.makeCmd(copyStep)
                else:
                    cfg, inputInfo, opts = self.makeCmd(self.makeStep(self.relvalModule.steps[stepName], stepOverrides))

                if inputInfo and cfg:
                    msg = "FATAL ERROR: found both cfg and input for workflow " + str(num) + ' step ' + stepName
                    raise MatrixException(msg)

                if inputInfo:
                    # The input description object itself is the "command".
                    cmd = inputInfo
                    if self.noRun:
                        cmd.run = []
                else:
                    if cfg:
                        cmd = 'cmsDriver.py ' + cfg + ' ' + opts
                    else:
                        cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
                    if self.wm:
                        cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                    if self.addCommand:
                        if self.apply:
                            # Only add the extra command to the selected steps.
                            if stepIndex in self.apply or stepName in self.apply:
                                cmd += ' ' + self.addCommand
                        else:
                            cmd += ' ' + self.addCommand
                    if self.wm and self.revertDqmio == 'yes':
                        cmd = cmd.replace('DQMIO', 'DQM')
                        cmd = cmd.replace('--filetype DQM', '')
                commands.append(cmd)
                ranStepList.append(stepName)
                stepIndex += 1
            self.workFlowSteps[(num, prefix)] = (num, name_for_workflow, commands, ranStepList)

        return

    def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None):
        """Write one ``cmsDriver_<file>_hlt.txt`` text file per matrix file.

        For every requested matrix file the reader is reset, the file is read
        and one line per workflow is written, followed (unless ``step1Only``)
        by one ``STEPn`` line per distinct later-step command. ``selected``
        optionally restricts the output to the given workflow numbers.
        """
        if selected:
            # A set, not a bare map(): the iterator would be exhausted by the
            # first membership test under Python 3.
            selected = set(map(float, selected))

        # Workflow numbers that are never written out.
        skippedIds = (203.1, 204.1, 205.1, 4.51, 4.52)

        for matrixFile in self.files:

            self.reset(what)

            if self.what != 'all' and not any('_' + el in matrixFile for el in self.what.split(",")):
                print("ignoring non-requested file", matrixFile)
                continue

            if self.what == 'all' and not self.filesDefault[matrixFile]:
                print("ignoring file not used by default (enable with -w)", matrixFile)
                continue

            try:
                self.readMatrix(matrixFile, useInput, refRel, fromScratch)
                if self.checkInputs:
                    self.verifyDefaultInputs()
            except Exception as e:
                print("ERROR reading file:", matrixFile, str(e))
                raise

            if not self.workFlowSteps:
                continue

            dataFileName = matrixFile.replace('relval_', 'cmsDriver_') + '_hlt.txt'

            # 'with' guarantees the file is closed on every path, including
            # the early 'continue' taken when step1Only is set.
            with open(dataFileName, 'w') as outFile:

                print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
                ids = sorted(self.workFlowSteps.keys())
                # indexAndSteps[i] holds the (step name, command) pairs seen at position i.
                indexAndSteps = []

                writtenWF = 0
                for key in ids:
                    if selected and key[0] not in selected:
                        continue
                    if key[0] in skippedIds:
                        continue
                    num, name, commands, stepList = self.workFlowSteps[key]
                    # NOTE(review): partition() instead of split()-unpacking so
                    # that a name without '+' yields no step names rather than
                    # raising ValueError.
                    wfName, _, stepNames = name.partition('+')

                    stepNames = stepNames.replace('+SKIMCOSD', '')
                    stepNames = stepNames.replace('+SKIMD', '')
                    if 'HARVEST' in stepNames:
                        # Cut the '+HARVEST...' element out of the step names.
                        exactb = stepNames.index('+HARVEST')
                        exacte = stepNames.index('+', exactb + 1) if ('+' in stepNames[exactb + 1:]) else (len(stepNames))
                        stepNames = stepNames.replace(stepNames[exactb:exacte], '')
                    otherSteps = None
                    if '+' in stepNames:
                        step1, otherSteps = stepNames.split('+', 1)

                    line = str(num) + ' ++ ' + wfName
                    if otherSteps and not step1Only:
                        line += ' ++ ' + otherSteps.replace('+', ',')
                    else:
                        line += ' ++ none'
                    inputInfo = None
                    # A non-string first command is an input description object.
                    if not isinstance(commands[0], str):
                        inputInfo = commands[0]
                    if otherSteps:
                        for (i, c) in enumerate(otherSteps.split('+')):
                            # Pad with empty sets up to index i+1.
                            while len(indexAndSteps) < i + 2:
                                indexAndSteps.append(set())
                            indexAndSteps[i + 1].add((c, commands[i + 1]))

                    if inputInfo:
                        # Workflows starting from an input are not listed with step1Only.
                        if step1Only:
                            continue
                        line += ' ++ REALDATA: ' + inputInfo.dataSet
                        if inputInfo.run != []:
                            line += ', RUN:' + '|'.join(map(str, inputInfo.run))
                        line += ', FILES: ' + str(inputInfo.files)
                        line += ', EVENTS: ' + str(inputInfo.events)
                        if inputInfo.label != '':
                            line += ', LABEL: ' + inputInfo.label
                        line += ', LOCATION:' + inputInfo.location
                        line += ' @@@'
                    else:
                        line += ' @@@ ' + commands[0]
                    if self.revertDqmio == 'yes':
                        line = line.replace('DQMIO', 'DQM')
                    writtenWF += 1
                    outFile.write(line + '\n')

                outFile.write('\n' + '\n')
                if step1Only:
                    continue

                for (index, s) in enumerate(indexAndSteps):
                    for (stepName, cmd) in s:
                        stepIndex = index + 1
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ ' + cmd
                        if self.revertDqmio == 'yes':
                            line = line.replace('DQMIO', 'DQM')
                        outFile.write(line + '\n')
                    outFile.write('\n' + '\n')

            print("wrote ", writtenWF, ' workflow' + ('s' if (writtenWF != 1) else ''), ' to ', outFile.name)
        return

    def workFlowsByLocation(self, cafVeto=True):
        """Return the workflows of ``self.workFlows`` that can run here.

        With ``cafVeto`` set, workflows whose first command is an
        ``InputInfo`` located at ``'CAF'`` are dropped unless the
        ``CMS_PATH`` environment variable contains ``cms/caf/cms``. An unset
        ``CMS_PATH`` is treated as "not on CAF".
        """
        onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

        workflows = []
        for workflow in self.workFlows:
            if isinstance(workflow.cmds[0], InputInfo):
                if cafVeto and (workflow.cmds[0].location == 'CAF' and not onCAF):
                    continue
            workflows.append(workflow)

        return workflows

    def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
        """Print the (optionally selected) workflows and a per-length summary.

        With ``extended`` every command of a workflow is printed; otherwise
        only one line with the number and the step names.
        """
        if selected:
            selected = list(map(float, selected))
        wfs = self.workFlowsByLocation(cafVeto)
        print("\nfound a total of ", len(wfs), ' workflows:')
        if selected:
            print(" of which the following", len(selected), 'were selected:')

        # maxLen = -1 only strips the blank appended to each command below.
        maxLen = -1
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"

        # N[k] counts the listed workflows that have k+1 commands.
        N = []
        for wf in wfs:
            if selected and float(wf.numId) not in selected:
                continue
            if extended:
                print('')

            # Pad with zeros.
            while len(N) < len(wf.cmds):
                N.append(0)
            N[len(wf.cmds) - 1] += 1
            stepNames = '+'.join(wf.stepList)
            for i, s in enumerate(wf.cmds):
                if extended:
                    if i == 0:
                        print(fmt1 % (wf.numId, stepNames, (str(s) + ' ')[:maxLen]))
                    else:
                        print(fmt2 % (' ', i + 1, (str(s) + ' ')[:maxLen]))
                else:
                    print("%-6s %-35s " % (wf.numId, stepNames))
                    break
        print('')
        for i, n in enumerate(N):
            if n:
                print(n, 'workflows with', i + 1, 'steps')

        return

    def createWorkFlows(self, fileNameIn):
        """Append a ``WorkFlow`` for every stored workflow of ``fileNameIn``.

        Workflows are taken from ``self.workFlowSteps`` (entries whose prefix
        matches the file) in increasing workflow number.
        """
        prefixIn = self.filesPrefMap[fileNameIn]

        # Numbers of the workflows that came from this file, in sorted order.
        ids = sorted(wfNum for (wfNum, pref) in self.workFlowSteps.keys() if pref == prefixIn)
        for key in ids:
            val = self.workFlowSteps[(key, prefixIn)]
            num, name, commands, stepList = val
            nameId = str(num) + '_' + name
            if nameId in self.nameList:
                print("==> duplicate name found for ", nameId)
                print(' keeping : ', self.nameList[nameId])
                print(' ignoring : ', val)
            else:
                self.nameList[nameId] = val

            # NOTE(review): the workflow is appended even when a duplicate
            # name was reported above; kept as in the original - confirm intent.
            self.workFlows.append(WorkFlow(num, name, commands=commands, stepList=stepList))

        return

    def prepare(self, useInput=None, refRel='', fromScratch=None):
        """Read every requested matrix file and create its workflows.

        Errors from reading or creating are printed and re-raised.
        """
        for matrixFile in self.files:
            if self.what != 'all' and not any('_' + el in matrixFile for el in self.what.split(",")):
                print("ignoring non-requested file", matrixFile)
                continue
            if self.what == 'all' and not self.filesDefault[matrixFile]:
                print("ignoring", matrixFile, "from default matrix")
                continue

            try:
                self.readMatrix(matrixFile, useInput, refRel, fromScratch)
                if self.checkInputs:
                    self.verifyDefaultInputs()
            except Exception as e:
                print("ERROR reading file:", matrixFile, str(e))
                raise

            try:
                self.createWorkFlows(matrixFile)
            except Exception as e:
                print("ERROR creating workflows :", str(e))
                raise

    def show(self, selected=None, extended=True, cafVeto=True):
        """Print the workflows via ``showWorkFlows`` followed by a separator line."""
        self.showWorkFlows(selected, extended, cafVeto)
        print('\n', '-' * 80, '\n')

    def updateDB(self):
        """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory."""
        import pickle
        # pickle writes bytes, so the file must be opened in binary mode.
        with open('theMatrix.pkl', 'wb') as pklFile:
            pickle.dump(self.workFlows, pklFile)

        return
0553