File indexing completed on 2024-12-01 23:40:07
0001 import sys, os
0002
0003 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
0004 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
0005
0006
0007
class MatrixException(Exception):
    """Error raised while turning a relval matrix into workflow commands."""

    def __init__(self, msg):
        """Store ``msg`` and forward it to ``Exception``.

        Forwarding the message keeps ``args`` populated, so ``repr()``,
        pickling and exception chaining all carry the text.
        """
        super().__init__(msg)
        self.msg = msg

    def __str__(self):
        """Return the message given at construction time."""
        return self.msg
0013
0014
0015
class MatrixReader(object):
    """Read the ``relval_*`` matrix modules and build workflow command lists.

    Each matrix module is imported from ``Configuration.PyReleaseValidation``.
    For every workflow it defines, the per-step commands are assembled and
    stored in ``self.workFlowSteps``; ``createWorkFlows`` then wraps them in
    ``WorkFlow`` objects collected in ``self.workFlows``.
    """

    # Single table of (matrix module name, workflow prefix, part of the
    # default matrix).  The three lookup structures built in reset() are
    # derived from it so they cannot drift apart.
    _MATRIX_FILES = (
        ('relval_standard', 'std-', True),
        ('relval_highstats', 'hi-', True),
        ('relval_pileup', 'PU-', True),
        ('relval_generator', 'gen-', True),
        ('relval_extendedgen', 'genExt-', True),
        ('relval_production', 'prod-', True),
        ('relval_ged', 'ged-', True),
        ('relval_upgrade', 'upg-', False),
        ('relval_cleanedupgrade', 'clnupg-', False),
        ('relval_gpu', 'gpu-', False),
        ('relval_2017', '2017-', True),
        ('relval_Run4', 'Run4-', True),
        ('relval_identity', 'id-', False),
        ('relval_machine', 'mach-', True),
        ('relval_premix', 'premix-', True),
        ('relval_nano', 'nano-', True),
        ('relval_data_highstats', 'data-', False),
    )

    def __init__(self, opt):
        """Initialise the reader from an options object.

        ``opt`` must provide the attributes ``what``, ``wmcontrol``,
        ``revertDqmio``, ``command``, ``apply``, ``workflow``, ``overWrite``
        and ``noRun``; they are copied onto the instance.
        """
        self.reset(opt.what)

        self.wm = opt.wmcontrol
        self.revertDqmio = opt.revertDqmio
        self.addCommand = opt.command
        self.apply = opt.apply
        self.commandLineWf = opt.workflow
        self.overWrite = opt.overWrite

        self.noRun = opt.noRun

    def reset(self, what='all'):
        """Clear all collected workflows and rebuild the file tables.

        ``what`` is either ``'all'`` or a comma-separated list of matrix
        name fragments (matched as ``'_' + fragment`` against the module
        names).
        """
        self.what = what

        # (num, prefix) -> (num, name, commands, stepList); raw information,
        # not yet WorkFlow objects.
        self.workFlowSteps = {}
        # The WorkFlow objects built by createWorkFlows().
        self.workFlows = []
        # "<num>_<name>" -> workFlowSteps value, used to report duplicates.
        self.nameList = {}

        # Fresh containers on every reset, so callers may mutate them.
        self.filesPrefMap = {name: prefix for name, prefix, _ in self._MATRIX_FILES}
        self.files = [name for name, _, _ in self._MATRIX_FILES]
        self.filesDefault = {name: default for name, _, default in self._MATRIX_FILES}

        self.relvalModule = None

    def makeCmd(self, step):
        """Split a step dictionary into ``(cfg, input, options-string)``.

        Keys containing ``'no_exec'`` are dropped.  The values of the
        case-insensitive keys ``cfg`` and ``input`` are returned separately.
        Every other ``key``/``value`` pair is appended to the option string
        as ``' key value'``.
        """
        cfg = None
        inputInfo = None
        options = []
        for key, value in step.items():
            if 'no_exec' in key:
                continue
            lowered = key.lower()
            if lowered == 'cfg':
                cfg = value
            elif lowered == 'input':
                inputInfo = value
            else:
                options.append(' ' + key + ' ' + str(value))
        return cfg, inputInfo, ''.join(options)

    def makeStep(self, step, overrides):
        """Return ``step`` merged with ``overrides``.

        ``step`` is returned unchanged when ``overrides`` is empty; the
        ``merge`` helper is only imported when it is actually needed.
        """
        if not overrides:
            return step
        from Configuration.PyReleaseValidation.relval_steps import merge
        return merge([overrides] + [step])

    def _matchesWhat(self, matrixFile):
        """Tell whether ``matrixFile`` matches one fragment of ``self.what``."""
        return any('_' + el in matrixFile for el in self.what.split(","))

    def _inputLevels(self, useInput, fromScratch):
        """Build the ``workflow number -> input level`` map.

        Each ``useInput`` entry is ``'<num>'`` or ``'<num>:<level>'``, where
        ``<num>`` may be ``'all'`` to address every workflow of the current
        module; a missing level means 0.  Workflows listed in
        ``fromScratch`` are removed from the map again.
        """
        fromInput = {}
        for request in useInput or ():
            wfId, sep, level = request.partition(':')
            depth = int(level) if sep else 0
            if wfId == 'all':
                for k in self.relvalModule.workflows.keys():
                    fromInput[float(k)] = depth
            else:
                fromInput[float(wfId)] = depth

        for num in map(float, fromScratch or ()):
            fromInput.pop(num, None)
        return fromInput

    def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
        """Import one matrix module and fill ``self.workFlowSteps`` from it.

        Parameters:
            fileNameIn: matrix module name; must be a key of
                ``self.filesPrefMap``.
            useInput: optional list of ``'<num>[:<level>]'`` requests (see
                ``_inputLevels``) selecting workflows whose leading steps
                should be replaced by an ``...INPUT`` step when one exists.
            refRel: optional reference release, or a comma-separated list
                with one entry per ``baseDataSetRelease`` of the module.
            fromScratch: optional list of workflow numbers to exclude from
                the ``useInput`` treatment.

        Raises:
            MatrixException: if a step defines both ``cfg`` and ``input``.

        An import failure or a reference-release count mismatch is reported
        on stdout and the method returns without adding workflows.
        The step lists owned by the matrix module are modified in place
        when an ``...INPUT`` step is substituted.
        """
        prefix = self.filesPrefMap[fileNameIn]

        print("processing", fileNameIn)

        moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
        try:
            __import__(moduleName)
            self.relvalModule = sys.modules[moduleName]
        except Exception as e:
            print("ERROR importing file ", fileNameIn, str(e))
            return

        if useInput is not None:
            print("request for INPUT for ", useInput)

        fromInput = self._inputLevels(useInput, fromScratch)

        # Apply the step overwrites requested through the options.
        if self.overWrite:
            for p in self.overWrite:
                self.relvalModule.steps.overwrite(p)

        # Re-point the steps at another reference release.
        if refRel:
            baseReleases = self.relvalModule.baseDataSetRelease
            if ',' in refRel:
                refRels = refRel.split(',')
                if len(refRels) != len(baseReleases):
                    print("ERROR: got", len(refRels), "reference releases but",
                          len(baseReleases), "base dataset releases; skipping",
                          fileNameIn)
                    return
                replacements = list(zip(baseReleases, refRels))
            else:
                replacements = [(x, refRel) for x in baseReleases]
            self.relvalModule.changeRefRelease(self.relvalModule.steps, replacements)

        for num, wfInfo in self.relvalModule.workflows.items():
            commands = []
            wfName = wfInfo[0]
            stepList = wfInfo[1]
            stepOverrides = wfInfo.overrides

            # The name may be a list: [name, key] or [name, key, suffix].
            wfKey = ""
            wfSuffix = ""
            if isinstance(wfName, list) and len(wfName) > 1:
                if len(wfName) > 2:
                    wfSuffix = wfName[2]
                wfKey = wfName[1]
                wfName = wfName[0]

            # Without an explicit name, fall back to the first step's name.
            if wfName.strip() == '':
                wfName = stepList[0]

            # Optional third item: extra step settings merged into the steps.
            # Optional fourth item: per-step 0/1 flags selecting which steps
            # receive them.
            addTo = None
            addCom = None
            if len(wfInfo) >= 3:
                addCom = wfInfo[2]
                if not isinstance(addCom, list):
                    addCom = [addCom]
            if len(wfInfo) >= 4:
                addTo = wfInfo[3]
                # Pad with 0; '<' rather than '!=' so that an over-long flag
                # list cannot loop forever.
                while len(addTo) < len(stepList):
                    addTo.append(0)

            name = wfName
            if len(wfKey) > 0:
                name = name + '+' + wfKey
                if len(wfSuffix) > 0:
                    name = name + wfSuffix
            name_for_workflow = name
            stepIndex = 0
            ranStepList = []

            # Resolve the INPUT request first: starting from the requested
            # level and walking towards step 0, look for a step named after
            # the '__'-joined leading steps plus 'INPUT'.  The first match
            # replaces those leading steps.
            if num in fromInput:
                ilevel = fromInput[num]
                for stepI in range(min(ilevel, len(stepList) - 1), -1, -1):
                    testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                    if testName in self.relvalModule.steps:
                        stepList[stepI] = testName
                        del stepList[:stepI]
                        break

            for stepName in stepList:
                if self.relvalModule.steps[stepName] is None:
                    continue
                # These steps are left out when self.wm is set.
                if self.wm and stepName in ['SKIMD', 'SKIMCOSD', 'SKIMDreHLT']:
                    continue

                stepDef = self.makeStep(self.relvalModule.steps[stepName], stepOverrides)
                if addCom and (not addTo or addTo[stepIndex] == 1):
                    from Configuration.PyReleaseValidation.relval_steps import merge
                    stepDef = merge(addCom + [stepDef])
                cfg, inputInfo, opts = self.makeCmd(stepDef)

                if inputInfo and cfg:
                    msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                    raise MatrixException(msg)

                if inputInfo:
                    # An input step is stored as the object itself, not as a
                    # command string.
                    cmd = inputInfo
                    if self.noRun:
                        cmd.run = []
                else:
                    if cfg:
                        cmd = 'cmsDriver.py '+cfg+' '+opts
                    else:
                        cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
                    if self.wm:
                        cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                    if self.addCommand:
                        # With self.apply set, the extra command only goes to
                        # the listed step indices / step names.
                        if self.apply:
                            if stepIndex in self.apply or stepName in self.apply:
                                cmd += ' '+self.addCommand
                        else:
                            cmd += ' '+self.addCommand
                    if self.wm and self.revertDqmio == 'yes':
                        cmd = cmd.replace('DQMIO', 'DQM')
                        cmd = cmd.replace('--filetype DQM', '')
                commands.append(cmd)
                ranStepList.append(stepName)
                stepIndex += 1

            # The stored name is the workflow name (plus key/suffix) only;
            # the step names travel separately in ranStepList.
            self.workFlowSteps[(num, prefix)] = (num, name_for_workflow, commands, ranStepList)

    def _writeRawWorkflows(self, outFile, step1Only, selected):
        """Write one summary line per workflow to ``outFile``.

        Returns ``(number of lines written, indexAndSteps)``, where
        ``indexAndSteps[i]`` is the set of ``(step name, command)`` pairs
        seen at position ``i`` of any workflow.
        """
        indexAndSteps = []
        writtenWF = 0
        for key in sorted(self.workFlowSteps.keys()):
            if selected and key[0] not in selected:
                continue
            # Workflow numbers that are always left out of the raw dump.
            if key[0] in (203.1, 204.1, 205.1, 4.51, 4.52):
                continue
            num, name, commands, stepList = self.workFlowSteps[key]
            # NOTE(review): this expects ``name`` to contain at least one
            # '+' followed by step names, but readMatrix stores the name
            # without the steps -- confirm the intended input.
            wfName, stepNames = name.split('+', 1)

            stepNames = stepNames.replace('+SKIMCOSD', '')
            stepNames = stepNames.replace('+SKIMD', '')
            if 'HARVEST' in stepNames:
                # Cut the '+HARVEST...' element up to the next '+' (or end).
                exactb = stepNames.index('+HARVEST')
                if '+' in stepNames[exactb+1:]:
                    exacte = stepNames.index('+', exactb+1)
                else:
                    exacte = len(stepNames)
                stepNames = stepNames.replace(stepNames[exactb:exacte], '')
            otherSteps = None
            if '+' in stepNames:
                otherSteps = stepNames.split('+', 1)[1]

            line = str(num) + ' ++ ' + wfName
            if otherSteps and not step1Only:
                line += ' ++ ' + otherSteps.replace('+', ',')
            else:
                line += ' ++ none'

            # A non-string first command is an input description object.
            inputInfo = None
            if not isinstance(commands[0], str):
                inputInfo = commands[0]

            if otherSteps:
                for (i, c) in enumerate(otherSteps.split('+')):
                    # Grow the per-position list up to index i+1.
                    for _ in range(len(indexAndSteps), i+2):
                        indexAndSteps.append(set())
                    indexAndSteps[i+1].add((c, commands[i+1]))

            if inputInfo:
                # Workflows starting from an input are skipped in
                # step1Only mode.
                if step1Only:
                    continue
                line += ' ++ REALDATA: '+inputInfo.dataSet
                if inputInfo.run != []:
                    line += ', RUN:'+'|'.join(map(str, inputInfo.run))
                line += ', FILES: ' + str(inputInfo.files)
                line += ', EVENTS: '+str(inputInfo.events)
                if inputInfo.label != '':
                    line += ', LABEL: ' + inputInfo.label
                line += ', LOCATION:'+inputInfo.location
                line += ' @@@'
            else:
                line += ' @@@ '+commands[0]
            if self.revertDqmio == 'yes':
                line = line.replace('DQMIO', 'DQM')
            writtenWF += 1
            outFile.write(line+'\n')
        return writtenWF, indexAndSteps

    def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None):
        """Dump each requested matrix to a ``cmsDriver_<name>_hlt.txt`` file.

        The reader is reset with ``what`` before every matrix file.
        ``selected`` optionally restricts the dump to the given workflow
        numbers.  With ``step1Only`` only the per-workflow lines are written
        and the per-step sections are skipped.  Errors from ``readMatrix``
        are printed and re-raised.
        """
        if selected:
            # A real container: a bare map() iterator would be exhausted by
            # the first membership test.
            selected = set(map(float, selected))

        for matrixFile in self.files:

            self.reset(what)

            if self.what != 'all' and not self._matchesWhat(matrixFile):
                print("ignoring non-requested file",matrixFile)
                continue

            if self.what == 'all' and not self.filesDefault[matrixFile]:
                print("ignoring file not used by default (enable with -w)",matrixFile)
                continue

            try:
                self.readMatrix(matrixFile, useInput, refRel, fromScratch)
            except Exception as e:
                print("ERROR reading file:", matrixFile, str(e))
                raise

            if not self.workFlowSteps:
                continue

            dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
            # The context manager closes the file on every path, including
            # the step1Only shortcut and exceptions.
            with open(dataFileName, 'w') as outFile:
                print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
                writtenWF, indexAndSteps = self._writeRawWorkflows(outFile, step1Only, selected)

                outFile.write('\n'+'\n')
                if step1Only:
                    continue

                for (index, s) in enumerate(indexAndSteps):
                    for (stepName, cmd) in s:
                        stepIndex = index+1
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ '+cmd
                        if self.revertDqmio == 'yes':
                            line = line.replace('DQMIO', 'DQM')
                        outFile.write(line+'\n')
                    outFile.write('\n'+'\n')
            print("wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', dataFileName)

    def workFlowsByLocation(self, cafVeto=True):
        """Return the workflows that can run at the current location.

        With ``cafVeto`` set, a workflow whose first command is an
        ``InputInfo`` located at ``'CAF'`` is dropped unless the
        ``CMS_PATH`` environment variable contains ``'cms/caf/cms'``.
        An unset ``CMS_PATH`` is treated as "not on CAF".
        """
        onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

        workflows = []
        for workflow in self.workFlows:
            if cafVeto and not onCAF:
                firstCmd = workflow.cmds[0]
                if isinstance(firstCmd, InputInfo) and firstCmd.location == 'CAF':
                    continue
            workflows.append(workflow)

        return workflows

    def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
        """Print the available workflows and a per-step-count summary.

        ``selected`` optionally restricts the listing to the given workflow
        numbers.  With ``extended`` every command of a workflow is printed;
        otherwise only one line with its number and step names.
        ``cafVeto`` is forwarded to ``workFlowsByLocation``.
        """
        if selected:
            selected = list(map(float, selected))
        wfs = self.workFlowsByLocation(cafVeto)

        print("\nfound a total of ", len(wfs), ' workflows:')
        if selected:
            print(" of which the following", len(selected), 'were selected:')

        # NOTE(review): an earlier width-limited set of formats
        # (maxLen = 100, trailing '...') appears to have been overridden
        # unconditionally by these values -- confirm.
        # A limit of -1 only strips the padding blank appended below,
        # i.e. the command is printed in full.
        maxLen = -1
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"

        # number of commands -> number of listed workflows with that many
        stepCounts = {}
        for wf in wfs:
            if selected and float(wf.numId) not in selected:
                continue
            nCmds = len(wf.cmds)
            stepCounts[nCmds] = stepCounts.get(nCmds, 0) + 1
            stepNames = '+'.join(wf.stepList)

            if not extended:
                if nCmds:
                    print("%-6s %-35s "% (wf.numId, stepNames))
                continue

            print('')
            for i, s in enumerate(wf.cmds):
                text = (str(s)+' ')[:maxLen]
                if i == 0:
                    print(fmt1 % (wf.numId, stepNames, text))
                else:
                    print(fmt2 % (' ', i+1, text))
        print('')
        for nCmds in sorted(stepCounts):
            print(stepCounts[nCmds],'workflows with',nCmds,'steps')

    def createWorkFlows(self, fileNameIn):
        """Create ``WorkFlow`` objects for the workflows of one matrix file.

        Only entries of ``self.workFlowSteps`` carrying the prefix of
        ``fileNameIn`` are used, in ascending workflow-number order.
        """
        prefixIn = self.filesPrefMap[fileNameIn]

        ids = sorted(wfId for (wfId, pref) in self.workFlowSteps if pref == prefixIn)
        for key in ids:
            val = self.workFlowSteps[(key, prefixIn)]
            num, name, commands, stepList = val
            nameId = str(num)+'_'+name
            if nameId in self.nameList:
                print("==> duplicate name found for ", nameId)
                print(' keeping : ', self.nameList[nameId])
                print(' ignoring : ', val)
            else:
                self.nameList[nameId] = val

            # NOTE(review): the WorkFlow is appended even when the name was
            # reported as a duplicate -- confirm this is intended.
            self.workFlows.append(WorkFlow(num, name, commands=commands, stepList=stepList))

    def prepare(self, useInput=None, refRel='', fromScratch=None):
        """Read every requested matrix file and create its workflows.

        Files not matching ``self.what`` (or, for ``'all'``, not part of the
        default matrix) are skipped.  Errors are printed and re-raised.
        """
        for matrixFile in self.files:
            if self.what != 'all' and not self._matchesWhat(matrixFile):
                print("ignoring non-requested file",matrixFile)
                continue
            if self.what == 'all' and not self.filesDefault[matrixFile]:
                print("ignoring",matrixFile,"from default matrix")
                continue

            try:
                self.readMatrix(matrixFile, useInput, refRel, fromScratch)
            except Exception as e:
                print("ERROR reading file:", matrixFile, str(e))
                raise

            try:
                self.createWorkFlows(matrixFile)
            except Exception as e:
                print("ERROR creating workflows :", str(e))
                raise

    def show(self, selected=None, extended=True, cafVeto=True):
        """Print the workflow listing followed by a separator line."""
        self.showWorkFlows(selected, extended, cafVeto)
        print('\n','-'*80,'\n')

    def updateDB(self):
        """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the cwd."""
        import pickle
        # pickle writes bytes: the file must be opened in binary mode.
        with open('theMatrix.pkl', 'wb') as dbFile:
            pickle.dump(self.workFlows, dbFile)
0533