File indexing completed on 2023-05-17 02:07:22
0001 from __future__ import print_function
0002 import sys, os
0003
0004 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
0005 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
0006
0007
0008
class MatrixException(Exception):
    """Exception type used by the matrix reader to report fatal errors.

    The human-readable message is kept in ``msg`` and is what ``str()``
    returns.
    """

    def __init__(self, msg):
        # Forward the message to the base class so that ``args``, ``repr()``
        # and pickling carry it as well (the base initialiser was previously
        # skipped, leaving ``args`` empty).
        super(MatrixException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg
0014
0015
0016
class MatrixReader(object):
    """Read the ``relval_*`` matrix modules and turn them into workflows.

    ``readMatrix`` imports one ``Configuration.PyReleaseValidation.relval_*``
    module and converts every workflow it defines into a list of commands,
    stored in ``self.workFlowSteps``.  ``createWorkFlows`` then wraps those
    entries into ``WorkFlow`` objects collected in ``self.workFlows``.
    """

    # One row per matrix module:
    # (module name, workflow prefix, part of the default 'all' selection).
    # The order of the rows is the order in which the modules are processed.
    _MATRIX_FILES = (
        ('relval_standard', 'std-', True),
        ('relval_highstats', 'hi-', True),
        ('relval_pileup', 'PU-', True),
        ('relval_generator', 'gen-', True),
        ('relval_extendedgen', 'genExt-', True),
        ('relval_production', 'prod-', True),
        ('relval_ged', 'ged-', True),
        ('relval_upgrade', 'upg-', False),
        ('relval_cleanedupgrade', 'clnupg-', False),
        ('relval_gpu', 'gpu-', False),
        ('relval_2017', '2017-', True),
        ('relval_2026', '2026-', True),
        ('relval_identity', 'id-', False),
        ('relval_machine', 'mach-', True),
        ('relval_premix', 'premix-', True),
        ('relval_nano', 'nano-', True),
    )

    # Steps that are dropped from a workflow when wmcontrol mode is active.
    _WM_EXCLUDED_STEPS = ('SKIMD', 'SKIMCOSD', 'SKIMDreHLT')

    # Workflow numbers that showRaw() never writes to the raw dump.
    _RAW_SKIPPED_WORKFLOWS = (203.1, 204.1, 205.1, 4.51, 4.52)

    def __init__(self, opt):
        """Initialise the reader from the parsed command-line options.

        ``opt`` must provide the attributes ``what``, ``wmcontrol``,
        ``revertDqmio``, ``command``, ``apply``, ``workflow``, ``overWrite``
        and ``noRun``.
        """
        self.reset(opt.what)

        self.wm = opt.wmcontrol
        self.revertDqmio = opt.revertDqmio
        self.addCommand = opt.command
        self.apply = opt.apply
        self.commandLineWf = opt.workflow
        self.overWrite = opt.overWrite

        self.noRun = opt.noRun

    def reset(self, what='all'):
        """Drop everything read so far and rebuild the matrix-file tables.

        ``what`` is either ``'all'`` or a comma-separated list of matrix
        name fragments used later to select which files are read.
        """
        self.what = what

        # (workflow number, prefix) -> (number, name, commands, step names)
        self.workFlowSteps = {}

        self.workFlows = []
        self.nameList = {}

        # Fresh containers on every reset so callers never share state.
        self.filesPrefMap = {name: prefix for name, prefix, _ in self._MATRIX_FILES}
        self.files = [name for name, _, _ in self._MATRIX_FILES]
        self.filesDefault = {name: default for name, _, default in self._MATRIX_FILES}

        self.relvalModule = None

    def makeCmd(self, step):
        """Split a step dictionary into ``(cfg, input, options)``.

        Keys equal to ``cfg`` / ``input`` (case-insensitive) are returned
        separately, keys containing ``no_exec`` are dropped, and every other
        ``key value`` pair is appended to the option string, each preceded by
        a single space.
        """
        cfg = None
        inputInfo = None
        cmd = ''
        for key, value in step.items():
            if 'no_exec' in key:
                continue
            lowered = key.lower()
            if lowered == 'cfg':
                cfg = value
            elif lowered == 'input':
                inputInfo = value
            else:
                cmd += ' ' + key + ' ' + str(value)
        return cfg, inputInfo, cmd

    def makeStep(self, step, overrides):
        """Return ``step`` with ``overrides`` merged on top of it.

        When there are no overrides the original ``step`` object is returned
        unchanged.
        """
        if not overrides:
            return step
        # Imported lazily: only needed when something has to be merged.
        from Configuration.PyReleaseValidation.relval_steps import merge
        return merge([overrides] + [step])

    def _matchesWhat(self, matrixFile):
        """Tell whether ``matrixFile`` is selected by ``self.what``."""
        if self.what == 'all':
            return True
        return any('_' + el in matrixFile for el in self.what.split(","))

    def _resolveInputLevels(self, useInput, fromScratch):
        """Build the map ``workflow number -> input level`` from the request.

        Each entry of ``useInput`` is ``<wf>`` or ``<wf>:<level>``, where
        ``<wf>`` is a workflow number or ``all``; a missing level means 0.
        Workflows listed in ``fromScratch`` are removed from the result.
        """
        fromInput = {}
        if useInput:
            for item in useInput:
                if ':' in item:
                    (wfId, level) = item.split(':')
                    level = int(level)
                else:
                    wfId, level = item, 0
                if wfId == 'all':
                    for k in self.relvalModule.workflows.keys():
                        fromInput[float(k)] = level
                else:
                    fromInput[float(wfId)] = level

        if fromScratch:
            for num in map(float, fromScratch):
                fromInput.pop(num, None)
        return fromInput

    def _substituteInputStep(self, stepList, ilevel):
        """Replace the leading steps of ``stepList`` by an ``...INPUT`` step.

        Starting from index ``ilevel`` (or the last step if shorter) and going
        backwards, the first prefix ``step0__...__stepI`` for which a step
        named ``<prefix>INPUT`` exists replaces step ``I``; all steps before
        it are removed.  ``stepList`` is modified in place.
        """
        for stepI in range(len(stepList) - 1, -1, -1):
            if stepI > ilevel:
                continue
            testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
            if testName in self.relvalModule.steps:
                stepList[stepI] = testName
                del stepList[:stepI]
                break

    def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
        """Import one matrix module and fill ``self.workFlowSteps`` from it.

        Args:
            fileNameIn: module name, one of the keys of ``self.filesPrefMap``.
            useInput: optional list of ``<wf>[:<level>]`` requests selecting
                workflows that should start from an ``...INPUT`` step.
            refRel: optional reference release; either a single value applied
                to every entry of the module's ``baseDataSetRelease`` or a
                comma-separated list with one value per entry.
            fromScratch: optional list of workflow numbers excluded from the
                ``useInput`` request.

        Raises:
            MatrixException: when a step defines both ``cfg`` and ``input``.

        Import failures and a ``refRel`` list of the wrong length are
        reported on stdout and end the call without raising.
        """
        prefix = self.filesPrefMap[fileNameIn]

        print("processing", fileNameIn)

        moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
        try:
            __import__(moduleName)
            self.relvalModule = sys.modules[moduleName]
        except Exception as e:
            print("ERROR importing file ", fileNameIn, str(e))
            return

        if useInput is not None:
            print("request for INPUT for ", useInput)

        fromInput = self._resolveInputLevels(useInput, fromScratch)

        if self.overWrite:
            for p in self.overWrite:
                self.relvalModule.steps.overwrite(p)

        if refRel:
            baseReleases = self.relvalModule.baseDataSetRelease
            if ',' in refRel:
                refRels = refRel.split(',')
                if len(refRels) != len(baseReleases):
                    # Previously a silent return; say why nothing was read.
                    print("ERROR: got", len(refRels), "reference releases but expected",
                          len(baseReleases), "- not reading", fileNameIn)
                    return
                replacements = list(zip(baseReleases, refRels))
            else:
                replacements = [(x, refRel) for x in baseReleases]
            self.relvalModule.changeRefRelease(self.relvalModule.steps, replacements)

        for num, wfInfo in self.relvalModule.workflows.items():
            commands = []
            wfName = wfInfo[0]
            stepList = wfInfo[1]
            stepOverrides = wfInfo.overrides

            # The name may be given as [name, key] or [name, key, suffix].
            wfKey = ""
            wfSuffix = ""
            if isinstance(wfName, list) and len(wfName) > 1:
                if len(wfName) > 2:
                    wfSuffix = wfName[2]
                wfKey = wfName[1]
                wfName = wfName[0]

            # Fall back to the first step name when no name is given.
            if wfName.strip() == '':
                wfName = stepList[0]

            # Optional 3rd item: extra step options; optional 4th item: flags
            # telling which steps they apply to.
            addTo = None
            addCom = None
            if len(wfInfo) >= 3:
                addCom = wfInfo[2]
                if not isinstance(addCom, list):
                    addCom = [addCom]
                if len(wfInfo) >= 4:
                    addTo = wfInfo[3]
                    # Pad in place with 0 up to one flag per step.  A bounded
                    # extend cannot spin forever when addTo is already longer
                    # than stepList (the old `!=` loop never terminated then).
                    addTo.extend([0] * (len(stepList) - len(addTo)))

            name = wfName
            if len(wfKey) > 0:
                name = name + '+' + wfKey
                if len(wfSuffix) > 0:
                    name = name + wfSuffix
            stepIndex = 0
            ranStepList = []
            name_for_workflow = name

            if num in fromInput:
                self._substituteInputStep(stepList, fromInput[num])

            for stepName in stepList:
                if self.relvalModule.steps[stepName] is None:
                    continue
                if self.wm and stepName in self._WM_EXCLUDED_STEPS:
                    continue

                # NOTE(review): the extended name built here is not read again
                # in this method; only name_for_workflow is stored below.
                if len(name) > 0:
                    name += '+'
                stepNameTmp = stepName
                if len(wfKey) > 0:
                    stepNameTmp = stepNameTmp.replace('_' + wfKey, "")
                if len(wfSuffix) > 0:
                    stepNameTmp = stepNameTmp.replace(wfSuffix, "")
                name += stepNameTmp

                stepDict = self.makeStep(self.relvalModule.steps[stepName], stepOverrides)
                if addCom and (not addTo or addTo[stepIndex] == 1):
                    from Configuration.PyReleaseValidation.relval_steps import merge
                    stepDict = merge(addCom + [stepDict])
                cfg, inputInfo, opts = self.makeCmd(stepDict)

                if inputInfo and cfg:
                    msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                    raise MatrixException(msg)

                if inputInfo:
                    # Input steps are kept as objects rather than command strings.
                    cmd = inputInfo
                    if self.noRun:
                        cmd.run = []
                else:
                    if cfg:
                        cmd = 'cmsDriver.py '+cfg+' '+opts
                    else:
                        cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
                    if self.wm:
                        cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                    if self.addCommand:
                        if self.apply:
                            # Extra command only for the explicitly listed steps.
                            if stepIndex in self.apply or stepName in self.apply:
                                cmd += ' '+self.addCommand
                        else:
                            cmd += ' '+self.addCommand
                    if self.wm and self.revertDqmio == 'yes':
                        cmd = cmd.replace('DQMIO', 'DQM')
                        cmd = cmd.replace('--filetype DQM', '')
                commands.append(cmd)
                ranStepList.append(stepName)
                stepIndex += 1
            self.workFlowSteps[(num, prefix)] = (num, name_for_workflow, commands, ranStepList)

    def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
        """Write one ``cmsDriver_<matrix>_hlt.txt`` dump per selected matrix file.

        For every matrix file selected by ``what`` the workflows are read and
        written as ``<num> ++ <name> ++ <other steps> @@@ <first command>``
        lines, followed (unless ``step1Only``) by ``STEP<n>`` lines for the
        remaining commands.  ``selected`` optionally restricts the dump to
        the given workflow numbers.
        """
        # A set instead of a bare map(): under Python 3 a map object is
        # exhausted by the first membership test.
        selectedIds = set(map(float, selected)) if selected else None

        for matrixFile in self.files:

            self.reset(what)

            if not self._matchesWhat(matrixFile):
                print("ignoring non-requested file",matrixFile)
                continue

            if self.what == 'all' and not self.filesDefault[matrixFile]:
                print("ignoring file not used by default (enable with -w)",matrixFile)
                continue

            try:
                self.readMatrix(matrixFile, useInput, refRel, fromScratch)
            except Exception as e:
                print("ERROR reading file:", matrixFile, str(e))
                raise

            if not self.workFlowSteps:
                continue

            dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'

            # `with` closes the file on every exit path, including the
            # step1Only `continue` below, which used to skip close().
            with open(dataFileName, 'w') as outFile:

                print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
                ids = sorted(self.workFlowSteps.keys())
                indexAndSteps = []

                writtenWF = 0
                for key in ids:
                    if selectedIds and key[0] not in selectedIds:
                        continue
                    if key[0] in self._RAW_SKIPPED_WORKFLOWS:
                        continue
                    num, name, commands, stepList = self.workFlowSteps[key]
                    # NOTE(review): readMatrix stores a name without the step
                    # names, so a workflow without a key has no '+' here and
                    # this unpacking raises ValueError - confirm the intended
                    # name format before relying on this dump.
                    wfName, stepNames = name.split('+', 1)

                    stepNames = stepNames.replace('+SKIMCOSD', '')
                    stepNames = stepNames.replace('+SKIMD', '')
                    if 'HARVEST' in stepNames:
                        # Cut the '+HARVEST...' element up to the next '+' (or the end).
                        exactb = stepNames.index('+HARVEST')
                        exacte = stepNames.index('+', exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
                        stepNames = stepNames.replace(stepNames[exactb:exacte], '')
                    otherSteps = None
                    if '+' in stepNames:
                        otherSteps = stepNames.split('+', 1)[1]

                    line = str(num) + ' ++ ' + wfName
                    if otherSteps and not step1Only:
                        line += ' ++ ' + otherSteps.replace('+', ',')
                    else:
                        line += ' ++ none'
                    inputInfo = None
                    if not isinstance(commands[0], str):
                        inputInfo = commands[0]
                    if otherSteps:
                        for (i, c) in enumerate(otherSteps.split('+')):
                            # Grow the per-step-index list of sets as needed.
                            while len(indexAndSteps) < i + 2:
                                indexAndSteps.append(set())
                            indexAndSteps[i+1].add((c, commands[i+1]))

                    if inputInfo:
                        # Workflows starting from an input object are not
                        # listed at all in step1Only mode.
                        if step1Only:
                            continue
                        line += ' ++ REALDATA: '+inputInfo.dataSet
                        if inputInfo.run != []:
                            line += ', RUN:'+'|'.join(map(str, inputInfo.run))
                        line += ', FILES: ' + str(inputInfo.files)
                        line += ', EVENTS: '+str(inputInfo.events)
                        if inputInfo.label != '':
                            line += ', LABEL: ' + inputInfo.label
                        line += ', LOCATION:'+inputInfo.location
                        line += ' @@@'
                    else:
                        line += ' @@@ '+commands[0]
                    if self.revertDqmio == 'yes':
                        line = line.replace('DQMIO', 'DQM')
                    writtenWF += 1
                    outFile.write(line+'\n')

                outFile.write('\n'+'\n')
                if step1Only:
                    continue

                for (index, s) in enumerate(indexAndSteps):
                    for (stepName, cmd) in s:
                        stepIndex = index+1
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ '+cmd
                        if self.revertDqmio == 'yes':
                            line = line.replace('DQMIO', 'DQM')
                        outFile.write(line+'\n')
                    outFile.write('\n'+'\n')

            print("wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', dataFileName)

    def workFlowsByLocation(self, cafVeto=True):
        """Return the known workflows, optionally without CAF-only ones.

        With ``cafVeto`` set, workflows whose first command is an
        ``InputInfo`` located on ``'CAF'`` are left out unless the
        ``CMS_PATH`` environment variable contains ``cms/caf/cms``.
        """
        # A missing CMS_PATH is treated as "not on CAF" instead of raising.
        onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

        workflows = []
        for workflow in self.workFlows:
            firstCmd = workflow.cmds[0]
            if isinstance(firstCmd, InputInfo):
                if cafVeto and (firstCmd.location == 'CAF' and not onCAF):
                    continue
            workflows.append(workflow)

        return workflows

    def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
        """Print the workflows and a summary of how many steps they have.

        ``selected`` optionally restricts the listing to the given workflow
        numbers; ``extended`` prints every command of a workflow instead of
        one line per workflow; ``cafVeto`` is passed to
        ``workFlowsByLocation``.
        """
        if selected:
            selected = list(map(float, selected))
        wfs = self.workFlowsByLocation(cafVeto)
        print("\nfound a total of ", len(wfs), ' workflows:')
        if selected:
            print(" of which the following", len(selected), 'were selected:')

        # Commands are printed in full; no width limit is applied.
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"

        # N[k] counts the listed workflows that have k+1 commands.
        N = []
        for wf in wfs:
            if selected and float(wf.numId) not in selected:
                continue
            if extended:
                print('')
            N.extend([0] * (len(wf.cmds) - len(N)))
            N[len(wf.cmds)-1] += 1
            stepNames = '+'.join(wf.stepList)
            for i, s in enumerate(wf.cmds):
                if not extended:
                    # Compact mode: a single line per workflow.
                    print("%-6s %-35s "% (wf.numId, stepNames))
                    break
                if i == 0:
                    print(fmt1 % (wf.numId, stepNames, str(s)))
                else:
                    print(fmt2 % (' ', i+1, str(s)))
        print('')
        for i, n in enumerate(N):
            if n:
                print(n,'workflows with',i+1,'steps')

    def createWorkFlows(self, fileNameIn):
        """Create ``WorkFlow`` objects for the workflows read from one file.

        Only entries of ``self.workFlowSteps`` carrying the prefix of
        ``fileNameIn`` are used, in ascending workflow-number order.
        Duplicate ``<number>_<name>`` identifiers are reported on stdout.
        """
        prefixIn = self.filesPrefMap[fileNameIn]

        ids = sorted(wfId for (wfId, pref) in self.workFlowSteps if pref == prefixIn)
        for key in ids:
            val = self.workFlowSteps[(key, prefixIn)]
            num, name, commands, stepList = val
            nameId = str(num)+'_'+name
            if nameId in self.nameList:
                print("==> duplicate name found for ", nameId)
                print(' keeping : ', self.nameList[nameId])
                print(' ignoring : ', val)
            else:
                self.nameList[nameId] = val

            # NOTE(review): a duplicate is reported as ignored above but is
            # still appended here - confirm whether that is intended.
            self.workFlows.append(WorkFlow(num, name, commands=commands, stepList=stepList))

    def prepare(self, useInput=None, refRel='', fromScratch=None):
        """Read every selected matrix file and create its workflows.

        Errors from reading a file or creating its workflows are reported on
        stdout and then re-raised.
        """
        for matrixFile in self.files:
            if not self._matchesWhat(matrixFile):
                print("ignoring non-requested file",matrixFile)
                continue
            if self.what == 'all' and not self.filesDefault[matrixFile]:
                print("ignoring",matrixFile,"from default matrix")
                continue

            try:
                self.readMatrix(matrixFile, useInput, refRel, fromScratch)
            except Exception as e:
                print("ERROR reading file:", matrixFile, str(e))
                raise

            try:
                self.createWorkFlows(matrixFile)
            except Exception as e:
                print("ERROR creating workflows :", str(e))
                raise

    def show(self, selected=None, extended=True, cafVeto=True):
        """Print the workflow listing followed by a separator line."""
        self.showWorkFlows(selected, extended, cafVeto)
        print('\n','-'*80,'\n')

    def updateDB(self):
        """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory."""
        import pickle
        # Pickle data is binary: text mode 'w' fails under Python 3.  The
        # `with` block also guarantees the file is flushed and closed.
        with open('theMatrix.pkl', 'wb') as pklFile:
            pickle.dump(self.workFlows, pklFile)
0531