import os
class Matrix(dict):
    def __setitem__(self,key,value):
        if key in self:
            print("ERROR in Matrix")
            print("overwriting",key,"not allowed")
        else:
            self.update({float(key):WF(float(key),value)})

    def addOverride(self,key,override):
        self[key].addOverride(override)

#the class to collect all possible steps
class Steps(dict):
    def __setitem__(self,key,value):
        if key in self:
            print("ERROR in Step")
            print("overwriting",key,"not allowed")
            import sys
            sys.exit(-9)
        else:
            self.update({key:value})
            # make the python file named <step>.py
            #if not '--python' in value:                self[key].update({'--python':'%s.py'%(key,)})

    def overwrite(self,keypair):
        value=self[keypair[1]]
        self.update({keypair[0]:value})

class WF(list):
    def __init__(self,n,l):
        self.extend(l)
        self.num=n
        #the actual steps of this WF
        self.steps=[]
        self.overrides={}
    def addOverride(self,overrides):
        self.overrides=overrides

    def interpret(self,stepsDict):
        for s in self:
            print('steps',s,stepsDict[s])
            self.steps.append(stepsDict[s])

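# Illustrative usage sketch (the step and workflow names below are invented examples):
# Steps collects per-step cmsDriver option dictionaries keyed by step name, and
# Matrix maps a workflow number onto a WF built from the assigned list. Neither
# container allows overwriting an existing key.
#
#   steps = Steps()
#   steps['GEN_EXAMPLE'] = {'-s':'GEN,SIM','--conditions':'auto:run2_mc'}
#   workflows = Matrix()
#   workflows[1.0] = ['ExampleLabel', ['GEN_EXAMPLE']]
#   # workflows[1.0] is now a WF instance with num=1.0 holding that list
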
def expandLsInterval(lumis):
    return range(lumis[0],(lumis[1]+1))

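# e.g. expandLsInterval([4,7]) -> range(4, 8), i.e. lumisections 4,5,6,7
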
from DPGAnalysis.Skims.golden_json_2015 import *
jsonFile2015 = findFileInPath("DPGAnalysis/Skims/data/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_50ns_JSON.txt")
jsonFile2016 = findFileInPath("DPGAnalysis/Skims/data/Cert_271036-274240_13TeV_PromptReco_Collisions16_JSON.txt")

import json
with open(jsonFile2015) as data_file:
    data_json2015 = json.load(data_file)

with open(jsonFile2016) as data_file:
    data_json2016 = json.load(data_file)

# return a portion of the 2015 golden json
# LS for a full run by default; otherwise a subset whose size is set by maxNum
def selectedLS(list_runs=[],maxNum=-1,l_json=data_json2015):
    # print "maxNum is %s"%(maxNum)
    if not isinstance(list_runs[0], int):
        print("ERROR: list_runs must be a list of integers")
        return None
    local_dict = {}
    ls_count = 0

    for run in list_runs:
        if str(run) in l_json.keys():
            # print "run %s is there"%(run)
            runNumber = run
            # print "Doing lumi-section selection for run %s: "%(run)
            for LSsegment in l_json[str(run)]:
                # print LSsegment
                ls_count += (LSsegment[-1] - LSsegment[0] + 1)
                if (ls_count > maxNum) and (maxNum != -1):
                    break
                    # return local_dict
                if runNumber in local_dict.keys():
                    local_dict[runNumber].append(LSsegment)
                else:
                    local_dict[runNumber] = [LSsegment]
                # print "total LS so far  %s    -   grow %s"%(ls_count,local_dict)
            #local_dict[runNumber] = [1,2,3]
        else:
            print("run %s is NOT present in json %s\n\n"%(run, l_json))
        # print "++    %s"%(local_dict)

    if len(local_dict) > 0:
        return local_dict
    else:
        print("No luminosity section interval passed the json and your selection; returning None")
        return None

# print "\n\n\n THIS IS WHAT I RETURN: %s \n\n"%( selectedLS([251244,251251]) )
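# Illustrative call (the run number is an example; exact ranges depend on the json):
#   selectedLS([251244], maxNum=10)
#   -> {251244: [[first_ls, last_ls], ...]}  i.e. a dict of accepted lumi-section
#      ranges per run, truncated once roughly maxNum lumisections are collected,
#      or None if no requested run is in the json.
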

InputInfoNDefault=2000000
class InputInfo(object):
    def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None,skimEvents=False) :
        self.run = run
        self.ls = ls
        self.files = files
        self.events = events
        self.location = location
        self.label = label
        self.dataSet = dataSet
        self.split = split
        self.ib_blacklist = ib_blacklist
        self.ib_block = ib_block
        self.dataSetParent = dataSetParent
        self.skimEvents = skimEvents

    def das(self, das_options, dataset):
        if not self.skimEvents and (len(self.run) != 0 or self.ls):
            queries = self.queries(dataset)
            if len(self.run) != 0:
                command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])
            else:
                lumis = self.lumis()
                commands = []
                while queries:
                    commands.append("dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, queries.pop(), lumis.pop()))
                command = ";".join(commands)
            command = "({0})".format(command)
        elif not self.skimEvents:
            command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])
        elif self.skimEvents:
            from os import getenv
            if getenv("JENKINS_PREFIX") is not None:
                # to be sure that whatever happens the files are only those at CERN
                command = "das-up-to-nevents.py -d %s -e %d -pc -l lumi_ranges.txt"%(dataset,self.events)
            else:
                command = "das-up-to-nevents.py -d %s -e %d -l lumi_ranges.txt"%(dataset,self.events)
        # Run filter on DAS output
        if self.ib_blacklist:
            command += " | grep -E -v "
            command += " ".join(["-e '{0}'".format(pattern) for pattern in self.ib_blacklist])
        if not self.skimEvents: ## keep run-lumi sorting
            from os import getenv
            if getenv("CMSSW_USE_IBEOS","false")=="true":
                return "export CMSSW_USE_IBEOS=true; " + command + " | ibeos-lfn-sort"
            return command + " | sort -u"
        else:
            return command

    def lumiRanges(self):
        if len(self.run) != 0:
            return "echo '{\n"+",".join(('"%d":[[1,268435455]]\n'%(x,) for x in self.run))+"}'"
        if self.ls:
            return "echo '{\n"+",".join(('"%d" : %s\n'%( int(x),self.ls[x]) for x in self.ls.keys()))+"}'"
        return None

    def lumis(self):
        query_lumis = []
        if self.ls:
            for run in sorted(self.ls.keys()):
                run_lumis = []
                for rng in self.ls[run]:
                    if isinstance(rng, int):
                        run_lumis.append(str(rng))
                    else:
                        run_lumis.append(str(rng[0])+","+str(rng[1]))
                query_lumis.append(":".join(run_lumis))
        return query_lumis

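    # Formatting example: with an assumed self.ls = {277168: [[1,50],[100,150]], 277194: [200]}
    # lumis() returns ['1,50:100,150', '200'] -- one colon-separated string of lumi
    # ranges per run, in run order.
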
    def queries(self, dataset):
        query_by = "block" if self.ib_block else "dataset"
        query_source = "{0}#{1}".format(dataset, self.ib_block) if self.ib_block else dataset

        if self.ls:
            #for query_run in self.ls.keys():
            # print "run is %s"%(query_run)
            # if you have a LS list specified, still query das for the full run (multiple ls queries take forever)
            # and use step1_lumiRanges.log to run only on LS which respect your selection

            # DO WE WANT T2_CERN ?
            return ["file {0}={1} run={2}".format(query_by, query_source, query_run) for query_run in sorted(self.ls.keys())]
            #return ["file {0}={1} run={2} site=T2_CH_CERN".format(query_by, query_source, query_run) for query_run in self.ls.keys()]

            #for a_range in self.ls[query_run]:
            #    # print "a_range is %s"%(a_range)
            #    the_queries +=  ["file {0}={1} run={2} lumi={3} ".format(query_by, query_source, query_run, query_ls) for query_ls in expandLsInterval(a_range) ]
            #print the_queries

        site = " site=T2_CH_CERN"
        if "CMSSW_DAS_QUERY_SITES" in os.environ:
            if os.environ["CMSSW_DAS_QUERY_SITES"]:
                site = " site=%s" % os.environ["CMSSW_DAS_QUERY_SITES"]
            else:
                site = ""
        if len(self.run) != 0:
            return ["file {0}={1} run={2}{3}".format(query_by, query_source, query_run, site) for query_run in self.run]
            #return ["file {0}={1} run={2} ".format(query_by, query_source, query_run) for query_run in self.run]
        else:
            return ["file {0}={1}{2}".format(query_by, query_source, site)]
            #return ["file {0}={1} ".format(query_by, query_source)]

    def __str__(self):
        if self.ib_block:
            return "input from: {0} with run {1}#{2}".format(self.dataSet, self.ib_block, self.run)
        return "input from: {0} with run {1}".format(self.dataSet, self.run)

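# Illustrative usage sketch (the dataset name and run number are placeholders, not a
# real sample definition):
#   info = InputInfo(dataSet='/ExamplePD/Run2016B-v1/RAW', label='example',
#                    run=[273158], events=100000)
#   info.das("--limit 0", info.dataSet)
#   -> a shell command string built around dasgoclient that lists the input files,
#      restricted to the given run and piped through "sort -u" (or ibeos-lfn-sort
#      when CMSSW_USE_IBEOS=true is set in the environment).
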
# merge dictionaries, with priority on the [0] index
def merge(dictlist,TELL=False):
    import copy
    last=len(dictlist)-1
    if TELL: print(last,dictlist)
    if last==0:
        # ONLY ONE ITEM LEFT
        return copy.copy(dictlist[0])
    else:
        reducedlist=dictlist[0:max(0,last-1)]
        if TELL: print(reducedlist)
        # make a copy of the last item
        d=copy.copy(dictlist[last])
        # update with the last but one item
        d.update(dictlist[last-1])
        # and recursively do the rest
        reducedlist.append(d)
        return merge(reducedlist,TELL)

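# Worked example: merge([{'a':1}, {'a':2,'b':3}]) returns {'a':1,'b':3} -- entries
# from earlier dictionaries in the list win over later ones.
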
def remove(d,key,TELL=False):
    import copy
    e = copy.deepcopy(d)
    if TELL: print("original dict, BEF: %s"%d)
    del e[key]
    if TELL: print("copy-removed dict, AFT: %s"%e)
    return e

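# Worked example: remove({'a':1,'b':2}, 'a') returns {'b':2}; the input dict is
# left untouched because the deletion happens on a deep copy.
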

#### Standard release validation samples ####

stCond={'--conditions':'auto:run1_mc'}
def Kby(N,s):
    return {'--relval':'%s000,%s'%(N,s)}
def Mby(N,s):
    return {'--relval':'%s000000,%s'%(N,s)}

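# Worked examples: Kby(9,50)  -> {'--relval': '9000,50'}     (total events, events per job)
#                  Mby(1,100) -> {'--relval': '1000000,100'}
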
def changeRefRelease(steps,listOfPairs):
    for s in steps:
        if ('INPUT' in steps[s]):
            oldD=steps[s]['INPUT'].dataSet
            for (ref,newRef) in listOfPairs:
                if ref in oldD:
                    steps[s]['INPUT'].dataSet=oldD.replace(ref,newRef)
        if '--pileup_input' in steps[s]:
            for (ref,newRef) in listOfPairs:
                if ref in steps[s]['--pileup_input']:
                    steps[s]['--pileup_input']=steps[s]['--pileup_input'].replace(ref,newRef)

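# Illustrative call (the release names are placeholders): rewrite the input dataset
# and pileup paths when moving to a new reference release.
#   changeRefRelease(steps, [('CMSSW_13_0_0','CMSSW_13_1_0')])
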
def addForAll(steps,d):
    for s in steps:
        steps[s].update(d)

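# Illustrative call (the option value is an example): apply one option to every step.
#   addForAll(steps, {'--conditions':'auto:run2_mc'})
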

def genvalid(fragment,d,suffix='all',fi='',dataSet=''):
    import copy
    c=copy.copy(d)
    if suffix:
        c['-s']=c['-s'].replace('genvalid','genvalid_'+suffix)
    if fi:
        c['--filein']='lhe:%d'%(fi,)
    if dataSet:
        c['--filein']='das:%s'%(dataSet,)
    c['cfg']=fragment
    return c

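# Illustrative call (fragment and dataset are placeholders): build a generator
# validation step from a base dictionary d whose '-s' sequence contains 'genvalid',
# switching it to genvalid_<suffix> and pointing --filein at an LHE article number
# or a DAS dataset.
#   genvalid('ExampleFragment_cfi', d, suffix='all', dataSet='/Example/Dataset/GEN')
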
def check_dups(input):
    seen = set()
    dups = set(x for x in input if x in seen or seen.add(x))

    return dups
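
# Worked example: check_dups([1,2,2,3,3]) returns {2, 3}, the set of values that
# appear more than once in the input sequence.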