MatrixUtil.py

CMSSW/Configuration/PyReleaseValidation/python/MatrixUtil.py

Line Code

Line	Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340	`import os` `import subprocess` `class Matrix(dict):` `def __setitem__(self,key,value):` `if key in self:` `print("ERROR in Matrix")` `print("overwriting",key,"not allowed")` `else:` `self.update({float(key):WF(float(key),value)})` `def addOverride(self,key,override):` `self[key].addOverride(override)` `#the class to collect all possible steps` `class Steps(dict):` `def __setitem__(self,key,value):` `if key in self:` `print("ERROR in Step")` `print("overwriting",key,"not allowed")` `import sys` `sys.exit(-9)` `else:` `self.update({key:value})` `# make the python file named <step>.py` `#if not '--python' in value: self[key].update({'--python':'%s.py'%(key,)})` `def overwrite(self,keypair):` `value=self[keypair[1]]` 
`self.update({keypair[0]:value})` `class WF(list):` `def __init__(self,n,l):` `self.extend(l)` `self.num=n` `#the actual steps of this WF` `self.steps=[]` `self.overrides={}` `def addOverride(self,overrides):` `self.overrides=overrides` `def interpret(self,stepsDict):` `for s in self:` `print('steps',s,stepsDict[s])` `steps.append(stepsDict[s])` `def expandLsInterval(lumis):` `return range(lumis[0],(lumis[1]+1))` `from DPGAnalysis.Skims.golden_json_2015 import *` `jsonFile2015 = findFileInPath("DPGAnalysis/Skims/data/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_50ns_JSON.txt")` `jsonFile2016 = findFileInPath("DPGAnalysis/Skims/data/Cert_271036-274240_13TeV_PromptReco_Collisions16_JSON.txt")` `import json` `with open(jsonFile2015) as data_file:` `data_json2015 = json.load(data_file)` `with open(jsonFile2016) as data_file:` `data_json2016 = json.load(data_file)` `# return a portion of the 2015 golden json` `# LS for a full run by default; otherwise a subset of which you determined the size` `def selectedLS(list_runs=[],maxNum=-1,l_json=data_json2015):` `# print "maxNum is %s"%(maxNum)` `if not isinstance(list_runs[0], int):` `print("ERROR: list_runs must be a list of integers")` `return None` `local_dict = {}` `ls_count = 0` `for run in list_runs:` `if str(run) in l_json.keys():` `# print "run %s is there"%(run)` `runNumber = run` `# print "Doing lumi-section selection for run %s: "%(run)` `for LSsegment in l_json[str(run)] :` `# print LSsegment` `ls_count += (LSsegment[-1] - LSsegment[0] + 1)` `if (ls_count > maxNum) & (maxNum != -1):` `break` `# return local_dict` `if runNumber in local_dict.keys():` `local_dict[runNumber].append(LSsegment)` `else:` `local_dict[runNumber] = [LSsegment]` `# print "total LS so far %s - grow %s"%(ls_count,local_dict)` `#local_dict[runNumber] = [1,2,3]` `else:` `print("run %s is NOT present in json %s\n\n"%(run, l_json))` `# print "++ %s"%(local_dict)` `if ( len(local_dict) > 0 ) :` `return local_dict` `else :` `print("No luminosity 
section interval passed the json and your selection; returning None")` `return None` `# print "\n\n\n THIS IS WHAT I RETURN: %s \n\n"%( selectedLS([251244,251251]) )` `InputInfoNDefault=2000000` `class InputInfo(object):` `def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None,skimEvents=False) :` `self.run = run` `self.ls = ls` `self.files = files` `self.events = events` `self.location = location` `self.label = label` `self.dataSet = dataSet` `self.split = split` `self.ib_blacklist = ib_blacklist` `self.ib_block = ib_block` `self.dataSetParent = dataSetParent` `self.skimEvents = skimEvents` `def das(self, das_options, dataset):` `if not self.skimEvents and (len(self.run) != 0 or self.ls):` `queries = self.queries(dataset)` `if len(self.run) != 0:` `command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])` `else:` `lumis = self.lumis()` `commands = []` `while queries:` `commands.append("dasgoclient %s --query 'lumi,%s' --format json \| das-selected-lumis.py %s " % (das_options, queries.pop(), lumis.pop()))` `command = ";".join(commands)` `command = "({0})".format(command)` `elif not self.skimEvents:` `command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])` `elif self.skimEvents:` `from os import getenv` `if getenv("JENKINS_PREFIX") is not None:` `# to be sure that whatever happens the files are only those at CERN` `command = "das-up-to-nevents.py -d %s -e %d -pc -l lumi_ranges.txt"%(dataset,self.events)` `else:` `command = "das-up-to-nevents.py -d %s -e %d -l lumi_ranges.txt"%(dataset,self.events)` `# Run filter on DAS output` `if self.ib_blacklist:` `command += " \| grep -E -v "` `command += " ".join(["-e '{0}'".format(pattern) for pattern in self.ib_blacklist])` `if not self.skimEvents: ## keep run-lumi sorting` `from os import getenv` `if getenv("CMSSW_USE_IBEOS","false")=="true":` `return "export 
CMSSW_USE_IBEOS=true; " + command + " \| ibeos-lfn-sort"` `return command + " \| sort -u"` `else:` `return command` `def lumiRanges(self):` `if len(self.run) != 0:` `return "echo '{\n"+",".join(('"%d":[[1,268435455]]\n'%(x,) for x in self.run))+"}'"` `if self.ls :` `return "echo '{\n"+",".join(('"%d" : %s\n'%( int(x),self.ls[x]) for x in self.ls.keys()))+"}'"` `return None` `def lumis(self):` `query_lumis = []` `if self.ls:` `for run in sorted(self.ls.keys()):` `run_lumis = []` `for rng in self.ls[run]:` `if isinstance(rng, int):` `run_lumis.append(str(rng))` `else:` `run_lumis.append(str(rng[0])+","+str(rng[1]))` `query_lumis.append(":".join(run_lumis))` `return query_lumis` `def queries(self, dataset):` `query_by = "block" if self.ib_block else "dataset"` `query_source = "{0}#{1}".format(dataset, self.ib_block) if self.ib_block else dataset` `if self.ls :` `the_queries = []` `#for query_run in self.ls.keys():` `# print "run is %s"%(query_run)` `# if you have a LS list specified, still query das for the full run (multiple ls queries take forever)` `# and use step1_lumiRanges.log to run only on LS which respect your selection` `# DO WE WANT T2_CERN ?` `return ["file {0}={1} run={2}".format(query_by, query_source, query_run) for query_run in sorted(self.ls.keys())]` `#return ["file {0}={1} run={2} site=T2_CH_CERN".format(query_by, query_source, query_run) for query_run in self.ls.keys()]` `#` `#for a_range in self.ls[query_run]:` `# # print "a_range is %s"%(a_range)` `# the_queries += ["file {0}={1} run={2} lumi={3} ".format(query_by, query_source, query_run, query_ls) for query_ls in expandLsInterval(a_range) ]` `#print the_queries` `return the_queries` `site = " site=T2_CH_CERN"` `if "CMSSW_DAS_QUERY_SITES" in os.environ:` `if os.environ["CMSSW_DAS_QUERY_SITES"]:` `site = " site=%s" % os.environ["CMSSW_DAS_QUERY_SITES"]` `else:` `site = ""` `if len(self.run) != 0:` `return ["file {0}={1} run={2}{3}".format(query_by, query_source, query_run, site) for query_run in 
self.run]` `#return ["file {0}={1} run={2} ".format(query_by, query_source, query_run) for query_run in self.run]` `else:` `return ["file {0}={1}{2}".format(query_by, query_source, site)]` `#return ["file {0}={1} ".format(query_by, query_source)]` `def __str__(self):` `if self.ib_block:` `return "input from: {0} with run {1}#{2}".format(self.dataSet, self.ib_block, self.run)` `return "input from: {0} with run {1}".format(self.dataSet, self.run)` `# merge dictionaries, with prioty on the [0] index` `def merge(dictlist,TELL=False):` `import copy` `last=len(dictlist)-1` `if TELL: print(last,dictlist)` `if last==0:` `# ONLY ONE ITEM LEFT` `return copy.copy(dictlist[0])` `else:` `reducedlist=dictlist[0:max(0,last-1)]` `if TELL: print(reducedlist)` `# make a copy of the last item` `d=copy.copy(dictlist[last])` `# update with the last but one item` `d.update(dictlist[last-1])` `# and recursively do the rest` `reducedlist.append(d)` `return merge(reducedlist,TELL)` `def remove(d,key,TELL=False):` `import copy` `e = copy.deepcopy(d)` `if TELL: print("original dict, BEF: %s"%d)` `del e[key]` `if TELL: print("copy-removed dict, AFT: %s"%e)` `return e` `#### Standard release validation samples ####` `stCond={'--conditions':'auto:run1_mc'}` `def Kby(N,s):` `return {'--relval':'%s000,%s'%(N,s)}` `def Mby(N,s):` `return {'--relval':'%s000000,%s'%(N,s)}` `def changeRefRelease(steps,listOfPairs):` `for s in steps:` `if ('INPUT' in steps[s]):` `oldD=steps[s]['INPUT'].dataSet` `for (ref,newRef) in listOfPairs:` `if ref in oldD:` `steps[s]['INPUT'].dataSet=oldD.replace(ref,newRef)` `if '--pileup_input' in steps[s]:` `for (ref,newRef) in listOfPairs:` `if ref in steps[s]['--pileup_input']:` `steps[s]['--pileup_input']=steps[s]['--pileup_input'].replace(ref,newRef)` `def addForAll(steps,d):` `for s in steps:` `steps[s].update(d)` `def genvalid(fragment,d,suffix='all',fi='',dataSet=''):` `import copy` `c=copy.copy(d)` `if suffix:` `c['-s']=c['-s'].replace('genvalid','genvalid_'+suffix)` 
`if fi:` `c['--filein']='lhe:%d'%(fi,)` `if dataSet:` `c['--filein']='das:%s'%(dataSet,)` `c['cfg']=fragment` `return c` `def check_dups(input):` `seen = set()` `dups = set(x for x in input if x in seen or seen.add(x))` `return dups` `class AvailableGPU():` `def __init__(self, make, counter, id, capability, name):` `self.make = make` `self.counter = counter` `self.id = id` `self.capability = capability` `self.name = name` `def __str__(self):` `return "> GPU no.{0}: {1} - {2} - {3} - {4}".format(self.counter,self.make,self.id,self.capability,self.name)` `def isCUDA(self):` `return self.make == 'CUDA'` `def isROCM(self):` `return self.make == 'ROCM'` `def gpuBind(self):` `cmd = ''` `if self.make == 'CUDA':` `cmd = 'CUDA_VISIBLE_DEVICES=' + str(self.id) + " HIP_VISIBLE_DEVICES= "` `elif self.make == 'ROCM':` `cmd = 'CUDA_VISIBLE_DEVICES= HIP_VISIBLE_DEVICES=' + str(self.id) + " "` `return cmd` `def cleanComputeCapabilities(make, offset = 0):` `# Building on top of {cuda\|rocm}ComputeCapabilities` `# with output:` `# ID computeCapability Architetcure Model Info` `out = subprocess.run(make + "ComputeCapabilities", capture_output = True, text = True)` `if out.returncode > 0:` `return []` `gpus = []` `for f in out.stdout.split("\n"):` `if not len(f)>0:` `continue` `if "unsupported" in f:` `print("> Warning! Unsupported GPU:")` `print(" > " + " ".join(f))` `continue` `gpus.append(f.split())` `gpus = [AvailableGPU(make.upper(), i + offset, int(f[0]),f[1]," ".join(f[2:])) for i,f in enumerate(gpus)]` `return gpus`

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340

import os
import subprocess

class Matrix(dict):
    """Dictionary of workflows keyed by their workflow number.

    ``matrix[num] = steps`` wraps ``steps`` in a ``WF`` and stores it under
    ``float(num)``.  An entry that already exists is never replaced: the
    attempt is reported on stdout and otherwise ignored.
    """

    def __setitem__(self,key,value):
        """Register workflow ``key`` (anything ``float()`` accepts) with step list ``value``."""
        # Normalise before the duplicate check: entries are stored under
        # float(key), so testing the raw key would miss an equivalent key of
        # another type (e.g. the string "1.0") and silently overwrite it.
        num=float(key)
        if num in self:
            print("ERROR in Matrix")
            print("overwriting",key,"not allowed")
            return
        self.update({num:WF(num,value)})

    def addOverride(self,key,override):
        """Forward ``override`` to ``addOverride`` of the entry stored under ``key``."""
        self[key].addOverride(override)
            
#the class to collect all possible steps
class Steps(dict):
    """Write-once dictionary of step definitions.

    Plain item assignment refuses to replace an existing key and terminates
    the process instead; ``overwrite`` is the explicit way to (re)bind a key.
    """

    def __setitem__(self,key,value):
        """Store ``value`` under a new ``key``; exit with status -9 if ``key`` exists."""
        if key not in self:
            self.update({key:value})
            # make the python file named <step>.py
            #if not '--python' in value:                self[key].update({'--python':'%s.py'%(key,)})
            return
        print("ERROR in Step")
        print("overwriting",key,"not allowed")
        import sys
        sys.exit(-9)

    def overwrite(self,keypair):
        """Bind ``keypair[0]`` to the value currently stored under ``keypair[1]``.

        Goes through ``update`` and therefore skips the duplicate check of
        ``__setitem__``.
        """
        source_value=self[keypair[1]]
        target_key=keypair[0]
        self.update({target_key:source_value})
        
class WF(list):
    """A workflow: the list of its step names plus bookkeeping attributes.

    Attributes:
        num: the workflow number given at construction.
        steps: step definitions collected by ``interpret`` (initially empty).
        overrides: the object last passed to ``addOverride`` (initially ``{}``).
    """

    def __init__(self,n,l):
        """Create workflow number ``n`` whose list content is the items of ``l``."""
        self.extend(l)
        self.num=n
        #the actual steps of this WF
        self.steps=[]
        self.overrides={}

    def addOverride(self,overrides):
        """Replace (not merge) the stored overrides with ``overrides``."""
        self.overrides=overrides

    def interpret(self,stepsDict):
        """Look up every step name of this workflow in ``stepsDict``.

        Each resolved definition is printed and appended to ``self.steps``.
        Raises KeyError for a name missing from ``stepsDict``.
        """
        for s in self:
            print('steps',s,stepsDict[s])
            # was a bare `steps.append(...)`, i.e. a NameError on first use
            self.steps.append(stepsDict[s])
    


def expandLsInterval(lumis):
    """Return the range running from ``lumis[0]`` to ``lumis[1]``, both ends included."""
    first=lumis[0]
    last=lumis[1]
    return range(first,last+1)

# NOTE(review): findFileInPath (used just below) is not defined in the visible
# part of this module, so it presumably arrives through this star import --
# confirm before narrowing the import.
from DPGAnalysis.Skims.golden_json_2015 import *
# Locations of the two certification ("golden") JSON files.
jsonFile2015 = findFileInPath("DPGAnalysis/Skims/data/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_50ns_JSON.txt")
jsonFile2016 = findFileInPath("DPGAnalysis/Skims/data/Cert_271036-274240_13TeV_PromptReco_Collisions16_JSON.txt")

import json
# Both files are parsed at import time; data_json2015 is the default
# l_json argument of selectedLS() below.
with open(jsonFile2015) as data_file:
    data_json2015 = json.load(data_file)

with open(jsonFile2016) as data_file:
    data_json2016 = json.load(data_file)

# Return a portion of a golden JSON (the 2015 one by default):
# all lumi sections (LS) of the requested runs by default; otherwise a subset whose total size is capped by maxNum
def selectedLS(list_runs=[],maxNum=-1,l_json=data_json2015):
    """Pick lumi-section (LS) segments of the given runs out of a golden JSON.

    Args:
        list_runs: run numbers to look up.  Only read, never modified, so the
            shared default list is harmless.
        maxNum: cap on the cumulative number of LS; -1 (default) means no cap.
        l_json: mapping from run number *as a string* to a list of segments;
            the size of a segment is taken as ``segment[-1] - segment[0] + 1``.

    Returns:
        A dict ``{run: [segment, ...]}`` holding the accepted segments, or
        None when the first run is not an int or when nothing was selected.
    """
    # print "maxNum is %s"%(maxNum)
    # Only the first entry is type-checked, as before.  An empty list used to
    # raise IndexError here; it now falls through to the "nothing selected"
    # exit at the bottom.
    if list_runs and not isinstance(list_runs[0], int):
        print("ERROR: list_runs must be a list of integers")
        return None
    local_dict = {}
    ls_count = 0

    for run in list_runs:
        run_key = str(run)
        if run_key not in l_json:
            print("run %s is NOT present in json %s\n\n"%(run, l_json))
            continue
        for LSsegment in l_json[run_key]:
            # The counter is shared by all runs and includes the segment that
            # trips the cap, even though that segment is not stored.
            ls_count += (LSsegment[-1] - LSsegment[0] + 1)
            # logical `and` (was bitwise `&` on two booleans)
            if maxNum != -1 and ls_count > maxNum:
                break
            local_dict.setdefault(run, []).append(LSsegment)

    if local_dict:
        return local_dict
    print("No luminosity section interval passed the json and your selection; returning None")
    return None

# print "\n\n\n THIS IS WHAT I RETURN: %s \n\n"%( selectedLS([251244,251251]) )




InputInfoNDefault=2000000
class InputInfo(object):
    """Input dataset description plus builders for the matching DAS commands.

    The constructor only stores its arguments.  ``run`` (list of run numbers)
    and ``ls`` (dict ``{run: [lumi or [first, last], ...]}``) restrict the
    queries, ``ib_block`` switches from dataset to block queries,
    ``ib_blacklist`` is a list of patterns filtered out of the file list and
    ``skimEvents`` selects the ``das-up-to-nevents.py`` command instead of
    ``dasgoclient``.  The remaining attributes (``files``, ``events``,
    ``split``, ``location``, ``label``, ``dataSet``, ``dataSetParent``) are
    stored as given; of these only ``events`` and ``dataSet`` are read by
    the methods of this class.
    """

    def __init__(self,dataSet,dataSetParent='',label='',run=None,ls=None,files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None,skimEvents=False) :
        # None stands for "empty": a literal []/{} default would be one object
        # shared by every instance created without the argument.
        self.run = [] if run is None else run
        self.ls = {} if ls is None else ls
        self.files = files
        self.events = events
        self.location = location
        self.label = label
        self.dataSet = dataSet
        self.split = split
        self.ib_blacklist = ib_blacklist
        self.ib_block = ib_block
        self.dataSetParent = dataSetParent
        self.skimEvents = skimEvents

    def das(self, das_options, dataset):
        """Return the shell command line that lists the input files of ``dataset``.

        Args:
            das_options: option string inserted verbatim after ``dasgoclient``.
            dataset: dataset name used in the queries.

        The result depends on the environment: JENKINS_PREFIX (adds ``-pc``
        in skim mode) and CMSSW_USE_IBEOS (selects the final sort command).
        """
        if self.skimEvents:
            if os.getenv("JENKINS_PREFIX") is not None:
                # to be sure that whatever happens the files are only those at CERN
                command = "das-up-to-nevents.py -d %s -e %d -pc -l lumi_ranges.txt"%(dataset,self.events)
            else:
                command = "das-up-to-nevents.py -d %s -e %d -l lumi_ranges.txt"%(dataset,self.events)
        elif self.run or self.ls:
            queries = self.queries(dataset)
            if self.run:
                command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])
            else:
                lumis = self.lumis()
                # queries and lumis are both ordered by sorted run, so they
                # pair up; walking them backwards keeps the historical command
                # order (highest run first).
                command = ";".join(
                    "dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, query, lumi)
                    for query, lumi in zip(reversed(queries), reversed(lumis))
                )
            # group the ';'-separated commands so later pipes see all of them
            command = "({0})".format(command)
        else:
            command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])

        # Run filter on DAS output
        if self.ib_blacklist:
            command += " | grep -E -v "
            command += " ".join(["-e '{0}'".format(pattern) for pattern in self.ib_blacklist])

        if self.skimEvents:
            ## keep run-lumi sorting
            return command
        if os.getenv("CMSSW_USE_IBEOS","false")=="true":
            return "export CMSSW_USE_IBEOS=true; " + command + " | ibeos-lfn-sort"
        return command + " | sort -u"

    def lumiRanges(self):
        """Return an ``echo`` command printing the run/LS selection as JSON text, or None.

        With ``run`` set every run gets the single range [1, 268435455];
        otherwise the ranges stored in ``ls`` are used.
        """
        if self.run:
            return "echo '{\n"+",".join(('"%d":[[1,268435455]]\n'%(x,) for x in self.run))+"}'"
        if self.ls :
            return "echo '{\n"+",".join(('"%d" : %s\n'%( int(x),self.ls[x]) for x in self.ls))+"}'"
        return None

    def lumis(self):
        """Return one string per run of ``ls`` (sorted by run).

        Within a string the entries are joined by ':'; a single lumi is
        written as ``N`` and a range as ``first,last``.
        """
        query_lumis = []
        if self.ls:
            for run in sorted(self.ls):
                run_lumis = []
                for rng in self.ls[run]:
                    if isinstance(rng, int):
                        run_lumis.append(str(rng))
                    else:
                        run_lumis.append(str(rng[0])+","+str(rng[1]))
                query_lumis.append(":".join(run_lumis))
        return query_lumis

    def queries(self, dataset):
        """Return the list of DAS ``file`` queries for ``dataset``.

        Queries address ``block=<dataset>#<ib_block>`` when ``ib_block`` is set
        and ``dataset=<dataset>`` otherwise.  With ``ls`` set there is one
        query per run of ``ls`` and no site restriction.  Otherwise the site
        defaults to T2_CH_CERN and can be changed (or removed, with an empty
        value) through the CMSSW_DAS_QUERY_SITES environment variable.
        """
        query_by = "block" if self.ib_block else "dataset"
        query_source = "{0}#{1}".format(dataset, self.ib_block) if self.ib_block else dataset

        if self.ls :
            # if you have a LS list specified, still query das for the full run (multiple ls queries take forever)
            # and use step1_lumiRanges.log to run only on LS which respect your selection

            # DO WE WANT T2_CERN ?
            return ["file {0}={1} run={2}".format(query_by, query_source, query_run) for query_run in sorted(self.ls)]

        site = " site=T2_CH_CERN"
        if "CMSSW_DAS_QUERY_SITES" in os.environ:
            if os.environ["CMSSW_DAS_QUERY_SITES"]:
                site = " site=%s" % os.environ["CMSSW_DAS_QUERY_SITES"]
            else:
                site = ""
        if self.run:
            return ["file {0}={1} run={2}{3}".format(query_by, query_source, query_run, site) for query_run in self.run]
        return ["file {0}={1}{2}".format(query_by, query_source, site)]

    def __str__(self):
        """Return a one-line description: dataset (with ``#block`` if set) and run list."""
        if self.ib_block:
            # The block id belongs to the dataset name (same "<dataset>#<block>"
            # form as in queries()); it used to be printed in the run slot.
            return "input from: {0}#{1} with run {2}".format(self.dataSet, self.ib_block, self.run)
        return "input from: {0} with run {1}".format(self.dataSet, self.run)

    
# merge dictionaries, with priority on the [0] index
def merge(dictlist,TELL=False):
    """Return a shallow merge of the dictionaries in ``dictlist``.

    On key clashes the entry with the lowest index wins.  The inputs are not
    modified: the work is done on copies.  ``TELL`` prints the intermediate
    lists of every reduction step.
    """
    import copy
    pending=dictlist
    while True:
        last=len(pending)-1
        if TELL: print(last,pending)
        if last==0:
            # ONLY ONE ITEM LEFT
            return copy.copy(pending[0])
        # everything in front of the last two items is carried over untouched
        shorter=pending[0:max(0,last-1)]
        if TELL: print(shorter)
        # fold the last two items into one: copy the last item and let the
        # last-but-one item override it
        folded=copy.copy(pending[last])
        folded.update(pending[last-1])
        shorter.append(folded)
        # go round again with the list that is one item shorter
        pending=shorter

def remove(d,key,TELL=False):
    """Return a deep copy of ``d`` from which ``key`` has been deleted.

    ``d`` itself is left untouched.  A missing ``key`` raises the error of the
    underlying ``del``.  ``TELL`` prints the container before and after.
    """
    from copy import deepcopy
    pruned = deepcopy(d)
    if TELL:
        print("original dict, BEF: %s"%d)
    del pruned[key]
    if TELL:
        print("copy-removed dict, AFT: %s"%pruned)
    return pruned


#### Standard release validation samples ####

# Option dictionary selecting the 'auto:run1_mc' conditions.
# NOTE(review): not referenced in the visible part of this module; presumably
# imported by the modules that define the steps -- confirm.
stCond={'--conditions':'auto:run1_mc'}
def Kby(N,s):
    """Return a ``--relval`` option dict whose value is ``N`` followed by '000', a comma and ``s``."""
    relval = str(N) + '000,' + str(s)
    return {'--relval': relval}
def Mby(N,s):
    """Return a ``--relval`` option dict whose value is ``N`` followed by '000000', a comma and ``s``."""
    relval = str(N) + '000000,' + str(s)
    return {'--relval': relval}

def changeRefRelease(steps,listOfPairs):
    """Rewrite reference strings in the inputs of every step, in place.

    Args:
        steps: dict of step dictionaries.
        listOfPairs: sequence of ``(ref, newRef)`` pairs; each occurrence of
            ``ref`` is replaced by ``newRef``.

    Two entries of a step are touched when present: the ``dataSet`` attribute
    of its 'INPUT' object and its '--pileup_input' string.  In both cases the
    pairs are applied one after the other to the current value.
    """
    for s in steps:
        step=steps[s]
        if 'INPUT' in step:
            info=step['INPUT']
            for (ref,newRef) in listOfPairs:
                # Test and replace on the *current* name: working from a
                # snapshot taken before the loop dropped every replacement
                # except the last matching one.
                if ref in info.dataSet:
                    info.dataSet=info.dataSet.replace(ref,newRef)
        if '--pileup_input' in step:
            for (ref,newRef) in listOfPairs:
                if ref in step['--pileup_input']:
                    step['--pileup_input']=step['--pileup_input'].replace(ref,newRef)
        
def addForAll(steps,d):
    """Update every step dictionary held in ``steps`` with the entries of ``d``, in place."""
    for settings in steps.values():
        settings.update(d)


def genvalid(fragment,d,suffix='all',fi='',dataSet=''):
    """Return a shallow copy of the option dict ``d`` tailored to ``fragment``.

    * a non-empty ``suffix`` turns 'genvalid' into 'genvalid_<suffix>' in the
      '-s' option (which must then exist in ``d``);
    * ``fi`` sets '--filein' to 'lhe:<fi>' and ``dataSet`` sets it to
      'das:<dataSet>'; ``dataSet`` is applied last and therefore wins;
    * 'cfg' is set to ``fragment``.
    """
    from copy import copy as shallow_copy
    options=shallow_copy(d)
    if suffix:
        sequence=options['-s']
        options['-s']=sequence.replace('genvalid','genvalid_'+suffix)
    if fi:
        options['--filein']='lhe:%d'%(fi,)
    if dataSet:
        options['--filein']='das:%s'%(dataSet,)
    options['cfg']=fragment
    return options

def check_dups(input):
    """Return the set of items that occur more than once in ``input``."""
    seen = set()
    dups = set()
    for item in input:
        if item in seen:
            dups.add(item)
        else:
            seen.add(item)
    return dups

class AvailableGPU():
    """Record describing one GPU: make, running counter, device id, capability and name."""

    def __init__(self, make, counter, id, capability, name):
        self.make, self.counter, self.id = make, counter, id
        self.capability, self.name = capability, name

    def __str__(self):
        fields = (self.counter, self.make, self.id, self.capability, self.name)
        return "> GPU no.{0}: {1} - {2} - {3} - {4}".format(*fields)

    def isCUDA(self):
        """Tell whether the make is exactly 'CUDA'."""
        return 'CUDA' == self.make

    def isROCM(self):
        """Tell whether the make is exactly 'ROCM'."""
        return 'ROCM' == self.make

    def gpuBind(self):
        """Return the environment-variable prefix exposing only this device.

        The variable of the matching make is set to the device id and the
        other one is set to empty; any other make gives an empty string.
        """
        if self.make == 'CUDA':
            return 'CUDA_VISIBLE_DEVICES=' + str(self.id) + " HIP_VISIBLE_DEVICES= "
        if self.make == 'ROCM':
            return 'CUDA_VISIBLE_DEVICES= HIP_VISIBLE_DEVICES=' + str(self.id) + " "
        return ''

def cleanComputeCapabilities(make, offset = 0):
    """Run ``<make>ComputeCapabilities`` and turn its output into AvailableGPU objects.

    Args:
        make: prefix of the tool name (the comment below mentions cuda and
            rocm); its upper-cased form becomes the ``make`` of each GPU.
        offset: value added to the running counter of the returned GPUs.

    Returns:
        A list of AvailableGPU, one per non-empty output line that does not
        contain "unsupported".  The list is empty when the tool cannot be
        found or exits with a non-zero status.
    """
    # Building on top of {cuda|rocm}ComputeCapabilities
    # with output:
    # ID     computeCapability    Architecture Model Info

    try:
        out = subprocess.run(make + "ComputeCapabilities", capture_output = True, text = True)
    except FileNotFoundError:
        # tool not installed: report no GPUs, as for a failing tool
        return []

    # non-zero covers negative codes (terminated by a signal) as well
    if out.returncode != 0:
        return []

    gpus = []
    for line in out.stdout.split("\n"):
        fields = line.split()
        # skips empty lines and lines holding only whitespace
        if not fields:
            continue

        if "unsupported" in line:
            print("> Warning! Unsupported GPU:")
            # join the fields: joining the string itself spaced out every character
            print(" > " + " ".join(fields))
            continue

        gpus.append(AvailableGPU(make.upper(), len(gpus) + offset, int(fields[0]), fields[1], " ".join(fields[2:])))

    return gpus