SiPixelPhase1Common/python/SpecificationBuilder_cfi.py

0001 import FWCore.ParameterSet.Config as cms
0002 from copy import deepcopy
0003
0004 # this is a pure Python module to build the deeply nested PSets that describe a
0005 # SummationSpecification.
0006 # The C++ code assumes the form is fully correct, so you should always use this,
0007 # which outputs a valid form.
0008
# Internal specification step types. The numeric values need to stay in sync
# with the C++ enums. TODO: Can we use a Python3 enum or something similar?
NO_TYPE = cms.int32(0)
GROUPBY = cms.int32(1)   # real histograms, only "SUM"
EXTEND_X = cms.int32(2)  # concatenate, using a geometry column as coordinate axis
EXTEND_Y = cms.int32(3)
COUNT = cms.int32(4)     # only count entries, all values are dropped. Atm only step1.
REDUCE = cms.int32(5)    # harvesting operator histogram -> scalar, atm only MEAN
SAVE = cms.int32(6)      # marks the stage1/2 switch. Atm not used in execution.
# NOTE(review): the value 7 is skipped here -- presumably it belongs to a step
# type that only exists on the C++ side; confirm against the C++ enum.
USE_X = cms.int32(8)     # the arg-th fill(...) parameter is used for the respective axis
USE_Y = cms.int32(9)
USE_Z = cms.int32(10)
PROFILE = cms.int32(11)  # tells step1 to make a profile, related to REDUCE(MEAN)
0022
# A specification is broken down into stages, which are executed at different
# points in time: in the fill call, in per-event harvesting (counters), and in
# harvesting (DQM step2).
NO_STAGE = cms.int32(0)
FIRST = cms.int32(1)   # first grouping, before and/or after counting
STAGE1 = cms.int32(2)  # USE/EXTEND/PROFILE for step1
STAGE2 = cms.int32(3)  # REDUCE/EXTEND/GROUPBY/CUSTOM for harvesting
0030
0031 # small helpers
def val(maybecms):
  # Unwrap a cms-style parameter: anything that has a `value` attribute is
  # asked for its value(), everything else is handed back unchanged.
  return maybecms.value() if hasattr(maybecms, "value") else maybecms
0037
def parent(path):
  # Strip the last "/"-separated component from `path`; the result is "" when
  # there is no "/" in it. `path` is unwrapped with val() first.
  head, _sep, _last = val(path).rpartition("/")
  return head
0041
# Configuration used when a Specification is built without an explicit one.
# Do not change values here; pass in your own PSet instead.
DefaultConf = cms.PSet(enabled=cms.bool(True))
0044
0045
0046 # The internal Specification format is very rigid and looks much less like a
0047 # program in the internal form:
0048 # - There is one entry FIRST, which is a GROUPBY or COUNT and some columns
0049 # - There is another entry FIRST, which is a GROUPBY and some columns iff
0050 #   the one before was COUNT
0051 # - There are some entries STAGE1
0052 #  - There is one entry per dimension (ordered)
0053 #  - which is either USE_* or EXTEND_*
0054 #  - with one column, that is NOT listed in FIRST.
0055 #  - There is optionally an entry PROFILE to make a profile.
0056 # - There are 0-n steps STAGE2, which are one of GROUPBY, EXTEND_X
0057 #  - The argument for GROUPBY and EXTEND_X is a subset of columns of last step
0058 #  - SAVE is ignored
0059
class Specification(cms.PSet):
  # Builds, call by call, the list of step PSets (self.spec) that describes one
  # specification. All builder methods return self, so calls can be chained.
  # During construction the object walks through the states
  #   FIRST  -> groupBy() (optionally after a per-Event counting groupBy())
  #   STAGE1 -> groupBy() with EXTEND_X/EXTEND_Y, reduce("MEAN"); ended by save()
  #   STAGE2 -> groupBy()/reduce()/save() steps for harvesting

  def __init__(self, conf = DefaultConf):
    # conf: PSet kept by reference in self.conf (not copied).
    super(Specification, self).__init__()
    # these are the steps passed down to C++. Will be filled later.
    self.spec = cms.VPSet()
    # this is currently only an additional enable flag. Might add topFolder or
    # range there in the future.
    self.conf = conf

    # these are only used during construction.
    self._activeColumns = set()
    self._state = FIRST

  @staticmethod
  def _makeStep(stepType, stage, columns = None, arg = ""):
    # Build one step PSet in the fixed layout used for every entry of
    # self.spec. The binning fields start at "not set" (-1, 0, 0); save()
    # overwrites them for the USE_* steps.
    if columns is None:
      colnames = cms.vstring()
    else:
      colnames = cms.vstring(columns)
    return cms.PSet(
      type = stepType,
      stage = stage,
      columns = colnames,
      arg = cms.string(arg),
      nbins = cms.int32(-1), xmin = cms.int32(0), xmax = cms.int32(0)
    )

  def __deepcopy__(self, memo):
    # override deepcopy to not copy .conf: it should remain a reference
    # w/o this it is not cleanly possible to build a per-module switch.
    # Note that only .spec is copied; the construction state (_state,
    # _activeColumns, ...) of the copy starts out fresh.
    t = Specification(self.conf)
    t.spec = deepcopy(self.spec, memo)
    return t

  def groupBy(self, cols, mode = "SUM"):
    # Add a grouping over the "/"-separated column names in `cols`.
    #
    # cols: string (or object with .value()) like "A/B/C"; empty items are
    #       dropped.
    # mode: "SUM" (the only mode allowed for the first grouping),
    #       "EXTEND_X" or "EXTEND_Y" in STAGE1, "EXTEND_X" or "SUM" in STAGE2.
    # Returns self. Raises Exception if the mode or the columns are not
    # allowed in the current state.
    cnames = [c for c in val(cols).split("/") if c] # omit empty items
    newstate = self._state

    # The behaviour of groupBy depends a lot on when it happens:
    # - The first (or second, if there is per-event counting) are very special
    # - others in STAGE1 have to be EXTEND, and they will be translated into a
    #   list of exactly 3 USE/EXTEND steps (one per dimension).
    # - in STAGE2 they are just passed down to C++.

    if self._state == FIRST:
      if mode != "SUM":
        raise Exception("First grouping must be SUM")
      if "Event" in cnames:
        cnames.remove("Event") # per-Event grouping is done automatically
        t = COUNT
        mode = "COUNT"
        newstate = FIRST # stay here: the real first GROUPBY is still to come
      else:
        t = GROUPBY
        newstate = STAGE1

    elif self._state == STAGE1:
      dropped = self._activeColumns.difference(cnames)
      if len(dropped) != 1:
        raise Exception("EXTEND must drop exactly one column.")

      # no new step here: the axis step emitted after the first GROUPBY is
      # turned from USE_* into EXTEND_* over the dropped column.
      if mode == "EXTEND_X":
        self._x.type = EXTEND_X
        self._x.columns = cms.vstring(dropped)
      elif mode == "EXTEND_Y":
        self._y.type = EXTEND_Y
        self._y.columns = cms.vstring(dropped)
      else:
        raise Exception("Only EXTEND_X or EXTEND_Y allowed here, not " + mode)

      # remove the column in the FIRST groupBy, we always re-extract in step1.
      c = list(dropped)[0]
      for s in self.spec:
        if s.stage == FIRST and s.type == GROUPBY and c in s.columns:
          s.columns.remove(c)
      self._activeColumns.discard(c)
      if c in self._lastColumns:
        self._lastColumns.remove(c)

      return self # done here, no new step to add

    elif self._state == STAGE2:
      if self._activeColumns.issubset(cnames):
        raise Exception("Harvesting GROUPBY must drop some columns")
      if mode == "EXTEND_X":
        t = EXTEND_X
      elif mode == "SUM":
        t = GROUPBY
      else:
        raise Exception("Currently only EXTEND_X and SUM supported in harvesting, not " + mode)

    # remembered for later groupBy() calls and for saveAll()
    self._activeColumns = set(cnames)
    self._lastColumns = cnames
    self._lastMode = mode

    self.spec.append(self._makeStep(t, self._state, cnames, mode))

    # In the very beginning emit standard column assignments, they will be
    # changed later (above and in save()) to reflect the EXTENDS given above.
    if newstate == STAGE1 and self._state == FIRST:
      self._x = self._makeStep(USE_X, STAGE1)
      self.spec.append(self._x)
      self._y = self._makeStep(USE_Y, STAGE1)
      self.spec.append(self._y)
      self._z = self._makeStep(USE_Z, STAGE1)
      self.spec.append(self._z)

    self._state = newstate

    return self

  def reduce(self, sort):
    # reduce can be MEAN or COUNT. in STAGE2, just pass through.
    # in STAGE1, MEAN (anywhere) means make a PROFILE
    # COUNT can mean per-event counting or an occupancy plot, which is achieved
    # by ignoring the values passed to fill() (like dimensions=0, TODO).
    #
    # Returns self. Raises Exception if `sort` is not allowed in the current
    # state, or if no groupBy() has been issued yet.
    if self._state == FIRST:
      if sort != "COUNT":
        raise Exception("First statement must be groupBy.")
      if len(self.spec) == 0:
        # nothing to mark as COUNT yet; fail with a clear message instead of
        # an IndexError from the empty step list.
        raise Exception("First statement must be groupBy.")
      self.spec[0].type = COUNT # this is actually a noop
      # groupBy already saw the "Event" column and set up counting.

      return self

    if self._state == STAGE1:
      # anything but MEAN adds no step here.
      if sort == "MEAN":
        self.spec.append(self._makeStep(PROFILE, STAGE1))
      return self

    if sort != "MEAN":
      raise Exception("Harvesting allows only reduce(MEAN) at the moment, not " + sort)

    self.spec.append(self._makeStep(REDUCE, self._state, None, sort))
    return self

  def save(self, nbins=-1, xmin=0, xmax=0):
    # End the current stage. When called in STAGE1 this fixes the parameter
    # assignments of the USE_* axes and applies the binning (nbins, xmin,
    # xmax) to them; when called in STAGE2 the arguments are not used.
    # Returns self. Raises Exception if no grouping was done yet.
    if self._state == FIRST:
      raise Exception("First statement must be groupBy.")

    if self._state == STAGE1:
      # end of STAGE1, fix the parameter assignments: the axes that are still
      # USE_* get the fill(...) parameters 1, 2, ... in x, y, z order.
      # we don't know how many parameters the user wants to pass here, but the
      # HistogramManager knows. So we just add 3.
      n = 1
      for axis, useType in ((self._x, USE_X), (self._y, USE_Y), (self._z, USE_Z)):
        if axis.type == useType:
          axis.arg = cms.string(str(n))
          n = n+1
          axis.nbins = cms.int32(nbins)
          axis.xmin = cms.int32(xmin)
          axis.xmax = cms.int32(xmax)

    # SAVE is implicit in step1 and ignored in harvesting, so no SAVE step is
    # appended to self.spec.
    self._state = STAGE2

    return self

  def saveAll(self):
    # call groupBy() and save() until all columns are consumed: one column is
    # dropped from the end per round, down to a grouping by the first column.
    self.save()
    columns = self._lastColumns
    for i in range(len(columns)-1, 0, -1):
      cols = columns[0:i]
      self.groupBy("/".join(cols), self._lastMode)
      self.save()
    return self

  # this is used for serialization, and for that this is just a PSet.
  def pythonTypeName(self):
    return 'cms.PSet'