# File indexing completed on 2023-03-17 10:56:15
0001 import FWCore.ParameterSet.Config as cms
0002 from copy import deepcopy
0003
0004
0005
0006
0007
0008
0009
0010
# Step type codes. Each entry appended to a Specification's `spec` list
# carries one of these in its `type` field.
# NOTE(review): the value 7 is not assigned here; presumably these numbers
# mirror an enum on the consuming side -- confirm before renumbering.
NO_TYPE = cms.int32(0)
GROUPBY = cms.int32(1)
EXTEND_X = cms.int32(2)
EXTEND_Y = cms.int32(3)
COUNT = cms.int32(4)
REDUCE = cms.int32(5)
SAVE = cms.int32(6)
USE_X = cms.int32(8)
USE_Y = cms.int32(9)
USE_Z = cms.int32(10)
PROFILE = cms.int32(11)

# Stage codes. They are stored in the `stage` field of each spec entry and
# also serve as the internal state of the Specification builder.
NO_STAGE = cms.int32(0)
FIRST = cms.int32(1)
STAGE1 = cms.int32(2)
STAGE2 = cms.int32(3)
0030
0031
def val(maybecms):
    """Unwrap a value that may be wrapped in an object with a ``value()`` method.

    Objects exposing a ``value`` attribute are unwrapped by calling it;
    anything else is returned unchanged.
    """
    if not hasattr(maybecms, "value"):
        return maybecms
    return maybecms.value()
0037
def parent(path):
    """Return `path` with its last "/"-separated component removed.

    `path` is unwrapped with val() first. A path without any "/" has no
    parent and yields the empty string.
    """
    # Everything before the last "/" -- identical to splitting on "/" and
    # re-joining all but the final part.
    head, _sep, _tail = val(path).rpartition("/")
    return head
0041
0042
# Configuration used by Specification when the caller supplies none.
# It holds a single flag, `enabled`, which defaults to True.
DefaultConf = cms.PSet(
    enabled = cms.bool(True)
)
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
class Specification(cms.PSet):
    """Chainable builder for a list of histogram processing steps.

    The steps are collected in `spec`, a cms.VPSet whose entries are PSets
    with the fields `type`, `stage`, `columns`, `arg`, `nbins`, `xmin` and
    `xmax`. All public builder methods return `self` so that calls can be
    chained.

    The builder keeps an internal state that starts at FIRST. A plain
    groupBy() moves it to STAGE1 (a groupBy() containing the "Event" column
    is recorded as a COUNT step and stays in FIRST); save() moves it to
    STAGE2, which the error messages refer to as "harvesting".
    """

    def __init__(self, conf = DefaultConf):
        """Create an empty specification.

        conf -- configuration PSet stored as `self.conf`. The default is the
                module-level DefaultConf object, so instances created without
                an explicit `conf` are all handed that same object.
        """
        super(Specification, self).__init__()

        # the list of steps built up by groupBy()/reduce()/save()
        self.spec = cms.VPSet()

        self.conf = conf

        # builder bookkeeping; underscore-prefixed like the other internals
        self._activeColumns = set()
        self._state = FIRST

    def __deepcopy__(self, memo):
        """Return a copy whose `spec` is deep-copied.

        `conf` is handed to the new instance as-is instead of being
        deep-copied. The builder bookkeeping (state, active columns, axes) is
        not carried over: the copy starts out like a fresh Specification.
        """
        t = Specification(self.conf)
        t.spec = deepcopy(self.spec, memo)
        return t

    @staticmethod
    def _newStep(steptype, stage, arg, columns = None):
        """Build one spec entry; nbins/xmin/xmax start at -1/0/0.

        columns -- iterable of column names, or None for an empty list.
        """
        if columns is None:
            colparam = cms.vstring()
        else:
            colparam = cms.vstring(columns)
        return cms.PSet(
            type = steptype,
            stage = stage,
            columns = colparam,
            arg = cms.string(arg),
            nbins = cms.int32(-1), xmin = cms.int32(0), xmax = cms.int32(0)
        )

    def groupBy(self, cols, mode = "SUM"):
        """Add a grouping step; what it does depends on the builder state.

        cols -- "/"-separated column names (empty components are ignored);
                may be a plain string or an object accepted by val().
        mode -- "SUM" (default), "EXTEND_X" or "EXTEND_Y", subject to the
                per-state restrictions below.

        FIRST:  only "SUM" is accepted. If "Event" is among the columns it is
                removed and the step is recorded as COUNT, leaving the state
                at FIRST; otherwise a GROUPBY step is recorded, the three
                axis steps (USE_X/USE_Y/USE_Z) are appended and the state
                becomes STAGE1.
        STAGE1: only "EXTEND_X"/"EXTEND_Y" are accepted and exactly one of
                the active columns must be dropped. No step is appended;
                instead the matching axis step is retyped and given the
                dropped column, and that column is removed from the FIRST
                stage GROUPBY steps.
        STAGE2: "EXTEND_X" or "SUM" are accepted and at least one active
                column must be dropped.

        Raises Exception when one of these restrictions is violated.
        Returns self.
        """
        cnames = [c for c in val(cols).split("/") if c]
        newstate = self._state

        if self._state == FIRST:
            # cname deliberately aliases cnames: removing "Event" below must
            # be reflected in the columns recorded for the step.
            cname = cnames
            if mode != "SUM":
                raise Exception("First grouping must be SUM")
            if "Event" in cnames:
                cnames.remove("Event")
                t = COUNT
                mode = "COUNT"
                newstate = FIRST
            else:
                t = GROUPBY
                newstate = STAGE1

        elif self._state == STAGE1:
            dropped = self._activeColumns.difference(cnames)
            if len(dropped) != 1:
                raise Exception("EXTEND must drop exactly one column.")

            if mode == "EXTEND_X":
                self._x.type = EXTEND_X
                self._x.columns = cms.vstring(dropped)
            elif mode == "EXTEND_Y":
                self._y.type = EXTEND_Y
                self._y.columns = cms.vstring(dropped)
            else:
                raise Exception("Only EXTEND_X or EXTEND_Y allowed here, not " + mode)

            # The dropped column is now represented by an axis, so take it
            # out of the earlier groupings and out of the bookkeeping.
            c = list(dropped)[0]
            for s in self.spec:
                if s.stage == FIRST and s.type == GROUPBY and c in s.columns:
                    s.columns.remove(c)
            if c in self._activeColumns:
                self._activeColumns.remove(c)
            if c in self._lastColumns:
                self._lastColumns.remove(c)

            return self

        elif self._state == STAGE2:
            cname = cnames
            if self._activeColumns.issubset(cname):
                raise Exception("Harvesting GROUPBY must drop some columns")
            if mode == "EXTEND_X":
                t = EXTEND_X
            elif mode == "SUM":
                t = GROUPBY
            else:
                raise Exception("Currently only EXTEND_X and SUM supported in harvesting, not " + mode)

        else:
            # Not reachable through the public methods; fail clearly rather
            # than with an unbound local further down.
            raise Exception("Specification is in an unknown state.")

        self._activeColumns = set(cnames)
        self._lastColumns = cnames
        self._lastMode = mode

        self.spec.append(self._newStep(t, self._state, mode, cname))

        if newstate == STAGE1 and self._state == FIRST:
            # Entering STAGE1: add one placeholder step per axis. They are
            # kept as attributes because groupBy()/save() modify them later.
            self._x = self._newStep(USE_X, STAGE1, "")
            self.spec.append(self._x)
            self._y = self._newStep(USE_Y, STAGE1, "")
            self.spec.append(self._y)
            self._z = self._newStep(USE_Z, STAGE1, "")
            self.spec.append(self._z)

        self._state = newstate

        return self

    def reduce(self, sort):
        """Add a reduction step.

        sort -- "COUNT" while in state FIRST, "MEAN" afterwards.

        FIRST:  marks the first recorded step as COUNT; nothing is appended.
        STAGE1: "MEAN" appends a PROFILE step.
        STAGE2: "MEAN" appends a REDUCE step.

        Raises Exception for any other `sort`, and when called in state
        FIRST before any step has been recorded.
        Returns self.
        """
        if self._state == FIRST:
            if sort != "COUNT":
                raise Exception("First statement must be groupBy.")
            if len(self.spec) == 0:
                # No groupBy() yet, so there is no first step to mark.
                raise Exception("First statement must be groupBy.")
            self.spec[0].type = COUNT
            return self

        if self._state == STAGE1 and sort == "MEAN":
            self.spec.append(self._newStep(PROFILE, STAGE1, ""))
            return self

        if sort != "MEAN":
            raise Exception("Harvesting allows only reduce(MEAN) at the moment, not " + sort)

        self.spec.append(self._newStep(REDUCE, self._state, sort))
        return self

    def save(self, nbins=-1, xmin=0, xmax=0):
        """Finish the current stage and switch the builder to STAGE2.

        nbins, xmin, xmax -- range settings written to the axis steps; only
                             used when called in STAGE1.

        In STAGE1 every axis step that still has its USE_X/USE_Y/USE_Z type
        (i.e. was not retyped by groupBy) gets a running number, starting at
        1, in `arg` together with the given range settings.

        Raises Exception when called in state FIRST.
        Returns self.
        """
        if self._state == FIRST:
            raise Exception("First statement must be groupBy.")

        if self._state == STAGE1:
            n = 1
            for axis, unused in ((self._x, USE_X), (self._y, USE_Y), (self._z, USE_Z)):
                if axis.type == unused:
                    axis.arg = cms.string(str(n))
                    n = n + 1
                    axis.nbins = cms.int32(nbins)
                    axis.xmin = cms.int32(xmin)
                    axis.xmax = cms.int32(xmax)

        self._state = STAGE2

        return self

    def saveAll(self):
        """Save the current grouping and every coarser prefix of it.

        Calls save(), then for each proper, non-empty prefix of the columns
        of the last grouping (longest first) groups by that prefix using the
        last grouping mode and saves again.
        Returns self.
        """
        self.save()
        # groupBy() rebinds self._lastColumns, so work on the list as it is now
        columns = self._lastColumns
        for i in range(len(columns) - 1, 0, -1):
            cols = columns[0:i]
            self.groupBy("/".join(cols), self._lastMode)
            self.save()
        return self

    def pythonTypeName(self):
        """Return the type name 'cms.PSet' rather than this subclass's name."""
        return 'cms.PSet'