File indexing completed on 2024-04-06 12:23:38
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #include <vector>
0018 #include <memory>
0019 #include <cmath>
0020
0021 #include "CondFormats/PhysicsToolsObjects/interface/Histogram.h"
0022 #include "PhysicsTools/MVAComputer/interface/VarProcessor.h"
0023 #include "PhysicsTools/MVAComputer/interface/Calibration.h"
0024 #include "PhysicsTools/MVAComputer/interface/Spline.h"
0025 #include "FWCore/Utilities/interface/isFinite.h"
0026
0027 using namespace PhysicsTools;
0028
0029 namespace {
0030
0031 class ProcLikelihood : public VarProcessor {
0032 public:
0033 typedef VarProcessor::Registry::Registry<ProcLikelihood, Calibration::ProcLikelihood> Registry;
0034
0035 ProcLikelihood(const char *name, const Calibration::ProcLikelihood *calib, const MVAComputer *computer);
0036 ~ProcLikelihood() override {}
0037
0038 void configure(ConfIterator iter, unsigned int n) override;
0039 void eval(ValueIterator iter, unsigned int n) const override;
0040 std::vector<double> deriv(ValueIterator iter, unsigned int n) const override;
0041
0042 private:
0043 struct PDF {
0044 virtual ~PDF() {}
0045 virtual double eval(double value) const = 0;
0046 virtual double deriv(double value) const = 0;
0047
0048 double norm;
0049 };
0050
0051 struct SplinePDF : public PDF {
0052 SplinePDF(const Calibration::HistogramF *calib) : min(calib->range().min), width(calib->range().width()) {
0053 std::vector<double> values(calib->values().begin() + 1, calib->values().end() - 1);
0054 spline.set(values.size(), &values.front());
0055 }
0056
0057 double eval(double value) const override;
0058 double deriv(double value) const override;
0059
0060 double min, width;
0061 Spline spline;
0062 };
0063
0064 struct HistogramPDF : public PDF {
0065 HistogramPDF(const Calibration::HistogramF *calib) : histo(calib) {}
0066
0067 double eval(double value) const override;
0068 double deriv(double value) const override;
0069
0070 const Calibration::HistogramF *histo;
0071 };
0072
0073 struct SigBkg {
0074 SigBkg(const Calibration::ProcLikelihood::SigBkg &calib) {
0075 if (calib.useSplines) {
0076 signal = std::unique_ptr<PDF>(new SplinePDF(&calib.signal));
0077 background = std::unique_ptr<PDF>(new SplinePDF(&calib.background));
0078 } else {
0079 signal = std::unique_ptr<PDF>(new HistogramPDF(&calib.signal));
0080 background = std::unique_ptr<PDF>(new HistogramPDF(&calib.background));
0081 }
0082 double norm = (calib.signal.numberOfBins() + calib.background.numberOfBins()) / 2.0;
0083 signal->norm = norm;
0084 background->norm = norm;
0085 }
0086
0087 std::unique_ptr<PDF> signal;
0088 std::unique_ptr<PDF> background;
0089 };
0090
0091 int findPDFs(ValueIterator iter,
0092 unsigned int n,
0093 std::vector<SigBkg>::const_iterator &begin,
0094 std::vector<SigBkg>::const_iterator &end) const;
0095
0096 std::vector<SigBkg> pdfs;
0097 std::vector<double> bias;
0098 int categoryIdx;
0099 bool logOutput;
0100 bool individual;
0101 bool neverUndefined;
0102 bool keepEmpty;
0103 unsigned int nCategories;
0104 };
0105
0106 ProcLikelihood::Registry registry("ProcLikelihood");
0107
0108 double ProcLikelihood::SplinePDF::eval(double value) const {
0109 value = (value - min) / width;
0110 return spline.eval(value) * norm / spline.getArea();
0111 }
0112
0113 double ProcLikelihood::SplinePDF::deriv(double value) const {
0114 value = (value - min) / width;
0115 return spline.deriv(value) * norm / spline.getArea();
0116 }
0117
0118 double ProcLikelihood::HistogramPDF::eval(double value) const { return histo->normalizedValue(value) * norm; }
0119
0120 double ProcLikelihood::HistogramPDF::deriv(double value) const { return 0; }
0121
0122 ProcLikelihood::ProcLikelihood(const char *name,
0123 const Calibration::ProcLikelihood *calib,
0124 const MVAComputer *computer)
0125 : VarProcessor(name, calib, computer),
0126 pdfs(calib->pdfs.begin(), calib->pdfs.end()),
0127 bias(calib->bias),
0128 categoryIdx(calib->categoryIdx),
0129 logOutput(calib->logOutput),
0130 individual(calib->individual),
0131 neverUndefined(calib->neverUndefined),
0132 keepEmpty(calib->keepEmpty),
0133 nCategories(1) {}
0134
0135 void ProcLikelihood::configure(ConfIterator iter, unsigned int n) {
0136 if (categoryIdx >= 0) {
0137 if ((int)n < categoryIdx + 1)
0138 return;
0139 nCategories = pdfs.size() / (n - 1);
0140 if (nCategories * (n - 1) != pdfs.size())
0141 return;
0142 if (!bias.empty() && bias.size() != nCategories)
0143 return;
0144 } else if (n != pdfs.size() || bias.size() > 1)
0145 return;
0146
0147 int i = 0;
0148 while (iter) {
0149 if (categoryIdx == i++)
0150 iter++(Variable::FLAG_NONE);
0151 else
0152 iter++(Variable::FLAG_ALL);
0153 }
0154
0155 if (individual) {
0156 for (unsigned int i = 0; i < pdfs.size(); i += nCategories)
0157 iter << (neverUndefined ? Variable::FLAG_NONE : Variable::FLAG_OPTIONAL);
0158 } else
0159 iter << (neverUndefined ? Variable::FLAG_NONE : Variable::FLAG_OPTIONAL);
0160 }
0161
0162 int ProcLikelihood::findPDFs(ValueIterator iter,
0163 unsigned int n,
0164 std::vector<SigBkg>::const_iterator &begin,
0165 std::vector<SigBkg>::const_iterator &end) const {
0166 int cat;
0167 if (categoryIdx >= 0) {
0168 ValueIterator iter2 = iter;
0169 for (int i = 0; i < categoryIdx; i++)
0170 ++iter2;
0171
0172 cat = (int)*iter2;
0173 if (cat < 0 || (unsigned int)cat >= nCategories)
0174 return -1;
0175
0176 begin = pdfs.begin() + cat * (n - 1);
0177 end = begin + (n - 1);
0178 } else {
0179 cat = 0;
0180 begin = pdfs.begin();
0181 end = pdfs.end();
0182 }
0183
0184 return cat;
0185 }
0186
0187 void ProcLikelihood::eval(ValueIterator iter, unsigned int n) const {
0188 std::vector<SigBkg>::const_iterator pdf, last;
0189 int cat = findPDFs(iter, n, pdf, last);
0190 int vars = 0;
0191 long double signal = bias.empty() ? 1.0 : bias[cat];
0192 long double background = 1.0;
0193
0194 if (cat < 0) {
0195 if (individual)
0196 for (unsigned int i = 0; i < n; i++)
0197 iter();
0198 else
0199 iter();
0200 return;
0201 }
0202
0203 for (int i = 0; pdf != last; ++iter, i++) {
0204 if (i == categoryIdx)
0205 continue;
0206
0207 for (double *value = iter.begin(); value < iter.end(); value++) {
0208 double signalProb = std::max(0.0, pdf->signal->eval(*value));
0209 double backgroundProb = std::max(0.0, pdf->background->eval(*value));
0210 if (!keepEmpty && !individual && signalProb + backgroundProb < 1.0e-20)
0211 continue;
0212 vars++;
0213
0214 if (individual) {
0215 signalProb *= signal;
0216 backgroundProb *= background;
0217 if (logOutput) {
0218 if (signalProb < 1.0e-9 && backgroundProb < 1.0e-9) {
0219 if (!neverUndefined)
0220 continue;
0221 iter << 0.0;
0222 } else if (signalProb < 1.0e-9)
0223 iter << -99999.0;
0224 else if (backgroundProb < 1.0e-9)
0225 iter << +99999.0;
0226 else
0227 iter << (std::log(signalProb) - std::log(backgroundProb));
0228 } else {
0229 double sum = signalProb + backgroundProb;
0230 if (sum > 1.0e-9)
0231 iter << (signalProb / sum);
0232 else if (neverUndefined)
0233 iter << 0.5;
0234 }
0235 } else {
0236 signal *= signalProb;
0237 background *= backgroundProb;
0238 }
0239 }
0240
0241 ++pdf;
0242 if (individual)
0243 iter();
0244 }
0245
0246 if (!individual) {
0247 if (!vars || signal + background < std::exp(-7 * vars - 3)) {
0248 if (neverUndefined)
0249 iter(logOutput ? 0.0 : 0.5);
0250 else
0251 iter();
0252 } else if (logOutput) {
0253 if (signal < 1.0e-9 && background < 1.0e-9) {
0254 if (neverUndefined)
0255 iter(0.0);
0256 else
0257 iter();
0258 } else if (signal < 1.0e-9)
0259 iter(-99999.0);
0260 else if (background < 1.0e-9)
0261 iter(+99999.0);
0262 else
0263 iter(std::log(signal) - std::log(background));
0264 } else
0265 iter(signal / (signal + background));
0266 }
0267 }
0268
0269 std::vector<double> ProcLikelihood::deriv(ValueIterator iter, unsigned int n) const {
0270 std::vector<SigBkg>::const_iterator pdf, last;
0271 int cat = findPDFs(iter, n, pdf, last);
0272 int vars = 0;
0273 long double signal = bias.empty() ? 1.0 : bias[cat];
0274 long double background = 1.0;
0275
0276 std::vector<double> result;
0277 if (cat < 0)
0278 return result;
0279
0280 unsigned int size = 0;
0281 for (ValueIterator iter2 = iter; iter2; ++iter2)
0282 size += iter2.size();
0283
0284
0285
0286
0287 if (!individual)
0288 result.resize(size);
0289
0290 unsigned int j = 0;
0291 for (int i = 0; pdf != last; ++iter, i++) {
0292 if (i == categoryIdx) {
0293 j += iter.size();
0294 continue;
0295 }
0296
0297 for (double *value = iter.begin(); value < iter.end(); value++, j++) {
0298 double signalProb = pdf->signal->eval(*value);
0299 double signalDiff = pdf->signal->deriv(*value);
0300 if (signalProb < 0.0)
0301 signalProb = signalDiff = 0.0;
0302
0303 double backgroundProb = pdf->background->eval(*value);
0304 double backgroundDiff = pdf->background->deriv(*value);
0305 if (backgroundProb < 0.0)
0306 backgroundProb = backgroundDiff = 0.0;
0307
0308 if (!keepEmpty && !individual && signalProb + backgroundProb < 1.0e-20)
0309 continue;
0310 vars++;
0311
0312 if (individual) {
0313 signalProb *= signal;
0314 signalDiff *= signal;
0315 backgroundProb *= background;
0316 backgroundDiff *= background;
0317 if (logOutput) {
0318 if (signalProb < 1.0e-9 && backgroundProb < 1.0e-9) {
0319 if (!neverUndefined)
0320 continue;
0321 result.resize(result.size() + size);
0322 } else if (signalProb < 1.0e-9 || backgroundProb < 1.0e-9)
0323 result.resize(result.size() + size);
0324 else {
0325 result.resize(result.size() + size);
0326 result[result.size() - size + j] = signalDiff / signalProb - backgroundDiff / backgroundProb;
0327 }
0328 } else {
0329 double sum = signalProb + backgroundProb;
0330 if (sum > 1.0e-9) {
0331 result.resize(result.size() + size);
0332 result[result.size() - size + j] =
0333 (signalDiff * backgroundProb - signalProb * backgroundDiff) / (sum * sum);
0334 } else if (neverUndefined)
0335 result.resize(result.size() + size);
0336 }
0337 } else {
0338 signal *= signalProb;
0339 background *= backgroundProb;
0340 double s = signalDiff / signalProb;
0341 if (edm::isNotFinite(s))
0342 s = 0.0;
0343 double b = backgroundDiff / backgroundProb;
0344 if (edm::isNotFinite(b))
0345 b = 0.0;
0346
0347 result[j] = s - b;
0348 }
0349 }
0350
0351 ++pdf;
0352 }
0353
0354 if (!individual) {
0355 if (!vars || signal + background < std::exp(-7 * vars - 3)) {
0356 if (neverUndefined)
0357 std::fill(result.begin(), result.end(), 0.0);
0358 else
0359 result.clear();
0360 } else if (logOutput) {
0361 if (signal < 1.0e-9 && background < 1.0e-9) {
0362 if (neverUndefined)
0363 std::fill(result.begin(), result.end(), 0.0);
0364 else
0365 result.clear();
0366 } else if (signal < 1.0e-9 || background < 1.0e-9)
0367 std::fill(result.begin(), result.end(), 0.0);
0368 else {
0369
0370 }
0371 } else {
0372 double factor = signal * background / ((signal + background) * (signal + background));
0373 for (std::vector<double>::iterator p = result.begin(); p != result.end(); ++p)
0374 *p *= factor;
0375 }
0376 }
0377
0378 return result;
0379 }
0380
0381 }