Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 13:32:42

0001 // -*- C++ -*-
0002 //
0003 // Package:     MVAComputer
0004 // Class  :     ProcLikelihood
0005 //
0006 
0007 // Implementation:
0008 //     A likelihood estimator variable processor. Reads in 0..n values for
0009 //     m variables and calculates the total signal/background likelihood
0010 //     using calibration PDFs for signal and background for each variable.
0011 //     The output variable is set to s/(s+b) (or log(s/b) for logOutput).
0012 //
0013 // Author:      Christophe Saout
0014 // Created:     Sat Apr 24 15:18 CEST 2007
0015 //
0016 
0017 #include <vector>
0018 #include <memory>
0019 #include <cmath>
0020 
0021 #include "CondFormats/PhysicsToolsObjects/interface/Histogram.h"
0022 #include "PhysicsTools/MVAComputer/interface/VarProcessor.h"
0023 #include "PhysicsTools/MVAComputer/interface/Calibration.h"
0024 #include "PhysicsTools/MVAComputer/interface/Spline.h"
0025 #include "FWCore/Utilities/interface/isFinite.h"
0026 
0027 using namespace PhysicsTools;
0028 
0029 namespace {  // anonymous
0030 
0031   class ProcLikelihood : public VarProcessor {
0032   public:
0033     typedef VarProcessor::Registry::Registry<ProcLikelihood, Calibration::ProcLikelihood> Registry;
0034 
0035     ProcLikelihood(const char *name, const Calibration::ProcLikelihood *calib, const MVAComputer *computer);
0036     ~ProcLikelihood() override {}
0037 
0038     void configure(ConfIterator iter, unsigned int n) override;
0039     void eval(ValueIterator iter, unsigned int n) const override;
0040     std::vector<double> deriv(ValueIterator iter, unsigned int n) const override;
0041 
0042   private:
0043     struct PDF {
0044       virtual ~PDF() {}
0045       virtual double eval(double value) const = 0;
0046       virtual double deriv(double value) const = 0;
0047 
0048       double norm;
0049     };
0050 
0051     struct SplinePDF : public PDF {
0052       SplinePDF(const Calibration::HistogramF *calib) : min(calib->range().min), width(calib->range().width()) {
0053         std::vector<double> values(calib->values().begin() + 1, calib->values().end() - 1);
0054         spline.set(values.size(), &values.front());
0055       }
0056 
0057       double eval(double value) const override;
0058       double deriv(double value) const override;
0059 
0060       double min, width;
0061       Spline spline;
0062     };
0063 
0064     struct HistogramPDF : public PDF {
0065       HistogramPDF(const Calibration::HistogramF *calib) : histo(calib) {}
0066 
0067       double eval(double value) const override;
0068       double deriv(double value) const override;
0069 
0070       const Calibration::HistogramF *histo;
0071     };
0072 
0073     struct SigBkg {
0074       SigBkg(const Calibration::ProcLikelihood::SigBkg &calib) {
0075         if (calib.useSplines) {
0076           signal = std::unique_ptr<PDF>(new SplinePDF(&calib.signal));
0077           background = std::unique_ptr<PDF>(new SplinePDF(&calib.background));
0078         } else {
0079           signal = std::unique_ptr<PDF>(new HistogramPDF(&calib.signal));
0080           background = std::unique_ptr<PDF>(new HistogramPDF(&calib.background));
0081         }
0082         double norm = (calib.signal.numberOfBins() + calib.background.numberOfBins()) / 2.0;
0083         signal->norm = norm;
0084         background->norm = norm;
0085       }
0086 
0087       std::unique_ptr<PDF> signal;
0088       std::unique_ptr<PDF> background;
0089     };
0090 
0091     int findPDFs(ValueIterator iter,
0092                  unsigned int n,
0093                  std::vector<SigBkg>::const_iterator &begin,
0094                  std::vector<SigBkg>::const_iterator &end) const;
0095 
0096     std::vector<SigBkg> pdfs;
0097     std::vector<double> bias;
0098     int categoryIdx;
0099     bool logOutput;
0100     bool individual;
0101     bool neverUndefined;
0102     bool keepEmpty;
0103     unsigned int nCategories;
0104   };
0105 
0106   ProcLikelihood::Registry registry("ProcLikelihood");
0107 
0108   double ProcLikelihood::SplinePDF::eval(double value) const {
0109     value = (value - min) / width;
0110     return spline.eval(value) * norm / spline.getArea();
0111   }
0112 
0113   double ProcLikelihood::SplinePDF::deriv(double value) const {
0114     value = (value - min) / width;
0115     return spline.deriv(value) * norm / spline.getArea();
0116   }
0117 
0118   double ProcLikelihood::HistogramPDF::eval(double value) const { return histo->normalizedValue(value) * norm; }
0119 
0120   double ProcLikelihood::HistogramPDF::deriv(double value) const { return 0; }
0121 
0122   ProcLikelihood::ProcLikelihood(const char *name,
0123                                  const Calibration::ProcLikelihood *calib,
0124                                  const MVAComputer *computer)
0125       : VarProcessor(name, calib, computer),
0126         pdfs(calib->pdfs.begin(), calib->pdfs.end()),
0127         bias(calib->bias),
0128         categoryIdx(calib->categoryIdx),
0129         logOutput(calib->logOutput),
0130         individual(calib->individual),
0131         neverUndefined(calib->neverUndefined),
0132         keepEmpty(calib->keepEmpty),
0133         nCategories(1) {}
0134 
0135   void ProcLikelihood::configure(ConfIterator iter, unsigned int n) {
0136     if (categoryIdx >= 0) {
0137       if ((int)n < categoryIdx + 1)
0138         return;
0139       nCategories = pdfs.size() / (n - 1);
0140       if (nCategories * (n - 1) != pdfs.size())
0141         return;
0142       if (!bias.empty() && bias.size() != nCategories)
0143         return;
0144     } else if (n != pdfs.size() || bias.size() > 1)
0145       return;
0146 
0147     int i = 0;
0148     while (iter) {
0149       if (categoryIdx == i++)
0150         iter++(Variable::FLAG_NONE);
0151       else
0152         iter++(Variable::FLAG_ALL);
0153     }
0154 
0155     if (individual) {
0156       for (unsigned int i = 0; i < pdfs.size(); i += nCategories)
0157         iter << (neverUndefined ? Variable::FLAG_NONE : Variable::FLAG_OPTIONAL);
0158     } else
0159       iter << (neverUndefined ? Variable::FLAG_NONE : Variable::FLAG_OPTIONAL);
0160   }
0161 
0162   int ProcLikelihood::findPDFs(ValueIterator iter,
0163                                unsigned int n,
0164                                std::vector<SigBkg>::const_iterator &begin,
0165                                std::vector<SigBkg>::const_iterator &end) const {
0166     int cat;
0167     if (categoryIdx >= 0) {
0168       ValueIterator iter2 = iter;
0169       for (int i = 0; i < categoryIdx; i++)
0170         ++iter2;
0171 
0172       cat = (int)*iter2;
0173       if (cat < 0 || (unsigned int)cat >= nCategories)
0174         return -1;
0175 
0176       begin = pdfs.begin() + cat * (n - 1);
0177       end = begin + (n - 1);
0178     } else {
0179       cat = 0;
0180       begin = pdfs.begin();
0181       end = pdfs.end();
0182     }
0183 
0184     return cat;
0185   }
0186 
0187   void ProcLikelihood::eval(ValueIterator iter, unsigned int n) const {
0188     std::vector<SigBkg>::const_iterator pdf, last;
0189     int cat = findPDFs(iter, n, pdf, last);
0190     int vars = 0;
0191     long double signal = bias.empty() ? 1.0 : bias[cat];
0192     long double background = 1.0;
0193 
0194     if (cat < 0) {
0195       if (individual)
0196         for (unsigned int i = 0; i < n; i++)
0197           iter();
0198       else
0199         iter();
0200       return;
0201     }
0202 
0203     for (int i = 0; pdf != last; ++iter, i++) {
0204       if (i == categoryIdx)
0205         continue;
0206 
0207       for (double *value = iter.begin(); value < iter.end(); value++) {
0208         double signalProb = std::max(0.0, pdf->signal->eval(*value));
0209         double backgroundProb = std::max(0.0, pdf->background->eval(*value));
0210         if (!keepEmpty && !individual && signalProb + backgroundProb < 1.0e-20)
0211           continue;
0212         vars++;
0213 
0214         if (individual) {
0215           signalProb *= signal;
0216           backgroundProb *= background;
0217           if (logOutput) {
0218             if (signalProb < 1.0e-9 && backgroundProb < 1.0e-9) {
0219               if (!neverUndefined)
0220                 continue;
0221               iter << 0.0;
0222             } else if (signalProb < 1.0e-9)
0223               iter << -99999.0;
0224             else if (backgroundProb < 1.0e-9)
0225               iter << +99999.0;
0226             else
0227               iter << (std::log(signalProb) - std::log(backgroundProb));
0228           } else {
0229             double sum = signalProb + backgroundProb;
0230             if (sum > 1.0e-9)
0231               iter << (signalProb / sum);
0232             else if (neverUndefined)
0233               iter << 0.5;
0234           }
0235         } else {
0236           signal *= signalProb;
0237           background *= backgroundProb;
0238         }
0239       }
0240 
0241       ++pdf;
0242       if (individual)
0243         iter();
0244     }
0245 
0246     if (!individual) {
0247       if (!vars || signal + background < std::exp(-7 * vars - 3)) {
0248         if (neverUndefined)
0249           iter(logOutput ? 0.0 : 0.5);
0250         else
0251           iter();
0252       } else if (logOutput) {
0253         if (signal < 1.0e-9 && background < 1.0e-9) {
0254           if (neverUndefined)
0255             iter(0.0);
0256           else
0257             iter();
0258         } else if (signal < 1.0e-9)
0259           iter(-99999.0);
0260         else if (background < 1.0e-9)
0261           iter(+99999.0);
0262         else
0263           iter(std::log(signal) - std::log(background));
0264       } else
0265         iter(signal / (signal + background));
0266     }
0267   }
0268 
0269   std::vector<double> ProcLikelihood::deriv(ValueIterator iter, unsigned int n) const {
0270     std::vector<SigBkg>::const_iterator pdf, last;
0271     int cat = findPDFs(iter, n, pdf, last);
0272     int vars = 0;
0273     long double signal = bias.empty() ? 1.0 : bias[cat];
0274     long double background = 1.0;
0275 
0276     std::vector<double> result;
0277     if (cat < 0)
0278       return result;
0279 
0280     unsigned int size = 0;
0281     for (ValueIterator iter2 = iter; iter2; ++iter2)
0282       size += iter2.size();
0283 
0284     // The logic whether a variable is used or net depends on the
0285     // evaluation, so FFS copy the whole ****
0286 
0287     if (!individual)
0288       result.resize(size);
0289 
0290     unsigned int j = 0;
0291     for (int i = 0; pdf != last; ++iter, i++) {
0292       if (i == categoryIdx) {
0293         j += iter.size();
0294         continue;
0295       }
0296 
0297       for (double *value = iter.begin(); value < iter.end(); value++, j++) {
0298         double signalProb = pdf->signal->eval(*value);
0299         double signalDiff = pdf->signal->deriv(*value);
0300         if (signalProb < 0.0)
0301           signalProb = signalDiff = 0.0;
0302 
0303         double backgroundProb = pdf->background->eval(*value);
0304         double backgroundDiff = pdf->background->deriv(*value);
0305         if (backgroundProb < 0.0)
0306           backgroundProb = backgroundDiff = 0.0;
0307 
0308         if (!keepEmpty && !individual && signalProb + backgroundProb < 1.0e-20)
0309           continue;
0310         vars++;
0311 
0312         if (individual) {
0313           signalProb *= signal;
0314           signalDiff *= signal;
0315           backgroundProb *= background;
0316           backgroundDiff *= background;
0317           if (logOutput) {
0318             if (signalProb < 1.0e-9 && backgroundProb < 1.0e-9) {
0319               if (!neverUndefined)
0320                 continue;
0321               result.resize(result.size() + size);
0322             } else if (signalProb < 1.0e-9 || backgroundProb < 1.0e-9)
0323               result.resize(result.size() + size);
0324             else {
0325               result.resize(result.size() + size);
0326               result[result.size() - size + j] = signalDiff / signalProb - backgroundDiff / backgroundProb;
0327             }
0328           } else {
0329             double sum = signalProb + backgroundProb;
0330             if (sum > 1.0e-9) {
0331               result.resize(result.size() + size);
0332               result[result.size() - size + j] =
0333                   (signalDiff * backgroundProb - signalProb * backgroundDiff) / (sum * sum);
0334             } else if (neverUndefined)
0335               result.resize(result.size() + size);
0336           }
0337         } else {
0338           signal *= signalProb;
0339           background *= backgroundProb;
0340           double s = signalDiff / signalProb;
0341           if (edm::isNotFinite(s))
0342             s = 0.0;
0343           double b = backgroundDiff / backgroundProb;
0344           if (edm::isNotFinite(b))
0345             b = 0.0;
0346 
0347           result[j] = s - b;
0348         }
0349       }
0350 
0351       ++pdf;
0352     }
0353 
0354     if (!individual) {
0355       if (!vars || signal + background < std::exp(-7 * vars - 3)) {
0356         if (neverUndefined)
0357           std::fill(result.begin(), result.end(), 0.0);
0358         else
0359           result.clear();
0360       } else if (logOutput) {
0361         if (signal < 1.0e-9 && background < 1.0e-9) {
0362           if (neverUndefined)
0363             std::fill(result.begin(), result.end(), 0.0);
0364           else
0365             result.clear();
0366         } else if (signal < 1.0e-9 || background < 1.0e-9)
0367           std::fill(result.begin(), result.end(), 0.0);
0368         else {
0369           // should be ok
0370         }
0371       } else {
0372         double factor = signal * background / ((signal + background) * (signal + background));
0373         for (std::vector<double>::iterator p = result.begin(); p != result.end(); ++p)
0374           *p *= factor;
0375       }
0376     }
0377 
0378     return result;
0379   }
0380 
0381 }  // anonymous namespace