Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 14:31:36

0001 #include "TrackingTools/GsfTools/interface/GaussianSumUtilities1D.h"
0002 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0003 
0004 // #include "Utilities/Timing/interface/TimingReport.h"
0005 // #include "Utilities/Timing/interface/TimerStack.h"
0006 
0007 // #include "TROOT.h"
0008 // #include "TMath.h"
0009 #include "Math/ProbFuncMathCore.h"
0010 #include "Math/PdfFuncMathCore.h"
0011 #include "Math/QuantFuncMathCore.h"
0012 
0013 #include <iostream>
0014 #include <cmath>
0015 
0016 #include <map>
0017 #include <functional>
0018 #include <numeric>
0019 #include <algorithm>
0020 
0021 double GaussianSumUtilities1D::pdf(unsigned int i, double x) const {
0022   return weight(i) * gauss(x, mean(i), standardDeviation(i));
0023 }
0024 
0025 double GaussianSumUtilities1D::quantile(const double q) const { return ROOT::Math::gaussian_quantile(q, 1.); }
0026 
0027 // double
0028 // GaussianSumUtilities1D::quantile (const double q) const
0029 // {
0030 //   //
0031 //   // mean and sigma of highest weight component
0032 //   //
0033 //   int iwMax(-1);
0034 //   float wMax(0.);
0035 //   for ( unsigned int i=0; i<size(); i++ ) {
0036 //     if ( weight(i)>wMax ) {
0037 //       iwMax = i;
0038 //       wMax = weight(i);
0039 //     }
0040 //   }
0041 //   //
0042 //   // Find start values: begin with mean and
0043 //   // go towards f(x)=0 until two points on
0044 //   // either side are found (assumes monotonously
0045 //   // increasing function)
0046 //   //
0047 //   double x1(mean(iwMax));
0048 //   double y1(cdf(x1)-q);
0049 //   double dx = y1>0. ? -standardDeviation(iwMax) : standardDeviation(iwMax);
0050 //   double x2(x1+dx);
0051 //   double y2(cdf(x2)-q);
0052 //   while ( y1*y2>0. ) {
0053 //     x1 = x2;
0054 //     y1 = y2;
0055 //     x2 += dx;
0056 //     y2 = cdf(x2) - q;
0057 //   }
0058 //   //
0059 //   // now use bisection to find final value
0060 //   //
0061 //   double x(0.);
0062 //   while ( true ) {
0063 //     // use linear interpolation
0064 //     x = -(x2*y1-x1*y2)/(y2-y1);
0065 //     double y = cdf(x) - q;
0066 //     if ( fabs(y)<1.e-6 )  break;
0067 //     if ( y*y1>0. ) {
0068 //       y1 = y;
0069 //       x1 = x;
0070 //     }
0071 //     else {
0072 //       y2 = y;
0073 //       x2 = x;
0074 //     }
0075 //   }
0076 //   return x;
0077 // }
0078 
0079 bool GaussianSumUtilities1D::modeIsValid() const {
0080   if (theModeStatus == NotComputed)
0081     computeMode();
0082   return theModeStatus == Valid;
0083 }
0084 
0085 const SingleGaussianState1D& GaussianSumUtilities1D::mode() const {
0086   if (theModeStatus == NotComputed)
0087     computeMode();
0088   return theMode;
0089 }
0090 
0091 void GaussianSumUtilities1D::computeMode() const {
0092   //   TimerStack tstack;
0093   //   tstack.benchmark("GSU1D::benchmark",100000);
0094   //   FastTimerStackPush(tstack,"GaussianSumUtilities1D::computeMode");
0095   theModeStatus = NotValid;
0096   //
0097   // check for "degenerate" states (identical values with vanishing error)
0098   //
0099   if (theState.variance() < 1.e-14) {
0100     theMode = SingleGaussianState1D(theState.mean(), theState.variance(), theState.weight());
0101     theModeStatus = Valid;
0102     return;
0103   }
0104   //
0105   // Use means of individual components as start values.
0106   // Sort by value of pdf.
0107   //
0108   typedef std::multimap<double, int, std::greater<double> > StartMap;
0109   StartMap xStart;
0110   for (unsigned int i = 0; i < size(); i++) {
0111     xStart.insert(std::make_pair(pdf(mean(i)), i));
0112   }
0113   //
0114   // Now try with each start value
0115   //
0116   int iRes(-1);                                 // index of start component for current estimate
0117   double xRes(mean((*xStart.begin()).second));  // current estimate of mode
0118   double yRes(-1.);                             // pdf at current estimate of mode
0119   //   std::pair<double,double> result(-1.,mean((*xStart.begin()).second));
0120   for (StartMap::const_iterator i = xStart.begin(); i != xStart.end(); i++) {
0121     //
0122     // Convergence radius for a single Gaussian = 1 sigma: don't try
0123     // start values within 1 sigma of the current solution
0124     //
0125     if (theModeStatus == Valid && fabs(mean((*i).second) - mean(iRes)) / standardDeviation(iRes) < 1.)
0126       continue;
0127     //
0128     // If a solution exists: drop as soon as the pdf at
0129     // start value drops to < 75% of maximum (try to avoid
0130     // unnecessary searches for the mode)
0131     //
0132     if (theModeStatus == Valid && (*i).first / (*xStart.begin()).first < 0.75)
0133       break;
0134     //
0135     // Try to find mode
0136     //
0137     double x;
0138     double y;
0139     bool valid = findMode(x, y, mean((*i).second), standardDeviation((*i).second));
0140     //
0141     // consider only successful searches
0142     //
0143     if (valid) {  //...
0144       //
0145       // update result only for significant changes in pdf(solution)
0146       //
0147       if (yRes < 0. || (y - yRes) / (y + yRes) > 1.e-10) {
0148         iRes = (*i).second;     // store index
0149         theModeStatus = Valid;  // update status
0150         xRes = x;               // current solution
0151         yRes = y;               // and its pdf value
0152                                 //       result = std::make_pair(y,x);     // store solution and pdf(solution)
0153       }
0154     }  //...
0155   }
0156   //
0157   // check (existance of) solution
0158   //
0159   if (theModeStatus == Valid) {
0160     //
0161     // Construct single Gaussian state with
0162     //  mean = mode
0163     //  variance = local variance at mode
0164     //  weight such that the pdf's of the mixture and the
0165     //    single state are equal at the mode
0166     //
0167     double mode = xRes;
0168     double varMode = localVariance(mode);
0169     double wgtMode = pdf(mode) * sqrt(2 * M_PI * varMode);
0170     theMode = SingleGaussianState1D(mode, varMode, wgtMode);
0171   } else {
0172     //
0173     // mode finding failed: set solution to highest component
0174     //  (alternative would be global mean / variance ..?)
0175     //
0176     //two many log warnings to actually be useful - comment out
0177     //edm::LogWarning("GaussianSumUtilities") << "1D mode calculation failed";
0178     //     double x = mean();
0179     //     double y = pdf(x);
0180     //     result = std::make_pair(y,x);
0181     //     theMode = SingleGaussianState1D(mean(),variance(),weight());
0182     //
0183     // look for component with highest value at mean
0184     //
0185     int icMax(0);
0186     double ySqMax(0.);
0187     int s = size();
0188     for (int ic = 0; ic < s; ++ic) {
0189       double w = weight(ic);
0190       double ySq = w * w / variance(ic);
0191       if (ySq > ySqMax) {
0192         icMax = ic;
0193         ySqMax = ySq;
0194       }
0195     }
0196     theMode = SingleGaussianState1D(components()[icMax]);
0197   }
0198 }
0199 
0200 bool GaussianSumUtilities1D::findMode(double& xMode, double& yMode, const double& xStart, const double& scale) const {
0201   //
0202   // try with Newton on (lnPdf)'(x)
0203   //
0204   double x1(0.);
0205   double y1(0.);
0206   FinderState state(size());
0207   update(state, xStart);
0208 
0209   double xmin(xStart - 1. * scale);
0210   double xmax(xStart + 1. * scale);
0211   //
0212   // preset result
0213   //
0214   bool result(false);
0215   xMode = state.x;
0216   yMode = state.y;
0217   //
0218   // Iterate
0219   //
0220   int nLoop(0);
0221   while (nLoop++ < 20) {
0222     if (nLoop > 1 && state.yd2 < 0. &&
0223         (fabs(state.yd * scale) < 1.e-10 || fabs(state.y - y1) / (state.y + y1) < 1.e-14)) {
0224       result = true;
0225       break;
0226     }
0227     if (fabs(state.yd2) < std::numeric_limits<float>::min())
0228       state.yd2 = state.yd2 > 0. ? std::numeric_limits<float>::min() : -std::numeric_limits<float>::min();
0229     double dx = -state.yd / state.yd2;
0230     x1 = state.x;
0231     y1 = state.y;
0232     double x2 = x1 + dx;
0233     if (state.yd2 > 0. && (x2 < xmin || x2 > xmax))
0234       return false;
0235     update(state, x2);
0236   }
0237   //
0238   // result
0239   //
0240   if (result) {
0241     xMode = state.x;
0242     yMode = state.y;
0243   }
0244   return result;
0245 }
0246 
0247 void GaussianSumUtilities1D::update(FinderState& state, double x) const {
0248   state.x = x;
0249 
0250   pdfComponents(state.x, state.pdfs);
0251   state.y = pdf(state.x, state.pdfs);
0252   state.yd = 0;
0253   if (state.y > std::numeric_limits<double>::min())
0254     state.yd = d1Pdf(state.x, state.pdfs) / state.y;
0255   state.yd2 = -state.yd * state.yd;
0256   if (state.y > std::numeric_limits<double>::min())
0257     state.yd2 += d2Pdf(state.x, state.pdfs) / state.y;
0258 }
0259 
0260 double GaussianSumUtilities1D::pdf(double x) const {
0261   double result(0.);
0262   size_t s = size();
0263   for (unsigned int i = 0; i < s; i++)
0264     result += pdf(i, x);
0265   return result;
0266 }
0267 
0268 double GaussianSumUtilities1D::cdf(const double& x) const {
0269   double result(0.);
0270   size_t s = size();
0271   for (unsigned int i = 0; i < s; i++)
0272     result += weight(i) * gaussInt(x, mean(i), standardDeviation(i));
0273   return result;
0274 }
0275 
0276 double GaussianSumUtilities1D::d1Pdf(const double& x) const { return d1Pdf(x, pdfComponents(x)); }
0277 
0278 double GaussianSumUtilities1D::d2Pdf(const double& x) const { return d2Pdf(x, pdfComponents(x)); }
0279 
0280 double GaussianSumUtilities1D::d3Pdf(const double& x) const { return d3Pdf(x, pdfComponents(x)); }
0281 
0282 double GaussianSumUtilities1D::lnPdf(const double& x) const { return lnPdf(x, pdfComponents(x)); }
0283 
0284 double GaussianSumUtilities1D::d1LnPdf(const double& x) const { return d1LnPdf(x, pdfComponents(x)); }
0285 
0286 double GaussianSumUtilities1D::d2LnPdf(const double& x) const { return d2LnPdf(x, pdfComponents(x)); }
0287 
0288 std::vector<double> GaussianSumUtilities1D::pdfComponents(const double& x) const {
0289   std::vector<double> result;
0290   result.reserve(size());
0291   for (unsigned int i = 0; i < size(); i++)
0292     result.push_back(weight(i) * gauss(x, mean(i), standardDeviation(i)));
0293   return result;
0294 }
0295 
0296 namespace {
0297   struct PDF {
0298     PDF(double ix) : x(ix) {}
0299     double x;
0300     double operator()(SingleGaussianState1D const& sg) const {
0301       return sg.weight() * ROOT::Math::gaussian_pdf(x, sg.standardDeviation(), sg.mean());
0302     }
0303   };
0304 }  // namespace
0305 void GaussianSumUtilities1D::pdfComponents(double x, std::vector<double>& result) const {
0306   size_t s = size();
0307   if (s != result.size())
0308     result.resize(s);
0309   std::transform(components().begin(), components().end(), result.begin(), PDF(x));
0310 }
0311 /*
0312 void GaussianSumUtilities1D::pdfComponents (double x, std::vector<double> & result) const {
0313    size_t s = size();
0314   if (s!=result.size()) result.resize(s);
0315   double* __restrict__ v = &result.front();
0316   SingleGaussianState1D const * __restrict__ sgv = &components().front();
0317   for ( unsigned int i=0; i<s; i++ )
0318     v[i]= sgv[i].weight()*gauss(x,sgv[i].mean(),sgv[i].standardDeviation());
0319 //    result[i]=pdf(i,x);
0320 }
0321 */
0322 
0323 double GaussianSumUtilities1D::pdf(double, const std::vector<double>& pdfs) {
0324   return std::accumulate(pdfs.begin(), pdfs.end(), 0.);
0325 }
0326 
0327 double GaussianSumUtilities1D::d1Pdf(double x, const std::vector<double>& pdfs) const {
0328   double result(0.);
0329   size_t s = size();
0330   for (unsigned int i = 0; i < s; i++) {
0331     double dx = (x - mean(i)) / standardDeviation(i);
0332     result += -pdfs[i] * dx / standardDeviation(i);
0333   }
0334   return result;
0335 }
0336 
0337 double GaussianSumUtilities1D::d2Pdf(double x, const std::vector<double>& pdfs) const {
0338   double result(0.);
0339   size_t s = size();
0340   for (unsigned int i = 0; i < s; i++) {
0341     double dx = (x - mean(i)) / standardDeviation(i);
0342     result += pdfs[i] / standardDeviation(i) / standardDeviation(i) * (dx * dx - 1);
0343   }
0344   return result;
0345 }
0346 
0347 double GaussianSumUtilities1D::d3Pdf(double x, const std::vector<double>& pdfs) const {
0348   double result(0.);
0349   size_t s = size();
0350   for (unsigned int i = 0; i < s; i++) {
0351     double dx = (x - mean(i)) / standardDeviation(i);
0352     result += pdfs[i] / standardDeviation(i) / standardDeviation(i) / standardDeviation(i) * (-dx * dx + 3) * dx;
0353   }
0354   return result;
0355 }
0356 
0357 double GaussianSumUtilities1D::lnPdf(double x, const std::vector<double>& pdfs) {
0358   double f(pdf(x, pdfs));
0359   double result(-std::numeric_limits<float>::max());
0360   if (f > std::numeric_limits<double>::min())
0361     result = log(f);
0362   return result;
0363 }
0364 
0365 double GaussianSumUtilities1D::d1LnPdf(double x, const std::vector<double>& pdfs) const {
0366   double f = pdf(x, pdfs);
0367   double result(d1Pdf(x, pdfs));
0368   if (f > std::numeric_limits<double>::min())
0369     result /= f;
0370   else
0371     result = 0.;
0372   return result;
0373 }
0374 
0375 double GaussianSumUtilities1D::d2LnPdf(double x, const std::vector<double>& pdfs) const {
0376   double f = pdf(x, pdfs);
0377   double df = d1LnPdf(x, pdfs);
0378   double result(-df * df);
0379   if (f > std::numeric_limits<double>::min())
0380     result += d2Pdf(x, pdfs) / f;
0381   return result;
0382 }
0383 
0384 double GaussianSumUtilities1D::gauss(double x, double mean, double sigma) {
0385   //   const double fNorm(1./sqrt(2*M_PI));
0386   //   double result(0.);
0387 
0388   //   double d((x-mean)/sigma);
0389   //   if ( fabs(d)<20. )  result = exp(-d*d/2.);
0390   //   result *= fNorm/sigma;
0391   //   return result;
0392   return ROOT::Math::gaussian_pdf(x, sigma, mean);
0393 }
0394 
0395 double GaussianSumUtilities1D::gaussInt(double x, double mean, double sigma) {
0396   return ROOT::Math::normal_cdf(x, sigma, mean);
0397 }
0398 
0399 double GaussianSumUtilities1D::combinedMean() const {
0400   double s0(0.);
0401   double s1(0.);
0402   int s = size();
0403   SingleGaussianState1D const* __restrict__ sgv = &components().front();
0404   for (int i = 0; i < s; i++)
0405     s0 += sgv[i].weight();
0406   for (int i = 0; i < s; i++)
0407     s1 += sgv[i].weight() * sgv[i].mean();
0408   return s1 / s0;
0409 }
0410 
0411 double GaussianSumUtilities1D::localVariance(double x) const {
0412   std::vector<double> pdfs;
0413   pdfComponents(x, pdfs);
0414   double result = -pdf(x, pdfs) / d2Pdf(x, pdfs);
0415   // FIXME: wrong curvature seems to be non-existant but should add a proper recovery
0416   if (result < 0.)
0417     edm::LogWarning("GaussianSumUtilities") << "1D variance at mode < 0";
0418   return result;
0419 }