Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:01:04

0001 #ifndef _FsmwClusterizer1D_H_
0002 #define _FsmwClusterizer1D_H_
0003 
0004 #include "CommonTools/Clustering1D/interface/Clusterizer1D.h"
0005 #include "CommonTools/Clustering1D/interface/Clusterizer1DCommons.h"
0006 #include "CommonTools/Clustering1D/interface/TrivialWeightEstimator.h"
0007 #include "CommonTools/Clustering1D/interface/Clustering1DException.h"
0008 
0009 #include <vector>
0010 #include <cmath>
0011 #include <algorithm>
0012 
0013 /** Fraction-of sample mode with weights clustering
0014  */
0015 
0016 template <class T>
0017 class FsmwClusterizer1D : public Clusterizer1D<T> {
0018 public:
0019   /** \param fraction fraction of values that will be considered to be 'in'.
0020      */
0021   FsmwClusterizer1D(double fraction = .05,
0022                     double n_sigma_in = 3.,
0023                     const WeightEstimator<T>& est = TrivialWeightEstimator<T>());
0024   FsmwClusterizer1D(const FsmwClusterizer1D&);
0025   ~FsmwClusterizer1D() override;
0026 
0027   std::pair<std::vector<Cluster1D<T> >, std::vector<const T*> > operator()(
0028       const std::vector<Cluster1D<T> >&) const override;
0029 
0030   FsmwClusterizer1D* clone() const override;
0031 
0032 private:
0033   WeightEstimator<T>* theEstimator;
0034   double theFraction;
0035   double theNSigmaIn;
0036 };
0037 
0038 /*
0039  *                              --- implementation ---
0040  *
0041  */
0042 
0043 namespace FsmwClusterizer1DNameSpace {
0044   /*
0045  *  Function that computes the 'fraction-of sample mode with weights'.
0046  *  The modefinder, that is.
0047  *  Warning, values have to be sorted in this method!!
0048  */
0049   template <class T>
0050   std::pair<typename std::vector<Cluster1D<T> >::const_iterator, typename std::vector<Cluster1D<T> >::const_iterator>
0051   fsmw(const std::vector<Cluster1D<T> >& values, double fraction) {
0052     typedef Cluster1D<T> Cluster1D;
0053     typename std::vector<Cluster1D>::const_iterator begin = values.begin();
0054     typename std::vector<Cluster1D>::const_iterator end = values.end() - 1;
0055 
0056     while (true) {
0057 #ifdef FsmwClusterizer1DDebug
0058       cout << "Begin at " << begin->position().value() << endl;
0059 #endif
0060 
0061       const int size = (int)(end - begin);
0062 #ifdef FsmwClusterizer1DDebug
0063 
0064       cout << "Size " << size << endl;
0065 #endif
0066 
0067       int stepsize = (int)floor((1 + size) * fraction);
0068       if (stepsize == 0)
0069         stepsize = 1;
0070 #ifdef FsmwClusterizer1DDebug
0071 
0072       cout << "Old end at " << end->position().value() << endl;
0073 #endif
0074 
0075       end = begin + stepsize;
0076       typename std::vector<Cluster1D>::const_iterator new_begin = begin;
0077       typename std::vector<Cluster1D>::const_iterator new_end = end;
0078 
0079 #ifdef FsmwClusterizer1DDebug
0080 
0081       cout << "New end at " << end->position().value() << endl;
0082       cout << "stepsize " << stepsize << endl;
0083 #endif
0084 
0085       // Old version: used the weights of just the end points
0086       // double totalweight = begin->weight() + end->weight();
0087 
0088       // new version: sums up the weights of all points involved
0089       // _including_ the "end" point
0090       double totalweight = end->weight();
0091       for (typename std::vector<Cluster1D>::const_iterator w = begin; w != end; ++w) {
0092         totalweight += w->weight();
0093       };
0094 
0095       double div = fabs(end->position().value() - begin->position().value()) / totalweight;
0096 #ifdef FsmwClusterizer1DDebug
0097 
0098       cout << "Div at " << begin->position().value() << ":" << (end)->position().value() << " = " << div << endl;
0099 #endif
0100 
0101       for (typename std::vector<Cluster1D>::const_iterator i = (begin + 1); i != (begin + size - stepsize + 1); ++i) {
0102         // FIXME wrong
0103         // double tmpweight = i->weight() + (i+stepsize)->weight();
0104         //
0105         // new version: sums up the weights of all points in the interval
0106         // _including_ the end point (i+stepsize)
0107         double tmpweight = 0.;
0108         for (typename std::vector<Cluster1D>::const_iterator wt = i; wt != (i + stepsize + 1); ++wt) {
0109           tmpweight += wt->weight();
0110         };
0111 
0112         double tmpdiv = fabs(i->position().value() - (i + stepsize)->position().value()) / tmpweight;
0113 #ifdef FsmwClusterizer1DDebug
0114 
0115         cout << "Div at " << i->position().value() << ":" << (i + stepsize)->position().value() << " = " << tmpdiv
0116              << endl;
0117 #endif
0118 
0119         if (tmpdiv < div) {
0120           new_begin = i;
0121           new_end = i + stepsize;
0122           div = tmpdiv;
0123         };
0124       };
0125 #ifdef FsmwClusterizer1DDebug
0126 
0127       cout << "---- new interval: " << new_begin->position().value() << ":" << new_end->position().value() << endl;
0128 #endif
0129 
0130       begin = new_begin;
0131       end = new_end;
0132       if (size < 4)
0133         break;
0134     };
0135 
0136     std::pair<typename std::vector<Cluster1D>::const_iterator, typename std::vector<Cluster1D>::const_iterator> ret(
0137         begin, end);
0138     return ret;
0139   }
0140 }  // namespace FsmwClusterizer1DNameSpace
0141 
0142 template <class T>
0143 FsmwClusterizer1D<T>::FsmwClusterizer1D(const FsmwClusterizer1D<T>& o)
0144     : theEstimator(o.theEstimator->clone()), theFraction(o.theFraction), theNSigmaIn(o.theNSigmaIn) {}
0145 
0146 template <class T>
0147 FsmwClusterizer1D<T>::FsmwClusterizer1D(double fraction, double nsig, const WeightEstimator<T>& est)
0148     : theEstimator(est.clone()), theFraction(fraction), theNSigmaIn(nsig) {}
0149 
0150 template <class T>
0151 FsmwClusterizer1D<T>::~FsmwClusterizer1D() {
0152   delete theEstimator;
0153 }
0154 
0155 template <class T>
0156 FsmwClusterizer1D<T>* FsmwClusterizer1D<T>::clone() const {
0157   return new FsmwClusterizer1D<T>(*this);
0158 }
0159 
0160 template <class T>
0161 std::pair<std::vector<Cluster1D<T> >, std::vector<const T*> > FsmwClusterizer1D<T>::operator()(
0162     const std::vector<Cluster1D<T> >& ov) const {
0163   using namespace Clusterizer1DCommons;
0164   using namespace FsmwClusterizer1DNameSpace;
0165   typedef Cluster1D<T> Cluster1D;
0166   std::vector<const T*> unusedtracks;
0167 
0168   switch (ov.size()) {
0169     case 0:
0170       throw Clustering1DException("[FsmwClusterizer1D] no values given");
0171     case 1:
0172       std::pair<std::vector<Cluster1D>, std::vector<const T*> > ret(ov, unusedtracks);
0173       return ret;
0174   };
0175 
0176   std::vector<Cluster1D> v = ov;
0177   sort(v.begin(), v.end(), ComparePairs<T>());
0178   std::vector<Cluster1D> sols;
0179 
0180   std::pair<typename std::vector<Cluster1D>::const_iterator, typename std::vector<Cluster1D>::const_iterator> estors =
0181       fsmw(v, theFraction);
0182 
0183   double weight = estors.first->weight() + estors.second->weight();
0184   double est = (estors.first->weight() * estors.first->position().value() +
0185                 estors.second->weight() * estors.second->position().value()) /
0186                weight;
0187   double err = 0.;
0188   double sigma = sqrt(square(estors.first->position().value() - est) + square(estors.second->position().value() - est));
0189   /*
0190     std::cout << "[FsmwClusterizer1D] first=" << estors.first->position().value()
0191               << " second=" << estors.second->position().value()
0192               << " est=" << est << std::endl;
0193     double sigma = sqrt ( square ( estors.first->position().error() ) +
0194                           square ( estors.second->position().error() ) );
0195     double sigma = estors.first->position().error();
0196                           */
0197   std::vector<const T*> trks;
0198   int inliers = 0;
0199 
0200   for (typename std::vector<Cluster1D>::iterator i = v.begin(); i != v.end(); ++i) {
0201     /*
0202         std::cout << "[FsmwClusterizer1D] see if they're in: delta="
0203                   << 10000 * fabs ( i->position().value() - est )
0204                   << " sigma=" << 10000 * sigma << std::endl;
0205          */
0206     if (fabs(i->position().value() - est) < theNSigmaIn * sigma) {
0207       // all within theNSigmaIn sigma are 'in'
0208       add(i->tracks(), trks);
0209       err += square(i->position().value() - est);
0210       inliers++;
0211     } else {
0212       add(i->tracks(), unusedtracks);
0213     };
0214   };
0215   err /= (inliers - 1);  // the algo definitely produces 2 or more inliers
0216   err = sqrt(err);
0217 
0218   err = sqrt(err);
0219   sols.push_back(Cluster1D(Measurement1D(est, err), trks, weight));
0220 
0221   std::pair<std::vector<Cluster1D>, std::vector<const T*> > ret(sols, unusedtracks);
0222   return ret;
0223 }
0224 
0225 #endif