Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:28

0001 #include <cmath>
0002 #include <array>
0003 #include <vector>
0004 #include <fstream>
0005 #include <iostream>
0006 #include <sstream>
0007 #include <iomanip>
0008 #include <memory>
0009 #include <boost/program_options.hpp>
0010 
0011 #include "TFile.h"
0012 #include "TH2F.h"
0013 
0014 namespace cluster_shape_filter {
0015 
0016   static constexpr size_t n_dim = 2;
0017 
0018   using Position = std::array<double, n_dim>;
0019 
0020   struct Limits {
0021     double down, up;
0022     Limits() : down(-std::numeric_limits<double>::infinity()), up(std::numeric_limits<double>::infinity()) {}
0023     Limits(double _down, double _up) : down(_down), up(_up) {}
0024     double size() const { return std::abs(down - up); }
0025   };
0026 
0027   using BoxLimits = std::array<Limits, n_dim>;
0028 
0029   struct Point {
0030     Position x;
0031     int weight;
0032     int cluster_index;
0033   };
0034 
0035   struct Center {
0036     Position x;
0037     int n_clusters;
0038     BoxLimits limits;
0039 
0040     double distance2(const Point& p) const {
0041       double d = 0;
0042       for (size_t n = 0; n < x.size(); ++n)
0043         d += std::pow(x[n] - p.x[n], 2);
0044       return d;
0045     }
0046 
0047     double area() const {
0048       double s = 1;
0049       for (size_t n = 0; n < limits.size(); ++n)
0050         s *= limits[n].size();
0051       return s;
0052     }
0053   };
0054 
0055   using AsymmetricCut = std::array<Center, 2>;
0056 
0057   std::ostream& operator<<(std::ostream& os, const AsymmetricCut& c) {
0058     for (size_t b = 0; b < c.size(); ++b) {
0059       for (size_t d = 0; d < c[b].limits.size(); ++d) {
0060         os << " " << c[b].limits[d].down << " " << c[b].limits[d].up;
0061       }
0062     }
0063 
0064     for (size_t b = 0; b < c.size(); ++b) {
0065       const double v = c[b].area() > 0 ? c[b].n_clusters / c[b].area() : 0.;
0066       os << " " << v << " " << c[b].n_clusters;
0067     }
0068     return os;
0069   }
0070 
0071   template <typename Object>
0072   Object* ReadObject(TFile& file, const std::string& name) {
0073     auto object = dynamic_cast<Object*>(file.Get(name.c_str()));
0074     if (!object) {
0075       std::ostringstream ss;
0076       ss << "Object '" << name << "' with type '" << typeid(Object).name() << "' not found in '" << file.GetName()
0077          << "'.";
0078       throw std::runtime_error(ss.str());
0079     }
0080     return object;
0081   }
0082 
0083   struct Arguments {
0084     std::string input, output, map_output;
0085     double loss{0.005};
0086     size_t minEntries{100};
0087     int exMax{10}, eyMax{15};
0088     bool verbose{false};
0089   };
0090 
0091   class ClusterAnalyzer {
0092   public:
0093     ClusterAnalyzer(const Arguments& _args) : args(_args) {}
0094 
0095     void analyze() {
0096       static const std::vector<std::string> subName = {"barrel", "endcap"};
0097 
0098       TFile resFile(args.input.c_str(), "READ");
0099       if (resFile.IsZombie())
0100         throw std::runtime_error("Input file not opened.");
0101 
0102       std::ofstream outFile;
0103       outFile.exceptions(std::ofstream::failbit | std::ofstream::badbit);
0104       outFile.open(args.output);
0105       std::shared_ptr<std::ofstream> mapFile;
0106       if (!args.map_output.empty()) {
0107         mapFile = std::make_shared<std::ofstream>();
0108         mapFile->exceptions(std::ofstream::failbit | std::ofstream::badbit);
0109         mapFile->open(args.map_output);
0110       }
0111 
0112       for (size_t is = 0; is < subName.size(); ++is) {
0113         for (int ix = 0; ix <= args.exMax; ++ix) {
0114           std::cout << " " << subName.at(is) << " dx= " << ix << " ";
0115           for (int iy = 0; iy <= args.eyMax; ++iy) {
0116             std::ostringstream histName;
0117             //                    histName << "hrpc_" << is << "_" << ix << "_" << iy;
0118             histName << "hspc_" << is << "_" << ix << "_" << iy;
0119             auto histo = ReadObject<TH2F>(resFile, histName.str());
0120 
0121             std::cout << ".";
0122 
0123             // Initial guess
0124             AsymmetricCut c;
0125             c[0].n_clusters = 0;
0126             c[0].x[0] = ix + 1;
0127             c[0].x[1] = iy + 1;
0128 
0129             c[1].n_clusters = 0;
0130             c[1].x[0] = -ix - 1;
0131             c[1].x[1] = -iy - 1;
0132 
0133             std::ostringstream flag;
0134             flag << subName.at(is) << "_" << ix << "_" << iy;
0135             process(*histo, c, flag.str());
0136 
0137             if (histo->Integral() >= args.minEntries) {
0138               // Fix barrel_0_0
0139               if (is == 0 && ix == 0 && iy == 0) {
0140                 c[0].limits[1].down = 0.;
0141                 c[1].limits[1].up = 0.;
0142               }
0143 
0144               outFile << is << " " << ix << " " << iy << c << std::endl;
0145             }
0146 
0147             if (mapFile)
0148               *mapFile << ix << " " << iy << " " << histo->Integral() << std::endl;
0149           }
0150           outFile << std::endl;
0151           std::cout << std::endl;
0152           if (mapFile)
0153             *mapFile << ix << " " << args.eyMax + 1 << " " << 0 << "\n" << std::endl;
0154         }
0155 
0156         if (mapFile) {
0157           for (int iy = 0; iy <= args.eyMax + 1; ++iy)
0158             *mapFile << args.exMax + 1 << " " << iy << " " << 0 << std::endl;
0159           *mapFile << std::endl << std::endl;
0160         }
0161 
0162         outFile << std::endl;
0163       }
0164     }
0165 
0166   private:
0167     using HistArray = std::array<std::array<TH1D*, 2>, 2>;
0168 
0169     HistArray CreateHistArray(const std::vector<Point>& points, const TH1D& line) const {
0170       HistArray x;
0171       for (size_t n = 0; n < x.size(); ++n) {
0172         for (size_t d = 0; d < x[n].size(); ++d) {
0173           std::ostringstream name;
0174           name << n << "_" << d;
0175           x[n][d] = dynamic_cast<TH1D*>(line.Clone(name.str().c_str()));
0176         }
0177       }
0178 
0179       for (const auto& point : points) {
0180         for (size_t d = 0; d < x[point.cluster_index].size(); ++d) {
0181           x[point.cluster_index][d]->Fill(point.x[d], point.weight);
0182         }
0183       }
0184       return x;
0185     }
0186 
0187     static double getFraction(const TH1D& line, double fraction) {
0188       double n = line.Integral();
0189       double s = 0;
0190 
0191       for (int i = 0; i <= line.GetNbinsX() + 1; ++i) {
0192         s += line.GetBinContent(i);
0193         if (s > n * fraction) {
0194           return line.GetXaxis()->GetBinUpEdge(i) -
0195                  (s - n * fraction) / line.GetBinContent(i) * line.GetXaxis()->GetBinWidth(i);
0196         }
0197       }
0198 
0199       throw std::runtime_error("Unable to determine a point for the given fraction.");
0200     }
0201 
0202     void kMeans(std::vector<Point>& points, AsymmetricCut& c, const TH1D& line) const {
0203       int changes;
0204       do {
0205         changes = 0;
0206 
0207         // Sort point into clusters
0208         for (auto& point : points) {
0209           const int n = c[0].distance2(point) < c[1].distance2(point) ? 0 : 1;
0210           if (n != point.cluster_index) {
0211             point.cluster_index = n;
0212             ++changes;
0213           }
0214         }
0215 
0216         if (changes != 0) {
0217           // Re-compute centers
0218           auto x = CreateHistArray(points, line);
0219 
0220           for (size_t n = 0; n < c.size(); ++n) {
0221             for (size_t d = 0; d < c[n].x.size(); ++d) {
0222               c[n].n_clusters = std::ceil(x[n][d]->Integral());
0223               if (x[n][d]->Integral() > 0)
0224                 c[n].x[d] = getFraction(*x[n][d], 0.5);
0225             }
0226           }
0227         }
0228       } while (changes != 0);
0229     }
0230 
0231     void findLimits(const std::vector<Point>& points, AsymmetricCut& c, const TH1D& line) const {
0232       auto x = CreateHistArray(points, line);
0233 
0234       for (size_t b = 0; b < x.size(); ++b) {
0235         for (size_t d = 0; d < x[b].size(); ++d) {
0236           if (x[b][d]->Integral() <= 0)
0237             continue;
0238           Limits best_limits;
0239           for (double f = (args.loss / 2) / 100; f < args.loss / 2 - (args.loss / 2) / 200;
0240                f += (args.loss / 2) / 100) {
0241             const Limits limits(getFraction(*x[b][d], f), getFraction(*x[b][d], 1 - (args.loss / 2 - f)));
0242             if (limits.size() < best_limits.size())
0243               best_limits = limits;
0244           }
0245           c[b].limits[d] = best_limits;
0246         }
0247       }
0248     }
0249 
0250     static void printOut(const TH2F& histo, const AsymmetricCut& c, const std::string& flag) {
0251       printToFile(histo, "points_" + flag + ".dat");
0252 
0253       std::ofstream limitsFile("limits_" + flag + ".par");
0254       for (size_t b = 0; b < c.size(); ++b) {
0255         for (size_t d = 0; d < c[b].limits.size(); ++d) {
0256           limitsFile << " l" << b << d << "=" << c[b].limits[d].down << std::endl;
0257           limitsFile << " h" << b << d << "=" << c[b].limits[d].up << std::endl;
0258         }
0259       }
0260 
0261       std::ofstream centersFile("centers_" + flag + ".dat");
0262       for (size_t b = 0; b < c.size(); ++b)
0263         centersFile << " " << c[b].x[0] << " " << c[b].x[1] << std::endl;
0264     }
0265 
0266     void process(const TH2F& histo, AsymmetricCut& c, const std::string& flag) {
0267       std::vector<Point> points;
0268       points.reserve(histo.GetNbinsX() * histo.GetNbinsY());
0269 
0270       for (int i = 1; i <= histo.GetNbinsX(); ++i) {
0271         for (int j = 1; j <= histo.GetNbinsY(); ++j) {
0272           if (histo.GetBinContent(i, j) <= 0)
0273             continue;
0274           Point p;
0275           p.x[0] = histo.GetXaxis()->GetBinCenter(i);
0276           p.x[1] = histo.GetYaxis()->GetBinCenter(j);
0277           p.weight = std::ceil(histo.GetBinContent(i, j));
0278           points.push_back(p);
0279         }
0280       }
0281 
0282       auto line = histo.ProjectionY("_py", 0, 0);
0283       line->Reset();
0284 
0285       kMeans(points, c, *line);
0286       findLimits(points, c, *line);
0287       if (args.verbose)
0288         printOut(histo, c, flag);
0289     }
0290 
0291     static void printToFile(const TH2F& h2, const std::string& fileName) {
0292       std::ofstream file;
0293       file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
0294       file.open(fileName);
0295 
0296       for (int i = 1; i <= h2.GetNbinsX(); i++) {
0297         for (int j = 1; j <= h2.GetNbinsY(); j++)
0298           file << " " << h2.GetXaxis()->GetBinLowEdge(i) << " " << h2.GetYaxis()->GetBinLowEdge(j) << " "
0299                << h2.GetBinContent(i, j) << "\n";
0300 
0301         file << " " << h2.GetXaxis()->GetBinLowEdge(i) << " " << h2.GetYaxis()->GetXmax() << " 0\n\n";
0302       }
0303 
0304       for (int j = 1; j <= h2.GetNbinsY(); j++) {
0305         file << " " << h2.GetXaxis()->GetXmax() << " " << h2.GetYaxis()->GetBinLowEdge(j) << " 0\n";
0306       }
0307 
0308       file << " " << h2.GetXaxis()->GetXmax() << " " << h2.GetYaxis()->GetXmax() << " 0" << std::endl;
0309     }
0310 
0311   private:
0312     const Arguments args;
0313   };
0314 
0315 }  // namespace cluster_shape_filter
0316 
0317 int main(int argc, char* argv[]) {
0318   namespace po = boost::program_options;
0319   using namespace cluster_shape_filter;
0320 
0321   Arguments args;
0322   po::options_description desc("Cluster shape filter analyzer");
0323   std::ostringstream ss_loss;
0324   ss_loss << std::setprecision(4) << args.loss;
0325   // clang-format off
0326   desc.add_options()
0327       ("help", "print help message")
0328       ("input", po::value<std::string>(&args.input)->required(), "input file with extracted cluster shapes")
0329       ("output", po::value<std::string>(&args.output)->required(), "output calibration file")
0330       ("map-output", po::value<std::string>(&args.map_output)->default_value(""), "output calibration file")
0331       ("loss",
0332        po::value<double>(&args.loss)->default_value(args.loss, ss_loss.str()),
0333        "(1 - efficiency) threshold for the WP definition")
0334       ("min-entries", po::value<size_t>(&args.minEntries)->default_value(args.minEntries), "")
0335       ("ex-max", po::value<int>(&args.exMax)->default_value(args.exMax), "")
0336       ("ey-max", po::value<int>(&args.eyMax)->default_value(args.eyMax), "")
0337       ("verbose", po::bool_switch(&args.verbose));
0338   // clang-format on
0339 
0340   try {
0341     po::variables_map variables;
0342     po::store(po::command_line_parser(argc, argv).options(desc).run(), variables);
0343     if (variables.count("help")) {
0344       std::cout << desc << std::endl;
0345       return 0;
0346     }
0347     po::notify(variables);
0348 
0349     cluster_shape_filter::ClusterAnalyzer analyzer(args);
0350     analyzer.analyze();
0351 
0352   } catch (po::error& e) {
0353     std::cerr << "ERROR: " << e.what() << ".\n\n" << desc << std::endl;
0354   } catch (std::exception& e) {
0355     std::cerr << "\nERROR: " << e.what() << std::endl;
0356   }
0357 
0358   return 0;
0359 }