Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-12-06 04:00:40

0001 // Author: Benedikt Hegner, Tom Cornelis
0002 // Email:  benedikt.hegner@cern.ch, tom.cornelis@cern.ch
0003 
0004 #include "CondCore/DBOutputService/interface/PoolDBOutputService.h"
0005 #include "CondFormats/JetMETObjects/interface/QGLikelihoodObject.h"
0006 #include "FWCore/Framework/interface/one/EDAnalyzer.h"
0007 #include "FWCore/Framework/interface/Event.h"
0008 #include "FWCore/Framework/interface/Frameworkfwd.h"
0009 #include "FWCore/Framework/interface/MakerMacros.h"
0010 #include "FWCore/ParameterSet/interface/FileInPath.h"
0011 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0012 #include "FWCore/ServiceRegistry/interface/Service.h"
0013 #include "TFile.h"
0014 #include "TH1.h"
0015 #include "TKey.h"
0016 #include "TList.h"
0017 #include "TVector.h"
0018 #include <cstdlib>
0019 #include <cstring>
0020 #include <memory>
0021 #include <sstream>
0022 #include <string>
0023 #include <vector>
0024 
0025 class QGLikelihoodDBWriter : public edm::one::EDAnalyzer<> {
0026 public:
0027   QGLikelihoodDBWriter(const edm::ParameterSet&);
0028   void beginJob() override;
0029   void analyze(const edm::Event&, const edm::EventSetup&) override {}
0030   void endJob() override {}
0031   ~QGLikelihoodDBWriter() override {}
0032 
0033 private:
0034   bool getVectorFromFile(TFile*, std::vector<float>&, const TString&);
0035   void tryToMerge(std::map<std::vector<int>, QGLikelihoodCategory>&,
0036                   std::map<std::vector<int>, TH1*>&,
0037                   std::vector<int>&,
0038                   int);
0039   QGLikelihoodObject::Histogram transformToHistogramObject(TH1*);
0040   std::string inputRootFile, payloadTag;
0041 };
0042 
0043 // Constructor
0044 QGLikelihoodDBWriter::QGLikelihoodDBWriter(const edm::ParameterSet& pSet) {
0045   inputRootFile = pSet.getParameter<std::string>("src");
0046   payloadTag = pSet.getParameter<std::string>("payload");
0047 }
0048 
0049 // Get vector from input file (includes translating TVector to std::vector)
0050 bool QGLikelihoodDBWriter::getVectorFromFile(TFile* f, std::vector<float>& vector, const TString& name) {
0051   TVectorT<float>* tVector = nullptr;
0052   f->GetObject(name, tVector);
0053   if (!tVector)
0054     return false;
0055   for (int i = 0; i < tVector->GetNoElements(); ++i)
0056     vector.push_back((*tVector)[i]);
0057   return true;
0058 }
0059 
0060 // Transform ROOT TH1 to QGLikelihoodObject (same indexing)
0061 QGLikelihoodObject::Histogram QGLikelihoodDBWriter::transformToHistogramObject(TH1* th1) {
0062   QGLikelihoodObject::Histogram histogram(
0063       th1->GetNbinsX(), th1->GetXaxis()->GetBinLowEdge(1), th1->GetXaxis()->GetBinUpEdge(th1->GetNbinsX()));
0064   for (int ibin = 0; ibin <= th1->GetNbinsX() + 1; ++ibin)
0065     histogram.setBinContent(ibin, th1->GetBinContent(ibin));
0066   return histogram;
0067 }
0068 
0069 // Try to merge bin with neighbouring bin (index = 2,3,4 for eta,pt,rho)
0070 void QGLikelihoodDBWriter::tryToMerge(std::map<std::vector<int>, QGLikelihoodCategory>& categories,
0071                                       std::map<std::vector<int>, TH1*>& pdfs,
0072                                       std::vector<int>& binNumbers,
0073                                       int index) {
0074   if (!pdfs[binNumbers])
0075     return;
0076   std::vector<int> neighbour = binNumbers;
0077   do {
0078     if (--(neighbour[index]) < 0)
0079       return;
0080   } while (!pdfs[neighbour]);
0081   if (TString(pdfs[binNumbers]->GetTitle()) != TString(pdfs[neighbour]->GetTitle()))
0082     return;
0083   if (index != 4 && categories[neighbour].RhoMax != categories[binNumbers].RhoMax)
0084     return;
0085   if (index != 4 && categories[neighbour].RhoMin != categories[binNumbers].RhoMin)
0086     return;
0087   if (index != 3 && categories[neighbour].PtMax != categories[binNumbers].PtMax)
0088     return;
0089   if (index != 3 && categories[neighbour].PtMin != categories[binNumbers].PtMin)
0090     return;
0091   if (index != 2 && categories[neighbour].EtaMax != categories[binNumbers].EtaMax)
0092     return;
0093   if (index != 2 && categories[neighbour].EtaMin != categories[binNumbers].EtaMin)
0094     return;
0095 
0096   if (index == 4)
0097     categories[neighbour].RhoMax = categories[binNumbers].RhoMax;
0098   if (index == 3)
0099     categories[neighbour].PtMax = categories[binNumbers].PtMax;
0100   if (index == 2)
0101     categories[neighbour].EtaMax = categories[binNumbers].EtaMax;
0102   pdfs.erase(binNumbers);
0103   categories.erase(binNumbers);
0104 }
0105 
0106 // Begin Job
0107 void QGLikelihoodDBWriter::beginJob() {
0108   QGLikelihoodObject payload;
0109   payload.data.clear();
0110 
0111   // Get the ROOT file
0112   TFile* f = TFile::Open(edm::FileInPath(inputRootFile.c_str()).fullPath().c_str());
0113 
0114   // The ROOT file contains the binning for each variable, needed to set up the binning grid
0115   std::map<TString, std::vector<float>> gridOfBins;
0116   for (auto&& binVariable : {"eta", "pt", "rho"}) {
0117     if (!getVectorFromFile(f, gridOfBins[binVariable], TString(binVariable) + "Bins")) {
0118       edm::LogError("NoBins") << "Missing bin information for " << binVariable << " in input file" << std::endl;
0119       return;
0120     }
0121   }
0122 
0123   // Get pdf's from file and associate them to a QGLikelihoodCategory
0124   // Some pdf's in the ROOT-file are copies from each other, with the same title: those are merged bins in pt and rho
0125   // Here we do not store the copies, but try to extend the range of the neighbouring category (will result in less comparisons during application phase)
0126   std::map<std::vector<int>, TH1*> pdfs;
0127   std::map<std::vector<int>, QGLikelihoodCategory> categories;
0128   for (auto&& type : {"gluon", "quark"}) {
0129     // Keep numbering same as in RecoJets/JetAlgorithms/src/QGLikelihoodCalculator.cc
0130     int qgIndex = strcmp(type, "gluon") == 0 ? 1 : 0;
0131     for (auto&& likelihoodVar : {"mult", "ptD", "axis2"}) {
0132       // Keep order same as in RecoJets/JetProducers/plugins/QGTagger.cc
0133       int varIndex = (strcmp(likelihoodVar, "mult") == 0 ? 0 : (strcmp(likelihoodVar, "ptD") == 0 ? 1 : 2));
0134       for (int i = 0; i < (int)gridOfBins["eta"].size() - 1; ++i) {
0135         for (int j = 0; j < (int)gridOfBins["pt"].size() - 1; ++j) {
0136           for (int k = 0; k < (int)gridOfBins["rho"].size() - 1; ++k) {
0137             QGLikelihoodCategory category;
0138             category.EtaMin = gridOfBins["eta"][i];
0139             category.EtaMax = gridOfBins["eta"][i + 1];
0140             category.PtMin = gridOfBins["pt"][j];
0141             category.PtMax = gridOfBins["pt"][j + 1];
0142             category.RhoMin = gridOfBins["rho"][k];
0143             category.RhoMax = gridOfBins["rho"][k + 1];
0144             category.QGIndex = qgIndex;
0145             category.VarIndex = varIndex;
0146 
0147             TString key = TString::Format("%s/%s_%s_eta%d_pt%d_rho%d", likelihoodVar, likelihoodVar, type, i, j, k);
0148             TH1* pdf = (TH1*)f->Get(key);
0149             if (!pdf) {
0150               edm::LogError("NoPDF") << "Could not found pdf with key  " << key << " in input file" << std::endl;
0151               return;
0152             }
0153 
0154             std::vector<int> binNumbers = {qgIndex, varIndex, i, j, k};
0155             pdfs[binNumbers] = pdf;
0156             categories[binNumbers] = category;
0157 
0158             tryToMerge(categories, pdfs, binNumbers, 4);
0159           }
0160           for (int k = 0; k < (int)gridOfBins["rho"].size() - 1; ++k) {
0161             std::vector<int> binNumbers = {qgIndex, varIndex, i, j, k};
0162             tryToMerge(categories, pdfs, binNumbers, 3);
0163           }
0164         }
0165         for (int j = 0; j < (int)gridOfBins["pt"].size() - 1; ++j) {
0166           for (int k = 0; k < (int)gridOfBins["rho"].size() - 1; ++k) {
0167             std::vector<int> binNumbers = {qgIndex, varIndex, i, j, k};
0168             tryToMerge(categories, pdfs, binNumbers, 2);
0169           }
0170         }
0171       }
0172     }
0173   }
0174 
0175   // Write all categories with their histograms to file
0176   int i = 0;
0177   for (const auto& category : categories) {
0178     QGLikelihoodObject::Entry entry;
0179     entry.category = category.second;
0180     entry.histogram = transformToHistogramObject(pdfs[category.first]);
0181     entry.mean =
0182         0;  // not used by the algorithm, is an old data member used in the past, but DB objects are not allowed to change
0183     payload.data.push_back(entry);
0184 
0185     char buff[1000];
0186     sprintf(buff,
0187             "%6d) var=%1d\t\tqg=%1d\t\teta={%5.2f,%5.2f}\t\tpt={%8.2f,%8.2f}\t\trho={%6.2f,%8.2f}",
0188             i++,
0189             category.second.VarIndex,
0190             category.second.QGIndex,
0191             category.second.EtaMin,
0192             category.second.EtaMax,
0193             category.second.PtMin,
0194             category.second.PtMax,
0195             category.second.RhoMin,
0196             category.second.RhoMax);
0197     edm::LogVerbatim("HistName") << buff << std::endl;
0198   }
0199 
0200   // Define the valid range, if no category is found within these bounds a warning will be thrown
0201   payload.qgValidRange.EtaMin = gridOfBins["eta"].front();
0202   payload.qgValidRange.EtaMax = gridOfBins["eta"].back();
0203   payload.qgValidRange.PtMin = gridOfBins["pt"].front();
0204   payload.qgValidRange.PtMax = gridOfBins["pt"].back();
0205   payload.qgValidRange.RhoMin = gridOfBins["rho"].front();
0206   payload.qgValidRange.RhoMax = gridOfBins["rho"].back();
0207   payload.qgValidRange.QGIndex = -1;
0208   payload.qgValidRange.VarIndex = -1;
0209 
0210   // Now write it into the DB
0211   edm::LogInfo("UserOutput") << "Opening PoolDBOutputService" << std::endl;
0212 
0213   edm::Service<cond::service::PoolDBOutputService> s;
0214   if (s.isAvailable()) {
0215     edm::LogInfo("UserOutput") << "Setting up payload with " << payload.data.size() << " entries and tag " << payloadTag
0216                                << std::endl;
0217     s->writeOneIOV(payload, s->beginOfTime(), payloadTag);
0218   }
0219   edm::LogInfo("UserOutput") << "Wrote in CondDB QGLikelihood payload label: " << payloadTag << std::endl;
0220 }
0221 
0222 DEFINE_FWK_MODULE(QGLikelihoodDBWriter);