Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:32:41

0001 //********************************************************************************
0002 //
0003 //  Description:
0004 //    DQM histogram post processor for the HLT Gen validation source module
0005 //    Given a folder name, this module will find histograms before and after
0006 //    HLT filters and produce efficiency histograms from these.
0007 //    The structure of this model is strongly inspired by the DQMGenericClient,
0008 //    replacing most user input parameters by the automatic parsing of the given directory.
0009 //
0010 //  Author: Finn Labe, UHH, Jul. 2022
0011 //          Inspired by DQMGenericClient from Junghwan Goh - SungKyunKwan University
0012 //********************************************************************************
0013 
0014 #include "FWCore/Framework/interface/MakerMacros.h"
0015 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0016 #include "FWCore/ServiceRegistry/interface/Service.h"
0017 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0018 #include "DQMServices/Core/interface/DQMEDHarvester.h"
0019 
0020 #include <TH1.h>
0021 #include <TH1F.h>
0022 #include <TH2F.h>
0023 #include <TClass.h>
0024 #include <TString.h>
0025 #include <TPRegexp.h>
0026 #include <TDirectory.h>
0027 #include <TEfficiency.h>
0028 
0029 #include <set>
0030 #include <cmath>
0031 #include <string>
0032 #include <vector>
0033 #include <climits>
0034 #include <boost/tokenizer.hpp>
0035 
0036 class HLTGenValClient : public DQMEDHarvester {
0037 public:
0038   HLTGenValClient(const edm::ParameterSet& pset);
0039   ~HLTGenValClient() override{};
0040 
0041   void dqmEndLuminosityBlock(DQMStore::IBooker& ibooker,
0042                              DQMStore::IGetter& igetter,
0043                              const edm::LuminosityBlock& lumiSeg,
0044                              const edm::EventSetup& c) override;
0045   void dqmEndRun(DQMStore::IBooker&, DQMStore::IGetter&, edm::Run const&, edm::EventSetup const&) override;
0046   void dqmEndJob(DQMStore::IBooker&, DQMStore::IGetter&) override{};
0047 
0048   struct EfficOption {
0049     std::string name, title;
0050     std::string numerator, denominator;
0051   };
0052 
0053   void computeEfficiency(DQMStore::IBooker& ibooker,
0054                          DQMStore::IGetter& igetter,
0055                          const std::string& dirName,
0056                          const std::string& efficMEName,
0057                          const std::string& efficMETitle,
0058                          const std::string& numeratorMEName,
0059                          const std::string& denominatorMEName);
0060 
0061 private:
0062   TPRegexp metacharacters_;
0063   TPRegexp nonPerlWildcard_;
0064   unsigned int verbose_;
0065   bool runOnEndLumi_;
0066   bool runOnEndJob_;
0067   bool makeGlobalEffPlot_;
0068   bool isWildcardUsed_;
0069 
0070   DQMStore* theDQM;
0071   std::vector<std::string> subDirs_;
0072   std::string outputFileName_;
0073 
0074   std::vector<EfficOption> efficOptions_;
0075 
0076   const std::string separator_ = "__";
0077 
0078   void makeAllPlots(DQMStore::IBooker&, DQMStore::IGetter&);
0079 
0080   void findAllSubdirectories(DQMStore::IBooker& ibooker,
0081                              DQMStore::IGetter& igetter,
0082                              std::string dir,
0083                              std::set<std::string>* myList,
0084                              const TString& pattern);
0085 
0086   void genericEff(TH1* denom, TH1* numer, MonitorElement* efficiencyHist);
0087 };
0088 
0089 HLTGenValClient::HLTGenValClient(const edm::ParameterSet& pset)
0090     : metacharacters_("[\\^\\$\\.\\*\\+\\?\\|\\(\\)\\{\\}\\[\\]]"), nonPerlWildcard_("\\w\\*|^\\*") {
0091   boost::escaped_list_separator<char> commonEscapes("\\", " \t", "\'");
0092 
0093   verbose_ = pset.getUntrackedParameter<unsigned int>("verbose", 0);
0094   runOnEndLumi_ = pset.getUntrackedParameter<bool>("runOnEndLumi", false);
0095   runOnEndJob_ = pset.getUntrackedParameter<bool>("runOnEndJob", true);
0096   makeGlobalEffPlot_ = pset.getUntrackedParameter<bool>("makeGlobalEffienciesPlot", true);
0097 
0098   outputFileName_ = pset.getUntrackedParameter<std::string>("outputFileName", "");
0099   subDirs_ = pset.getUntrackedParameter<std::vector<std::string>>("subDirs");
0100 
0101   isWildcardUsed_ = false;
0102 }
0103 
0104 void HLTGenValClient::dqmEndLuminosityBlock(DQMStore::IBooker& ibooker,
0105                                             DQMStore::IGetter& igetter,
0106                                             const edm::LuminosityBlock& lumiSeg,
0107                                             const edm::EventSetup& c) {
0108   if (runOnEndLumi_) {
0109     makeAllPlots(ibooker, igetter);
0110   }
0111 }
0112 
0113 void HLTGenValClient::dqmEndRun(DQMStore::IBooker& ibooker,
0114                                 DQMStore::IGetter& igetter,
0115                                 edm::Run const&,
0116                                 edm::EventSetup const&) {
0117   // Create new MEs in endRun, even though we are requested to do it in endJob.
0118   // This gives the QTests a chance to run, before summaries are created in
0119   // endJob. The negative side effect is that we cannot run the GenericClient
0120   // for plots produced in Harvesting, but that seems rather rare.
0121   //
0122   // It is important that this is still save in the presence of multiple runs,
0123   // first because in multi-run harvesting, we accumulate statistics over all
0124   // runs and have full statistics at the endRun of the last run, and second,
0125   // because we set the efficiencyFlag so any further aggregation should produce
0126   // correct results. Also, all operations should be idempotent; running them
0127   // more than once does no harm.
0128 
0129   theDQM = edm::Service<DQMStore>().operator->();
0130 
0131   if (runOnEndJob_) {
0132     makeAllPlots(ibooker, igetter);
0133   }
0134 
0135   if (!outputFileName_.empty())
0136     theDQM->save(outputFileName_);
0137 }
0138 
0139 // the main method that creates the plots
0140 void HLTGenValClient::makeAllPlots(DQMStore::IBooker& ibooker, DQMStore::IGetter& igetter) {
0141   // Process wildcard in the sub-directory
0142   std::set<std::string> subDirSet;
0143   for (auto& subDir : subDirs_) {
0144     if (subDir[subDir.size() - 1] == '/')
0145       subDir.erase(subDir.size() - 1);
0146 
0147     if (TString(subDir).Contains(metacharacters_)) {
0148       isWildcardUsed_ = true;
0149 
0150       const std::string::size_type shiftPos = subDir.rfind('/');
0151       const std::string searchPath = subDir.substr(0, shiftPos);
0152       const std::string pattern = subDir.substr(shiftPos + 1, subDir.length());
0153 
0154       findAllSubdirectories(ibooker, igetter, searchPath, &subDirSet, pattern);
0155 
0156     } else {
0157       subDirSet.insert(subDir);
0158     }
0159   }
0160 
0161   // loop through all sub-directories
0162   // from the current implementation of the HLTGenValSource, we expect all histograms in a single directory
0163   // however, this module is also capable of handling sub-directories, if needed
0164   for (std::set<std::string>::const_iterator iSubDir = subDirSet.begin(); iSubDir != subDirSet.end(); ++iSubDir) {
0165     const std::string& dirName = *iSubDir;
0166 
0167     // construct efficiency options automatically from systematically names histograms^
0168     const auto contents = igetter.getAllContents(dirName);
0169     for (const auto& content : contents) {
0170       // splitting the input string
0171       std::string name = content->getName();
0172       std::vector<std::string> seglist;
0173       size_t pos = 0;
0174       std::string token;
0175       while ((pos = name.find(separator_)) != std::string::npos) {
0176         token = name.substr(0, pos);
0177         seglist.push_back(token);
0178         name.erase(0, pos + separator_.length());
0179       }
0180       seglist.push_back(name);
0181 
0182       if (seglist.size() == 4 ||
0183           seglist.size() ==
0184               5) {  // this should be the only "proper" files we want to look at. 5 means that a custom tag was set!
0185         if (seglist.at(2) == "GEN")
0186           continue;  // this is the "before" hist, we won't create an effiency from this alone
0187 
0188         // if a fifth entry exists, it is expected to be the custom tag
0189         std::string tag = "";
0190         if (seglist.size() == 5)
0191           tag = seglist.at(4);
0192 
0193         // first we determing whether we have the 1D or 2D case
0194         if (seglist.at(3).rfind("2D", 0) == 0) {
0195           // 2D case
0196           EfficOption opt;
0197           opt.name = seglist.at(0) + separator_ + seglist.at(1) + separator_ + seglist.at(2) + separator_ +
0198                      seglist.at(3) + separator_ + "eff";  // efficiency histogram name
0199           opt.title = seglist.at(0) + " " + seglist.at(1) + " " + seglist.at(2) + " " + seglist.at(3) +
0200                       " efficiency";           // efficiency histogram title
0201           opt.numerator = content->getName();  // numerator histogram (after a filter)
0202           opt.denominator = seglist.at(0) + separator_ + seglist.at(1) + separator_ + "GEN" + separator_ +
0203                             seglist.at(3);  // denominator histogram (before all filters)
0204 
0205           efficOptions_.push_back(opt);
0206 
0207         } else {
0208           // 1D case
0209           EfficOption opt;
0210           opt.name = seglist.at(0) + separator_ + seglist.at(1) + separator_ + seglist.at(2) + separator_ +
0211                      seglist.at(3) + separator_ + "eff";  // efficiency histogram name
0212           opt.title = seglist.at(0) + " " + seglist.at(1) + " " + seglist.at(2) + " " + seglist.at(3) +
0213                       " efficiency";           // efficiency histogram title
0214           opt.numerator = content->getName();  // numerator histogram (after a filter)
0215           opt.denominator = seglist.at(0) + separator_ + seglist.at(1) + separator_ + "GEN" + separator_ +
0216                             seglist.at(3);  // denominator histogram (before all filters)
0217 
0218           // propagating the custom tag to the efficiency
0219           if (!tag.empty()) {
0220             opt.name += separator_ + tag;
0221             opt.title += " " + tag;
0222             opt.denominator += separator_ + tag;
0223           }
0224 
0225           efficOptions_.push_back(opt);
0226         }
0227       }
0228     }
0229 
0230     // now that we have all EfficOptions, we create the histograms
0231     for (const auto& efficOption : efficOptions_) {
0232       computeEfficiency(ibooker,
0233                         igetter,
0234                         dirName,
0235                         efficOption.name,
0236                         efficOption.title,
0237                         efficOption.numerator,
0238                         efficOption.denominator);
0239     }
0240   }
0241 }
0242 
0243 // main method of efficiency computation, called once for each EfficOption
0244 void HLTGenValClient::computeEfficiency(DQMStore::IBooker& ibooker,
0245                                         DQMStore::IGetter& igetter,
0246                                         const std::string& dirName,
0247                                         const std::string& efficMEName,
0248                                         const std::string& efficMETitle,
0249                                         const std::string& numeratorMEName,
0250                                         const std::string& denominatorMEName) {
0251   // checking if directory exists
0252   if (!igetter.dirExists(dirName)) {
0253     if (verbose_ >= 2 || (verbose_ == 1 && !isWildcardUsed_)) {
0254       edm::LogError("HLTGenValClient") << "computeEfficiency() : "
0255                                        << "Cannot find sub-directory " << dirName << std::endl;
0256     }
0257     return;
0258   }
0259 
0260   ibooker.cd();
0261 
0262   // getting input MEs
0263   HLTGenValClient::MonitorElement* denominatorME = igetter.get(dirName + "/" + denominatorMEName);
0264   HLTGenValClient::MonitorElement* numeratorME = igetter.get(dirName + "/" + numeratorMEName);
0265 
0266   // checking of input MEs exist
0267   if (!denominatorME) {
0268     if (verbose_ >= 2 || (verbose_ == 1 && !isWildcardUsed_)) {
0269       edm::LogError("HLTGenValClient") << "computeEfficiency() : "
0270                                        << "No denominator-ME '" << denominatorMEName << "' found\n";
0271     }
0272     return;
0273   }
0274   if (!numeratorME) {
0275     if (verbose_ >= 2 || (verbose_ == 1 && !isWildcardUsed_)) {
0276       edm::LogError("HLTGenValClient") << "computeEfficiency() : "
0277                                        << "No numerator-ME '" << numeratorMEName << "' found\n";
0278     }
0279     return;
0280   }
0281 
0282   // Treat everything as the base class, TH1
0283   TH1* hDenominator = denominatorME->getTH1();
0284   TH1* hNumerator = numeratorME->getTH1();
0285 
0286   // check if TH1 extraction has succeeded
0287   if (!hDenominator || !hNumerator) {
0288     if (verbose_ >= 2 || (verbose_ == 1 && !isWildcardUsed_)) {
0289       edm::LogError("HLTGenValClient") << "computeEfficiency() : "
0290                                        << "Cannot create TH1 from ME\n";
0291     }
0292     return;
0293   }
0294 
0295   // preparing efficiency output path and name
0296   std::string efficDir = dirName;
0297   std::string newEfficMEName = efficMEName;
0298   std::string::size_type shiftPos;
0299   if (std::string::npos != (shiftPos = efficMEName.rfind('/'))) {
0300     efficDir += "/" + efficMEName.substr(0, shiftPos);
0301     newEfficMEName.erase(0, shiftPos + 1);
0302   }
0303   ibooker.setCurrentFolder(efficDir);
0304 
0305   // creating the efficiency MonitorElement
0306   HLTGenValClient::MonitorElement* efficME = nullptr;
0307 
0308   // We need to know what kind of TH1 we have
0309   // That information is obtained from the class name of the hDenominator
0310   // Then we use the appropriate booking function
0311   TH1* efficHist = static_cast<TH1*>(hDenominator->Clone(newEfficMEName.c_str()));
0312   efficHist->SetDirectory(nullptr);
0313   efficHist->SetTitle(efficMETitle.c_str());
0314   TClass* myHistClass = efficHist->IsA();
0315   std::string histClassName = myHistClass->GetName();
0316   if (histClassName == "TH1F") {
0317     efficME = ibooker.book1D(newEfficMEName, (TH1F*)efficHist);
0318   } else if (histClassName == "TH2F") {
0319     efficME = ibooker.book2D(newEfficMEName, (TH2F*)efficHist);
0320   } else if (histClassName == "TH3F") {
0321     efficME = ibooker.book3D(newEfficMEName, (TH3F*)efficHist);
0322   }
0323   delete efficHist;
0324 
0325   // checking whether efficME was succesfully created
0326   if (!efficME) {
0327     edm::LogError("HLTGenValClient") << "computeEfficiency() : "
0328                                      << "Cannot book effic-ME from the DQM\n";
0329     return;
0330   }
0331 
0332   // actually calculating the efficiency and filling the ME
0333   genericEff(hDenominator, hNumerator, efficME);
0334   efficME->setEntries(denominatorME->getEntries());
0335 
0336   // Putting total efficiency in "GLobal efficiencies" histogram
0337   if (makeGlobalEffPlot_) {
0338     // getting global efficiency ME
0339     HLTGenValClient::MonitorElement* globalEfficME = igetter.get(efficDir + "/globalEfficiencies");
0340     if (!globalEfficME)  // in case it does not exist yet, we create it
0341       globalEfficME = ibooker.book1D("globalEfficiencies", "Global efficiencies", 1, 0, 1);
0342     if (!globalEfficME) {  // error handling in case creation failed
0343       edm::LogError("HLTGenValClient") << "computeEfficiency() : "
0344                                        << "Cannot book globalEffic-ME from the DQM\n";
0345       return;
0346     }
0347     globalEfficME->setEfficiencyFlag();
0348 
0349     // extracting histogram
0350     TH1F* hGlobalEffic = globalEfficME->getTH1F();
0351     if (!hGlobalEffic) {
0352       edm::LogError("HLTGenValClient") << "computeEfficiency() : "
0353                                        << "Cannot create TH1F from ME, globalEfficME\n";
0354       return;
0355     }
0356 
0357     // getting total counts
0358     const float nDenominatorAll = hDenominator->GetEntries();
0359     const float nNumeratorAll = hNumerator->GetEntries();
0360 
0361     // calculating total efficiency
0362     float efficAll = 0;
0363     float errorAll = 0;
0364     efficAll = nDenominatorAll ? nNumeratorAll / nDenominatorAll : 0;
0365     errorAll = nDenominatorAll && efficAll < 1 ? sqrt(efficAll * (1 - efficAll) / nDenominatorAll) : 0;
0366 
0367     // Filling the histogram bin
0368     const int iBin = hGlobalEffic->Fill(newEfficMEName.c_str(), 0);
0369     hGlobalEffic->SetBinContent(iBin, efficAll);
0370     hGlobalEffic->SetBinError(iBin, errorAll);
0371   }
0372 }
0373 
0374 // method to find all subdirectories of the given directory
0375 // goal is to fill myList with paths to all subdirectories
0376 void HLTGenValClient::findAllSubdirectories(DQMStore::IBooker& ibooker,
0377                                             DQMStore::IGetter& igetter,
0378                                             std::string dir,
0379                                             std::set<std::string>* myList,
0380                                             const TString& _pattern = TString("")) {
0381   TString patternTmp = _pattern;
0382 
0383   // checking if directory exists
0384   if (!igetter.dirExists(dir)) {
0385     edm::LogError("HLTGenValClient") << " HLTGenValClient::findAllSubdirectories ==> Missing folder " << dir << " !!!";
0386     return;
0387   }
0388 
0389   // replacing wildcards
0390   if (patternTmp != "") {
0391     if (patternTmp.Contains(nonPerlWildcard_))
0392       patternTmp.ReplaceAll("*", ".*");
0393     TPRegexp regexp(patternTmp);
0394     ibooker.cd(dir);
0395     std::vector<std::string> foundDirs = igetter.getSubdirs();
0396     for (const auto& iDir : foundDirs) {
0397       TString dirName = iDir.substr(iDir.rfind('/') + 1, iDir.length());
0398       if (dirName.Contains(regexp))
0399         findAllSubdirectories(ibooker, igetter, iDir, myList);
0400     }
0401   } else if (igetter.dirExists(dir)) {
0402     // we have found a subdirectory - adding it to the list
0403     myList->insert(dir);
0404 
0405     // moving into the found subdirectory and recursively continue
0406     ibooker.cd(dir);
0407     findAllSubdirectories(ibooker, igetter, dir, myList, "*");
0408 
0409   } else {
0410     // error handling in case found directory does not exist
0411     edm::LogError("HLTGenValClient") << "Trying to find sub-directories of " << dir << " failed because " << dir
0412                                      << " does not exist";
0413   }
0414   return;
0415 }
0416 
0417 // efficiency calculation from two histograms
0418 void HLTGenValClient::genericEff(TH1* denom, TH1* numer, MonitorElement* efficiencyHist) {
0419   // looping over all bins. Up to three dimentions can be handled
0420   // in case of less dimensions, the inner for loops are excecuted only once
0421   for (int iBinX = 1; iBinX < denom->GetNbinsX() + 1; iBinX++) {
0422     for (int iBinY = 1; iBinY < denom->GetNbinsY() + 1; iBinY++) {
0423       for (int iBinZ = 1; iBinZ < denom->GetNbinsZ() + 1; iBinZ++) {
0424         int globalBinNum = denom->GetBin(iBinX, iBinY, iBinZ);
0425 
0426         // getting numerator and denominator values
0427         float numerVal = numer->GetBinContent(globalBinNum);
0428         float denomVal = denom->GetBinContent(globalBinNum);
0429 
0430         // calculating effiency
0431         float effVal = 0;
0432         effVal = denomVal ? numerVal / denomVal : 0;
0433 
0434         // calculating error
0435         float errVal = 0;
0436         errVal = (denomVal && (effVal <= 1)) ? sqrt(effVal * (1 - effVal) / denomVal) : 0;
0437 
0438         // inserting value into the efficiency histogram
0439         efficiencyHist->setBinContent(globalBinNum, effVal);
0440         efficiencyHist->setBinError(globalBinNum, errVal);
0441         efficiencyHist->setEfficiencyFlag();
0442       }
0443     }
0444   }
0445 }
0446 
0447 DEFINE_FWK_MODULE(HLTGenValClient);