Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-08-19 01:35:02

0001 /*
0002  * TauDiscriminantCutMultiplexerT
0003  *
0004  * Authors: Evan K. Friis, UW; Sebastian Wozniewski, KIT
0005  *
0006  * Takes a PFTauDiscriminatorContainer with two raw values: The toMultiplex diescriminator is expected at rawValues[0] and the key (needed by certain discriminators) is expected at rawValues[1].
0007  *
0008  * The "key" discriminantor is rounded to the nearest integer.
0009  *
0010  * A set of cuts for different keys on the "toMultiplex" discriminantor is
0011  * provided in the config file.
0012  *
0013  * Both the key and toMultiplex discriminators should map to the same PFTau
0014  * collection.
0015  *
0016  */
0017 #include "RecoTauTag/RecoTau/interface/TauDiscriminationProducerBase.h"
0018 #include "FWCore/Utilities/interface/InputTag.h"
0019 #include "CommonTools/Utils/interface/StringObjectFunction.h"
0020 #include "DataFormats/TauReco/interface/PFTau.h"
0021 #include "FWCore/ParameterSet/interface/FileInPath.h"
0022 
0023 #include <FWCore/ParameterSet/interface/ConfigurationDescriptions.h>
0024 #include <FWCore/ParameterSet/interface/ParameterSetDescription.h>
0025 
0026 #include <memory>
0027 
0028 #include "CondFormats/PhysicsToolsObjects/interface/PhysicsTGraphPayload.h"
0029 #include "CondFormats/DataRecord/interface/PhysicsTGraphPayloadRcd.h"
0030 #include "CondFormats/PhysicsToolsObjects/interface/PhysicsTFormulaPayload.h"
0031 #include "CondFormats/DataRecord/interface/PhysicsTFormulaPayloadRcd.h"
0032 
0033 #include "TGraph.h"
0034 #include "TFormula.h"
0035 #include "TFile.h"
0036 
0037 template <class TauType, class TauTypeRef, class ParentClass>
0038 class TauDiscriminantCutMultiplexerT : public ParentClass {
0039 public:
0040   explicit TauDiscriminantCutMultiplexerT(const edm::ParameterSet& pset);
0041 
0042   ~TauDiscriminantCutMultiplexerT() override;
0043   reco::SingleTauDiscriminatorContainer discriminate(const TauTypeRef&) const override;
0044   void beginEvent(const edm::Event& event, const edm::EventSetup& eventSetup) override;
0045 
0046   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0047 
0048 private:
0049   std::string moduleLabel_;
0050 
0051   bool loadMVAfromDB_;
0052   edm::FileInPath inputFileName_;
0053 
0054   struct DiscriminantCutEntry {
0055     DiscriminantCutEntry() : cutVariable_(), cutFunction_(), mode_(kUndefined) {}
0056     ~DiscriminantCutEntry() {}
0057     double cutValue_;
0058     std::string cutName_;
0059     edm::ESGetToken<PhysicsTGraphPayload, PhysicsTGraphPayloadRcd> cutToken_;
0060     std::unique_ptr<StringObjectFunction<TauType>> cutVariable_;
0061     std::unique_ptr<const TGraph> cutFunction_;
0062     enum { kUndefined, kFixedCut, kVariableCut };
0063     int mode_;
0064   };
0065   typedef std::map<int, std::vector<std::unique_ptr<DiscriminantCutEntry>>> DiscriminantCutMap;
0066   DiscriminantCutMap cuts_;
0067   uint n_raws_;
0068   int raw_discriminator_idx_ = -1;
0069   int raw_category_idx_ = -1;
0070 
0071   std::string mvaOutputNormalizationName_;
0072   edm::ESGetToken<PhysicsTFormulaPayload, PhysicsTFormulaPayloadRcd> formulaToken_;
0073   std::unique_ptr<const TFormula> mvaOutput_normalization_;
0074 
0075   bool isInitialized_;
0076 
0077   edm::InputTag toMultiplex_;
0078   edm::Handle<reco::TauDiscriminatorContainer> toMultiplexHandle_;
0079   edm::EDGetTokenT<reco::TauDiscriminatorContainer> toMultiplex_token;
0080 
0081   int verbosity_;
0082 };
0083 
0084 namespace {
0085   std::unique_ptr<TFile> openInputFile(const edm::FileInPath& inputFileName) {
0086     if (inputFileName.location() == edm::FileInPath::Unknown) {
0087       throw cms::Exception("TauDiscriminantCutMultiplexerT::loadObjectFromFile")
0088           << " Failed to find File = " << inputFileName << " !!\n";
0089     }
0090     return std::make_unique<TFile>(inputFileName.fullPath().data());
0091   }
0092 
0093   template <typename T>
0094   std::unique_ptr<const T> loadObjectFromFile(TFile& inputFile, const std::string& objectName) {
0095     const T* object = dynamic_cast<T*>(inputFile.Get(objectName.data()));
0096     if (!object)
0097       throw cms::Exception("TauDiscriminantCutMultiplexerT::loadObjectFromFile")
0098           << " Failed to load Object = " << objectName.data() << " from file = " << inputFile.GetName() << " !!\n";
0099     //Need to use TObject::Clone since the type T might be a base class
0100     return std::unique_ptr<const T>{static_cast<T*>(object->Clone())};
0101   }
0102 
0103   std::unique_ptr<const TGraph> loadTGraphFromDB(
0104       const edm::EventSetup& es,
0105       const std::string& graphName,
0106       const edm::ESGetToken<PhysicsTGraphPayload, PhysicsTGraphPayloadRcd>& graphToken,
0107       const int& verbosity_ = 0) {
0108     if (verbosity_) {
0109       std::cout << "<loadTGraphFromDB>:" << std::endl;
0110       std::cout << " graphName = " << graphName << std::endl;
0111     }
0112     return std::make_unique<TGraph>(es.getData(graphToken));
0113   }
0114 
0115   std::unique_ptr<TFormula> loadTFormulaFromDB(
0116       const edm::EventSetup& es,
0117       const std::string& formulaName,
0118       const edm::ESGetToken<PhysicsTFormulaPayload, PhysicsTFormulaPayloadRcd>& formulaToken_,
0119       const TString& newName,
0120       const int& verbosity_ = 0) {
0121     if (verbosity_) {
0122       std::cout << "<loadTFormulaFromDB>:" << std::endl;
0123       std::cout << " formulaName = " << formulaName << std::endl;
0124     }
0125     auto const& formulaPayload = es.getData(formulaToken_);
0126 
0127     if (formulaPayload.formulas().size() == 1 && formulaPayload.limits().size() == 1) {
0128       return std::make_unique<TFormula>(newName, formulaPayload.formulas().at(0).data());
0129     } else {
0130       throw cms::Exception("TauDiscriminantCutMultiplexerT::loadTFormulaFromDB")
0131           << "Failed to load TFormula = " << formulaName << " from Database !!\n";
0132     }
0133     return std::unique_ptr<TFormula>{};
0134   }
0135 }  // namespace
0136 
0137 template <class TauType, class TauTypeRef, class ParentClass>
0138 TauDiscriminantCutMultiplexerT<TauType, TauTypeRef, ParentClass>::TauDiscriminantCutMultiplexerT(
0139     const edm::ParameterSet& cfg)
0140     : ParentClass(cfg),
0141       moduleLabel_(cfg.getParameter<std::string>("@module_label")),
0142       mvaOutput_normalization_(),
0143       isInitialized_(false) {
0144   toMultiplex_ = cfg.getParameter<edm::InputTag>("toMultiplex");
0145   toMultiplex_token = this->template consumes<reco::TauDiscriminatorContainer>(toMultiplex_);
0146 
0147   verbosity_ = cfg.getParameter<int>("verbosity");
0148 
0149   mvaOutputNormalizationName_ = cfg.getParameter<std::string>("mvaOutput_normalization");
0150 
0151   loadMVAfromDB_ = cfg.getParameter<bool>("loadMVAfromDB");
0152   if (!loadMVAfromDB_) {
0153     inputFileName_ = cfg.getParameter<edm::FileInPath>("inputFileName");
0154   } else if (not mvaOutputNormalizationName_.empty()) {
0155     formulaToken_ = this->esConsumes(edm::ESInputTag{"", mvaOutputNormalizationName_});
0156   }
0157   if (verbosity_)
0158     std::cout << moduleLabel_ << " loadMVA = " << loadMVAfromDB_ << std::endl;
0159 
0160   // Setup our cut map, first raw values then working points
0161   typedef std::vector<edm::ParameterSet> VPSet;
0162   typedef std::vector<std::string> VString;
0163   typedef std::vector<double> VDouble;
0164   VString rawValueConfig = cfg.getParameter<VString>("rawValues");
0165   for (uint i = 0; i < rawValueConfig.size(); i++) {
0166     if (rawValueConfig[i] == "discriminator")
0167       raw_discriminator_idx_ = i;
0168     else if (rawValueConfig[i] == "category")
0169       raw_category_idx_ = i;
0170     else
0171       throw cms::Exception("TauDiscriminantCutMultiplexerT")
0172           << " Configuration Parameter 'rawValues' containes unknown values. Must be 'discriminator' or 'category'!!\n";
0173   }
0174   n_raws_ = rawValueConfig.size();
0175   VPSet mapping = cfg.getParameter<VPSet>("mapping");
0176   for (auto const& mappingEntry : mapping) {
0177     unsigned category = mappingEntry.getParameter<uint32_t>("category");
0178     std::vector<std::unique_ptr<DiscriminantCutEntry>> cutWPs;
0179     std::string categoryname = mappingEntry.getParameter<std::string>("cut");
0180     bool localWPs = false;
0181     bool wpsAsDouble = false;
0182     if (mappingEntry.exists("workingPoints")) {
0183       localWPs = true;
0184       if (mappingEntry.existsAs<VDouble>("workingPoints")) {
0185         wpsAsDouble = true;
0186       } else if (mappingEntry.existsAs<VString>("workingPoints")) {
0187         wpsAsDouble = false;
0188       } else {
0189         throw cms::Exception("TauDiscriminantCutMultiplexerT")
0190             << " Configuration Parameter 'workingPoints' must be filled with cms.string or cms.double!!\n";
0191       }
0192     } else if (cfg.exists("workingPoints")) {
0193       localWPs = false;
0194       if (cfg.existsAs<VDouble>("workingPoints")) {
0195         wpsAsDouble = true;
0196       } else if (cfg.existsAs<VString>("workingPoints")) {
0197         wpsAsDouble = false;
0198       } else {
0199         throw cms::Exception("TauDiscriminantCutMultiplexerT")
0200             << " Configuration Parameter 'workingPoints' must be filled with cms.string or cms.double!!\n";
0201       }
0202     } else {
0203       throw cms::Exception("TauDiscriminantCutMultiplexerT")
0204           << " Undefined Configuration Parameter 'workingPoints' !!\n";
0205     }
0206     if (wpsAsDouble) {
0207       VDouble workingPoints;
0208       if (localWPs)
0209         workingPoints = mappingEntry.getParameter<VDouble>("workingPoints");
0210       else
0211         workingPoints = cfg.getParameter<VDouble>("workingPoints");
0212       for (auto const& wp : workingPoints) {
0213         auto cut = std::make_unique<DiscriminantCutEntry>();
0214         cut->cutValue_ = wp;
0215         cut->mode_ = DiscriminantCutEntry::kFixedCut;
0216         cutWPs.push_back(std::move(cut));
0217       }
0218     } else {
0219       VString workingPoints;
0220       if (localWPs)
0221         workingPoints = mappingEntry.getParameter<VString>("workingPoints");
0222       else
0223         workingPoints = cfg.getParameter<VString>("workingPoints");
0224       for (auto const& wp : workingPoints) {
0225         auto cut = std::make_unique<DiscriminantCutEntry>();
0226         cut->cutName_ = categoryname + wp;
0227         if (loadMVAfromDB_) {
0228           cut->cutToken_ = this->esConsumes(edm::ESInputTag{"", cut->cutName_});
0229         }
0230         std::string cutVariable_string = mappingEntry.getParameter<std::string>("variable");
0231         cut->cutVariable_.reset(new StringObjectFunction<TauType>(cutVariable_string));
0232         cut->mode_ = DiscriminantCutEntry::kVariableCut;
0233         cutWPs.push_back(std::move(cut));
0234       }
0235     }
0236     cuts_[category] = std::move(cutWPs);
0237   }
0238 
0239   verbosity_ = cfg.getParameter<int>("verbosity");
0240   if (verbosity_)
0241     std::cout << "constructed " << moduleLabel_ << std::endl;
0242 }
0243 
0244 template <class TauType, class TauTypeRef, class ParentClass>
0245 TauDiscriminantCutMultiplexerT<TauType, TauTypeRef, ParentClass>::~TauDiscriminantCutMultiplexerT() {}
0246 
0247 template <class TauType, class TauTypeRef, class ParentClass>
0248 void TauDiscriminantCutMultiplexerT<TauType, TauTypeRef, ParentClass>::beginEvent(const edm::Event& evt,
0249                                                                                   const edm::EventSetup& es) {
0250   if (verbosity_)
0251     std::cout << " begin! " << moduleLabel_ << " " << isInitialized_ << std::endl;
0252   if (!isInitialized_) {
0253     //Only open the file once and we can close it when this routine is done
0254     // since all objects gotten from the file will have been copied
0255     std::unique_ptr<TFile> inputFile;
0256     if (!mvaOutputNormalizationName_.empty()) {
0257       if (!loadMVAfromDB_) {
0258         inputFile = openInputFile(inputFileName_);
0259         mvaOutput_normalization_ = loadObjectFromFile<TFormula>(*inputFile, mvaOutputNormalizationName_);
0260       } else {
0261         auto temp = loadTFormulaFromDB(es,
0262                                        mvaOutputNormalizationName_,
0263                                        formulaToken_,
0264                                        Form("%s_mvaOutput_normalization", moduleLabel_.data()),
0265                                        verbosity_);
0266         mvaOutput_normalization_ = std::move(temp);
0267       }
0268     }
0269     for (auto const& cutWPs : cuts_) {
0270       for (auto const& cut : cutWPs.second) {
0271         if (cut->mode_ == DiscriminantCutEntry::kVariableCut) {
0272           if (!loadMVAfromDB_) {
0273             if (not inputFile) {
0274               inputFile = openInputFile(inputFileName_);
0275             }
0276             if (verbosity_)
0277               std::cout << "Loading from file" << inputFileName_ << std::endl;
0278             cut->cutFunction_ = loadObjectFromFile<TGraph>(*inputFile, cut->cutName_);
0279           } else {
0280             if (verbosity_)
0281               std::cout << "Loading from DB" << std::endl;
0282             cut->cutFunction_ = loadTGraphFromDB(es, cut->cutName_, cut->cutToken_, verbosity_);
0283           }
0284         }
0285       }
0286     }
0287     isInitialized_ = true;
0288   }
0289 
0290   evt.getByToken(toMultiplex_token, toMultiplexHandle_);
0291 }
0292 
0293 template <class TauType, class TauTypeRef, class ParentClass>
0294 reco::SingleTauDiscriminatorContainer TauDiscriminantCutMultiplexerT<TauType, TauTypeRef, ParentClass>::discriminate(
0295     const TauTypeRef& tau) const {
0296   if (verbosity_) {
0297     std::cout << "<TauDiscriminantCutMultiplexerT::discriminate>:" << std::endl;
0298     std::cout << " moduleLabel = " << moduleLabel_ << std::endl;
0299   }
0300 
0301   reco::SingleTauDiscriminatorContainer result;
0302   result.rawValues.resize(n_raws_);
0303   double disc_result = (*toMultiplexHandle_)[tau].rawValues.at(0);
0304   if (verbosity_) {
0305     std::cout << "disc_result = " << disc_result << std::endl;
0306   }
0307   if (raw_discriminator_idx_ >= 0)
0308     result.rawValues[raw_discriminator_idx_] = disc_result;
0309   if (mvaOutput_normalization_) {
0310     disc_result = mvaOutput_normalization_->Eval(disc_result);
0311     if (verbosity_) {
0312       std::cout << "disc_result (normalized) = " << disc_result << std::endl;
0313     }
0314   }
0315   double key_result = 0.0;
0316   if ((*toMultiplexHandle_)[tau].rawValues.size() == 2) {
0317     key_result = (*toMultiplexHandle_)[tau].rawValues.at(1);
0318     if (raw_category_idx_ >= 0)
0319       result.rawValues[raw_category_idx_] = key_result;
0320   }
0321   typename DiscriminantCutMap::const_iterator cutWPsIter = cuts_.find(std::round(key_result));
0322 
0323   // Return null if it doesn't exist
0324   if (cutWPsIter == cuts_.end()) {
0325     return result;
0326   }
0327   // See if the discriminator passes our cuts
0328   for (auto const& cutIter : cutWPsIter->second) {
0329     bool passesCuts = false;
0330     if (cutIter->mode_ == DiscriminantCutEntry::kFixedCut) {
0331       passesCuts = (disc_result > cutIter->cutValue_);
0332       if (verbosity_) {
0333         std::cout << "cutValue (fixed) = " << cutIter->cutValue_ << " --> passesCuts = " << passesCuts << std::endl;
0334       }
0335     } else if (cutIter->mode_ == DiscriminantCutEntry::kVariableCut) {
0336       double cutVariable = (*cutIter->cutVariable_)(*tau);
0337       double xMin, xMax, dummy;
0338       cutIter->cutFunction_->GetPoint(0, xMin, dummy);
0339       cutIter->cutFunction_->GetPoint(cutIter->cutFunction_->GetN() - 1, xMax, dummy);
0340       const double epsilon = 1.e-3;
0341       if (cutVariable < (xMin + epsilon))
0342         cutVariable = xMin + epsilon;
0343       else if (cutVariable > (xMax - epsilon))
0344         cutVariable = xMax - epsilon;
0345       double cutValue = cutIter->cutFunction_->Eval(cutVariable);
0346       passesCuts = (disc_result > cutValue);
0347       if (verbosity_) {
0348         std::cout << "cutValue (@" << cutVariable << ") = " << cutValue << " --> passesCuts = " << passesCuts
0349                   << std::endl;
0350       }
0351     }
0352     result.workingPoints.push_back(passesCuts);
0353   }
0354   return result;
0355 }
0356 
0357 // template specialization to get the correct default config names in the following fillDescriptions
0358 template <class TauType>
0359 std::string getDefaultConfigString() {
0360   // this generic one shoudl never be called.
0361   // these are specialized in TauDiscriminationProducerBase.cc
0362   throw cms::Exception("TauDiscriminantCutMultiplexerT")
0363       << "Unsupported TauType used. You must use either PFTau or PATTau.";
0364 }
0365 
0366 template <>
0367 std::string getDefaultConfigString<reco::PFTau>() {
0368   return "recoTauDiscriminantCutMultiplexerDefault";
0369 }
0370 template <>
0371 std::string getDefaultConfigString<pat::Tau>() {
0372   return "patTauDiscriminantCutMultiplexerDefault";
0373 }
0374 
0375 template <class TauType, class TauTypeRef, class ParentClass>
0376 void TauDiscriminantCutMultiplexerT<TauType, TauTypeRef, ParentClass>::fillDescriptions(
0377     edm::ConfigurationDescriptions& descriptions) {
0378   // recoTauDiscriminantCutMultiplexer
0379   edm::ParameterSetDescription desc;
0380   desc.add<edm::InputTag>("toMultiplex", edm::InputTag("fixme"));
0381   desc.add<int>("verbosity", 0);
0382 
0383   {
0384     edm::ParameterSet pset_mapping;
0385     pset_mapping.addParameter<unsigned int>("category", 0);
0386     pset_mapping.addParameter<std::string>("cut", "fixme");
0387     edm::ParameterSetDescription desc_mapping;
0388     desc_mapping.add<unsigned int>("category", 0);
0389     desc_mapping.add<std::string>("cut");
0390     // it seems the parameter string "variable" exists only when workingPoints are string
0391     // see hpsPFTauDiscriminationByVLooseIsolationMVArun2v1DBdR03oldDMwLT in RecoTauTag/Configuration/python/HPSPFTaus_cff.py
0392     desc_mapping.addOptional<std::string>("variable")
0393         ->setComment("the parameter is required when \"workingPoints\" are string");
0394     std::vector<edm::ParameterSet> vpsd_mapping;
0395     vpsd_mapping.push_back(pset_mapping);
0396     desc.addVPSet("mapping", desc_mapping, vpsd_mapping);
0397   }
0398 
0399   std::vector<std::string> defaultRaws{"discriminator"};
0400   desc.add<std::vector<std::string>>("rawValues", defaultRaws);
0401   std::vector<double> defaultWP{0.0};
0402   desc.addNode(edm::ParameterDescription<std::vector<double>>("workingPoints", defaultWP, true) xor
0403                edm::ParameterDescription<std::vector<std::string>>("workingPoints", true));
0404   desc.add<edm::FileInPath>("inputFileName", edm::FileInPath("RecoTauTag/RecoTau/data/emptyMVAinputFile"));
0405   desc.add<bool>("loadMVAfromDB", true);
0406   ParentClass::fillProducerDescriptions(desc);  // inherited from the base
0407   desc.add<std::string>("mvaOutput_normalization", "");
0408   descriptions.add(getDefaultConfigString<TauType>(), desc);
0409 }
0410 
0411 // define our implementations
0412 typedef TauDiscriminantCutMultiplexerT<reco::PFTau, reco::PFTauRef, PFTauDiscriminationContainerProducerBase>
0413     RecoTauDiscriminantCutMultiplexer;
0414 typedef TauDiscriminantCutMultiplexerT<pat::Tau, pat::TauRef, PATTauDiscriminationContainerProducerBase>
0415     PATTauDiscriminantCutMultiplexer;
0416 
0417 DEFINE_FWK_MODULE(RecoTauDiscriminantCutMultiplexer);
0418 DEFINE_FWK_MODULE(PATTauDiscriminantCutMultiplexer);