Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-02-02 16:38:00

0001 #ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
0002 #define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
0003 
0004 // -*- C++ -*-
0005 //
0006 // Package:    PhysicsTools/PatAlgos
0007 // Class:      BaseMVAValueMapProducer
0008 //
0009 /**\class BaseMVAValueMapProducer BaseMVAValueMapProducer.cc PhysicsTools/PatAlgos/plugins/BaseMVAValueMapProducer.cc
0010 
0011  Description: [one line class summary]
0012 
0013  Implementation:
0014      [Notes on implementation]
0015 */
0016 //
0017 // Original Author:  Andre Rizzi
0018 //         Created:  Mon, 07 Sep 2017 09:18:03 GMT
0019 //
0020 //
0021 
0022 // system include files
0023 #include <memory>
0024 
0025 // user include files
0026 #include "FWCore/Framework/interface/Frameworkfwd.h"
0027 #include "FWCore/Framework/interface/stream/EDProducer.h"
0028 
0029 #include "FWCore/Framework/interface/Event.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 
0032 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0033 #include "FWCore/Utilities/interface/StreamID.h"
0034 
0035 #include "TMVA/Factory.h"
0036 #include "TMVA/Reader.h"
0037 
0038 #include "CommonTools/Utils/interface/StringObjectFunction.h"
0039 #include "DataFormats/Common/interface/ValueMap.h"
0040 #include "CommonTools/MVAUtils/interface/TMVAZipReader.h"
0041 #include "DataFormats/PatCandidates/interface/Jet.h"
0042 #include "DataFormats/PatCandidates/interface/Muon.h"
0043 #include "DataFormats/PatCandidates/interface/Electron.h"
0044 
0045 #include "DataFormats/PatCandidates/interface/Jet.h"
0046 #include "DataFormats/PatCandidates/interface/Muon.h"
0047 #include "DataFormats/PatCandidates/interface/Electron.h"
0048 #include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
0049 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
0050 
0051 #include <string>
0052 //
0053 // class declaration
0054 //
0055 
0056 class BaseMVACache {
0057 public:
0058   BaseMVACache(const std::string& model_path, const std::string& backend, const bool disableONNXGraphOpt) {
0059     if (backend == "TF") {
0060       graph_.reset(tensorflow::loadGraphDef(model_path));
0061       tf_session_ = tensorflow::createSession(graph_.get());
0062     } else if (backend == "ONNX") {
0063       if (disableONNXGraphOpt) {
0064         Ort::SessionOptions sess_opts;
0065         sess_opts = cms::Ort::ONNXRuntime::defaultSessionOptions();
0066         sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
0067         ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path, &sess_opts);
0068       } else {
0069         ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path);
0070       }
0071     }
0072   }
0073   ~BaseMVACache() { tensorflow::closeSession(tf_session_); }
0074 
0075   tensorflow::Session* getTFSession() const { return tf_session_; }
0076   const cms::Ort::ONNXRuntime& getONNXSession() const { return *ort_; }
0077 
0078 private:
0079   std::shared_ptr<tensorflow::GraphDef> graph_;
0080   tensorflow::Session* tf_session_ = nullptr;
0081   std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
0082 };
0083 
0084 template <typename T>
0085 class BaseMVAValueMapProducer : public edm::stream::EDProducer<edm::GlobalCache<BaseMVACache>> {
0086 public:
0087   explicit BaseMVAValueMapProducer(const edm::ParameterSet& iConfig, const BaseMVACache* cache)
0088       : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
0089         variablesOrder_(iConfig.getParameter<std::vector<std::string>>("variablesOrder")),
0090         name_(iConfig.getParameter<std::string>("name")),
0091         backend_(iConfig.getParameter<std::string>("backend")),
0092         weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
0093         isClassifier_(iConfig.getParameter<bool>("isClassifier")),
0094         tmva_(backend_ == "TMVA"),
0095         tf_(backend_ == "TF"),
0096         onnx_(backend_ == "ONNX"),
0097         batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
0098     if (!(tmva_ || tf_ || onnx_)) {
0099       throw cms::Exception("ConfigError") << "Only 'TF', 'ONNX' and 'TMVA' backends are supported\n";
0100     }
0101 
0102     if (tmva_)
0103       reader_ = new TMVA::Reader();
0104     edm::ParameterSet const& varsPSet = iConfig.getParameter<edm::ParameterSet>("variables");
0105     for (const std::string& vname : varsPSet.getParameterNamesForType<std::string>()) {
0106       funcs_.emplace_back(
0107           std::pair<std::string, StringObjectFunction<T, true>>(vname, varsPSet.getParameter<std::string>(vname)));
0108     }
0109 
0110     values_.resize(variablesOrder_.size());
0111     size_t i = 0;
0112     for (const auto& v : variablesOrder_) {
0113       positions_[v] = i;
0114       if (tmva_)
0115         reader_->AddVariable(v, (&values_.front()) + i);
0116       i++;
0117     }
0118     //      reader_.BookMVA(name_,iConfig.getParameter<edm::FileInPath>("weightFile").fullPath() );
0119     if (tmva_) {
0120       reco::details::loadTMVAWeights(reader_, name_, weightfilename_);
0121     }
0122     if (tf_ || onnx_) {
0123       inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
0124       outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
0125       output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
0126       for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
0127         output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
0128       }
0129     }
0130 
0131     if (tmva_)
0132       produces<edm::ValueMap<float>>();
0133     else {
0134       for (const auto& n : output_names_) {
0135         produces<edm::ValueMap<float>>(n);
0136       }
0137     }
0138   }
0139   ~BaseMVAValueMapProducer() override {}
0140 
0141   void setValue(const std::string var, float val) {
0142     if (positions_.find(var) != positions_.end())
0143       values_[positions_[var]] = val;
0144   }
0145 
0146   static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg);
0147   static void globalEndJob(const BaseMVACache* cache);
0148 
0149   static edm::ParameterSetDescription getDescription();
0150   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0151 
0152 private:
0153   void beginStream(edm::StreamID) override{};
0154   void produce(edm::Event&, const edm::EventSetup&) override;
0155   void endStream() override{};
0156 
0157   ///to be implemented in derived classes, filling values for additional variables
0158   virtual void readAdditionalCollections(edm::Event&, const edm::EventSetup&) {}
0159   virtual void fillAdditionalVariables(const T&) {}
0160 
0161   edm::EDGetTokenT<edm::View<T>> src_;
0162   std::map<std::string, size_t> positions_;
0163   std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
0164   std::vector<std::string> variablesOrder_;
0165   std::vector<float> values_;
0166   TMVA::Reader* reader_;
0167 
0168   std::string name_;
0169   std::string backend_;
0170   std::string weightfilename_;
0171   bool isClassifier_;
0172   bool tmva_;
0173   bool tf_;
0174   bool onnx_;
0175   bool batch_eval_;
0176   std::string inputTensorName_;
0177   std::string outputTensorName_;
0178   std::vector<std::string> output_names_;
0179   std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
0180 };
0181 
template <typename T>
void BaseMVAValueMapProducer<T>::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
  // Evaluate the configured MVA for every object in the input collection
  // and store the scores as ValueMap<float> products.
  edm::Handle<edm::View<T>> src;
  iEvent.getByToken(src_, src);
  readAdditionalCollections(iEvent, iSetup);
  // One output column for TMVA, otherwise one per configured output name.
  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
  for (auto& v : mvaOut)
    v.reserve(src->size());

  if (batch_eval_) {
    // Batched path (used with the TF/ONNX backends): gather every object's
    // input variables into one flat buffer and run a single inference call.
    if (!src->empty()) {
      std::vector<float> data;
      data.reserve(src->size() * positions_.size());
      for (auto const& o : *src) {
        for (auto const& p : funcs_) {
          setValue(p.first, p.second(o));
        }
        fillAdditionalVariables(o);
        data.insert(data.end(), values_.begin(), values_.end());
      }

      std::vector<float> outputs;
      if (tf_) {
        // Input shape: (number of objects) x (number of input variables).
        tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
        tensorflow::NamedTensorList input_tensors;
        input_tensors.resize(1);
        input_tensors[0] =
            tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
        for (unsigned i = 0; i < data.size(); ++i) {
          input_tensors[0].second.flat<float>()(i) = data[i];
        }
        std::vector<tensorflow::Tensor> output_tensors;
        tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &output_tensors);
        for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
          outputs.push_back(output_tensors.at(0).flat<float>()(i));
        }
      } else if (onnx_) {
        cms::Ort::FloatArrays inputs{data};
        outputs =
            globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
      }

      // Slice the flat output back per object and apply the configured
      // post-processing formulas to fill each output column.
      const unsigned outdim = outputs.size() / src->size();
      for (unsigned i = 0; i < src->size(); ++i) {
        std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
        for (size_t k = 0; k < output_names_.size(); k++) {
          mvaOut[k].push_back(output_formulas_[k](tmpOut));
        }
      }
    }
  } else {
    // Per-object path: evaluate the model once for each input object.
    for (auto const& o : *src) {
      for (auto const& p : funcs_) {
        setValue(p.first, p.second(o));
      }
      fillAdditionalVariables(o);
      if (tmva_) {
        mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
      } else {
        std::vector<float> tmpOut;
        if (tf_) {
          //currently support only one input sensor to reuse the TMVA like config
          tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
          tensorflow::NamedTensorList input_tensors;
          input_tensors.resize(1);
          input_tensors[0] =
              tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
          for (size_t j = 0; j < values_.size(); j++) {
            input_tensors[0].second.matrix<float>()(0, j) = values_[j];
          }
          std::vector<tensorflow::Tensor> outputs;
          tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &outputs);
          for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
            tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
        } else if (onnx_) {
          cms::Ort::FloatArrays inputs{values_};
          tmpOut = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];
        }
        for (size_t k = 0; k < output_names_.size(); k++)
          mvaOut[k].push_back(output_formulas_[k](tmpOut));
      }
    }
  }

  // Wrap each output column in a ValueMap keyed to the input collection.
  size_t k = 0;
  for (auto& m : mvaOut) {
    std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
    edm::ValueMap<float>::Filler filler(*mvaV);
    filler.insert(src, m.begin(), m.end());
    filler.fill();
    iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
    k++;
  }
}
0276 
0277 template <typename T>
0278 std::unique_ptr<BaseMVACache> BaseMVAValueMapProducer<T>::initializeGlobalCache(const edm::ParameterSet& cfg) {
0279   return std::make_unique<BaseMVACache>(cfg.getParameter<edm::FileInPath>("weightFile").fullPath(),
0280                                         cfg.getParameter<std::string>("backend"),
0281                                         cfg.getParameter<bool>("disableONNXGraphOpt"));
0282 }
0283 
// Intentionally empty: the cache's own destructor releases the sessions.
template <typename T>
void BaseMVAValueMapProducer<T>::globalEndJob(const BaseMVACache* cache) {}
0286 
0287 template <typename T>
0288 edm::ParameterSetDescription BaseMVAValueMapProducer<T>::getDescription() {
0289   edm::ParameterSetDescription desc;
0290   desc.add<edm::InputTag>("src")->setComment("input physics object collection");
0291   desc.add<std::vector<std::string>>("variablesOrder")->setComment("ordered list of MVA input variable names");
0292   desc.add<std::string>("name")->setComment("output score variable name");
0293   desc.add<bool>("isClassifier")->setComment("is a classifier discriminator");
0294   edm::ParameterSetDescription variables;
0295   variables.setAllowAnything();
0296   desc.add<edm::ParameterSetDescription>("variables", variables)->setComment("list of input variable definitions");
0297   desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file");
0298   desc.add<std::string>("backend", "TMVA")->setComment("TMVA, TF or ONNX");
0299   desc.add<std::string>("inputTensorName", "")->setComment("Name of tensorflow input tensor in the model");
0300   desc.add<std::string>("outputTensorName", "")->setComment("Name of tensorflow output tensor in the model");
0301   desc.add<std::vector<std::string>>("outputNames", std::vector<std::string>())
0302       ->setComment("Names of the output values to be used in the output valuemap");
0303   desc.add<std::vector<std::string>>("outputFormulas", std::vector<std::string>())
0304       ->setComment("Formulas to be used to post process the output");
0305   desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
0306   desc.add<bool>("disableONNXGraphOpt", false)->setComment("Disable ONNX runtime graph optimization");
0307 
0308   return desc;
0309 }
0310 
0311 template <typename T>
0312 void BaseMVAValueMapProducer<T>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0313   edm::ParameterSetDescription desc = getDescription();
0314   std::string modname;
0315   if (typeid(T) == typeid(pat::Jet))
0316     modname += "Jet";
0317   else if (typeid(T) == typeid(pat::Muon))
0318     modname += "Muon";
0319   else if (typeid(T) == typeid(pat::Electron))
0320     modname += "Ele";
0321   modname += "BaseMVAValueMapProducer";
0322   descriptions.add(modname, desc);
0323 }
0324 
0325 #endif