Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-10-25 04:55:23

0001 #ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
0002 #define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
0003 
0004 // -*- C++ -*-
0005 //
0006 // Package:    PhysicsTools/PatAlgos
0007 // Class:      BaseMVAValueMapProducer
0008 //
0009 /**\class BaseMVAValueMapProducer BaseMVAValueMapProducer.cc PhysicsTools/PatAlgos/plugins/BaseMVAValueMapProducer.cc
0010 
 Description: Template EDProducer that evaluates an MVA (TMVA, TensorFlow
 or ONNXRuntime backend) per input object and stores the scores in
 edm::ValueMap<float> products keyed by the input collection.

 Implementation:
     Backend model state is shared across streams through a GlobalCache
     (BaseMVACache); input variables are configured as string expressions
     evaluated on each object via StringObjectFunction.
0015 */
0016 //
0017 // Original Author:  Andre Rizzi
0018 //         Created:  Mon, 07 Sep 2017 09:18:03 GMT
0019 //
0020 //
0021 
0022 // system include files
0023 #include <memory>
0024 
0025 // user include files
0026 #include "FWCore/Framework/interface/Frameworkfwd.h"
0027 #include "FWCore/Framework/interface/stream/EDProducer.h"
0028 
0029 #include "FWCore/Framework/interface/Event.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 
0032 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0033 #include "FWCore/Utilities/interface/StreamID.h"
0034 
0035 #include "TMVA/Factory.h"
0036 #include "TMVA/Reader.h"
0037 
0038 #include "CommonTools/Utils/interface/StringObjectFunction.h"
0039 #include "DataFormats/Common/interface/ValueMap.h"
0040 #include "CommonTools/MVAUtils/interface/TMVAZipReader.h"
0041 #include "DataFormats/PatCandidates/interface/Jet.h"
0042 #include "DataFormats/PatCandidates/interface/Muon.h"
0043 #include "DataFormats/PatCandidates/interface/Electron.h"
0044 
0045 #include "DataFormats/PatCandidates/interface/Jet.h"
0046 #include "DataFormats/PatCandidates/interface/Muon.h"
0047 #include "DataFormats/PatCandidates/interface/Electron.h"
0048 #include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
0049 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
0050 
0051 #include <string>
0052 //
0053 // class declaration
0054 //
0055 
0056 class BaseMVACache {
0057 public:
0058   BaseMVACache(const std::string& model_path, const std::string& backend) {
0059     if (backend == "TF") {
0060       graph_.reset(tensorflow::loadGraphDef(model_path));
0061       tf_session_ = tensorflow::createSession(graph_.get());
0062     } else if (backend == "ONNX") {
0063       ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path);
0064     }
0065   }
0066   ~BaseMVACache() { tensorflow::closeSession(tf_session_); }
0067 
0068   tensorflow::Session* getTFSession() const { return tf_session_; }
0069   const cms::Ort::ONNXRuntime& getONNXSession() const { return *ort_; }
0070 
0071 private:
0072   std::shared_ptr<tensorflow::GraphDef> graph_;
0073   tensorflow::Session* tf_session_ = nullptr;
0074   std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
0075 };
0076 
0077 template <typename T>
0078 class BaseMVAValueMapProducer : public edm::stream::EDProducer<edm::GlobalCache<BaseMVACache>> {
0079 public:
0080   explicit BaseMVAValueMapProducer(const edm::ParameterSet& iConfig, const BaseMVACache* cache)
0081       : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
0082         variablesOrder_(iConfig.getParameter<std::vector<std::string>>("variablesOrder")),
0083         name_(iConfig.getParameter<std::string>("name")),
0084         backend_(iConfig.getParameter<std::string>("backend")),
0085         weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
0086         isClassifier_(iConfig.getParameter<bool>("isClassifier")),
0087         tmva_(backend_ == "TMVA"),
0088         tf_(backend_ == "TF"),
0089         onnx_(backend_ == "ONNX"),
0090         batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
0091     if (!(tmva_ || tf_ || onnx_)) {
0092       throw cms::Exception("ConfigError") << "Only 'TF', 'ONNX' and 'TMVA' backends are supported\n";
0093     }
0094 
0095     if (tmva_)
0096       reader_ = new TMVA::Reader();
0097     edm::ParameterSet const& varsPSet = iConfig.getParameter<edm::ParameterSet>("variables");
0098     for (const std::string& vname : varsPSet.getParameterNamesForType<std::string>()) {
0099       funcs_.emplace_back(
0100           std::pair<std::string, StringObjectFunction<T, true>>(vname, varsPSet.getParameter<std::string>(vname)));
0101     }
0102 
0103     values_.resize(variablesOrder_.size());
0104     size_t i = 0;
0105     for (const auto& v : variablesOrder_) {
0106       positions_[v] = i;
0107       if (tmva_)
0108         reader_->AddVariable(v, (&values_.front()) + i);
0109       i++;
0110     }
0111     //      reader_.BookMVA(name_,iConfig.getParameter<edm::FileInPath>("weightFile").fullPath() );
0112     if (tmva_) {
0113       reco::details::loadTMVAWeights(reader_, name_, weightfilename_);
0114     }
0115     if (tf_ || onnx_) {
0116       inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
0117       outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
0118       output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
0119       for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
0120         output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
0121       }
0122     }
0123 
0124     if (tmva_)
0125       produces<edm::ValueMap<float>>();
0126     else {
0127       for (const auto& n : output_names_) {
0128         produces<edm::ValueMap<float>>(n);
0129       }
0130     }
0131   }
0132   ~BaseMVAValueMapProducer() override {}
0133 
0134   void setValue(const std::string var, float val) {
0135     if (positions_.find(var) != positions_.end())
0136       values_[positions_[var]] = val;
0137   }
0138 
0139   static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg);
0140   static void globalEndJob(const BaseMVACache* cache);
0141 
0142   static edm::ParameterSetDescription getDescription();
0143   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0144 
0145 private:
0146   void beginStream(edm::StreamID) override{};
0147   void produce(edm::Event&, const edm::EventSetup&) override;
0148   void endStream() override{};
0149 
0150   ///to be implemented in derived classes, filling values for additional variables
0151   virtual void readAdditionalCollections(edm::Event&, const edm::EventSetup&) {}
0152   virtual void fillAdditionalVariables(const T&) {}
0153 
0154   edm::EDGetTokenT<edm::View<T>> src_;
0155   std::map<std::string, size_t> positions_;
0156   std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
0157   std::vector<std::string> variablesOrder_;
0158   std::vector<float> values_;
0159   TMVA::Reader* reader_;
0160 
0161   std::string name_;
0162   std::string backend_;
0163   std::string weightfilename_;
0164   bool isClassifier_;
0165   bool tmva_;
0166   bool tf_;
0167   bool onnx_;
0168   bool batch_eval_;
0169   std::string inputTensorName_;
0170   std::string outputTensorName_;
0171   std::vector<std::string> output_names_;
0172   std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
0173 };
0174 
0175 template <typename T>
0176 void BaseMVAValueMapProducer<T>::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
0177   edm::Handle<edm::View<T>> src;
0178   iEvent.getByToken(src_, src);
0179   readAdditionalCollections(iEvent, iSetup);
0180   std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
0181   for (auto& v : mvaOut)
0182     v.reserve(src->size());
0183 
0184   if (batch_eval_) {
0185     if (!src->empty()) {
0186       std::vector<float> data;
0187       data.reserve(src->size() * positions_.size());
0188       for (auto const& o : *src) {
0189         for (auto const& p : funcs_) {
0190           setValue(p.first, p.second(o));
0191         }
0192         fillAdditionalVariables(o);
0193         data.insert(data.end(), values_.begin(), values_.end());
0194       }
0195 
0196       std::vector<float> outputs;
0197       if (tf_) {
0198         tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
0199         tensorflow::NamedTensorList input_tensors;
0200         input_tensors.resize(1);
0201         input_tensors[0] =
0202             tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
0203         for (unsigned i = 0; i < data.size(); ++i) {
0204           input_tensors[0].second.flat<float>()(i) = data[i];
0205         }
0206         std::vector<tensorflow::Tensor> output_tensors;
0207         tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &output_tensors);
0208         for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
0209           outputs.push_back(output_tensors.at(0).flat<float>()(i));
0210         }
0211       } else if (onnx_) {
0212         cms::Ort::FloatArrays inputs{data};
0213         outputs =
0214             globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
0215       }
0216 
0217       const unsigned outdim = outputs.size() / src->size();
0218       for (unsigned i = 0; i < src->size(); ++i) {
0219         std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
0220         for (size_t k = 0; k < output_names_.size(); k++) {
0221           mvaOut[k].push_back(output_formulas_[k](tmpOut));
0222         }
0223       }
0224     }
0225   } else {
0226     for (auto const& o : *src) {
0227       for (auto const& p : funcs_) {
0228         setValue(p.first, p.second(o));
0229       }
0230       fillAdditionalVariables(o);
0231       if (tmva_) {
0232         mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
0233       } else {
0234         std::vector<float> tmpOut;
0235         if (tf_) {
0236           //currently support only one input sensor to reuse the TMVA like config
0237           tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
0238           tensorflow::NamedTensorList input_tensors;
0239           input_tensors.resize(1);
0240           input_tensors[0] =
0241               tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
0242           for (size_t j = 0; j < values_.size(); j++) {
0243             input_tensors[0].second.matrix<float>()(0, j) = values_[j];
0244           }
0245           std::vector<tensorflow::Tensor> outputs;
0246           tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &outputs);
0247           for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
0248             tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
0249         } else if (onnx_) {
0250           cms::Ort::FloatArrays inputs{values_};
0251           tmpOut = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];
0252         }
0253         for (size_t k = 0; k < output_names_.size(); k++)
0254           mvaOut[k].push_back(output_formulas_[k](tmpOut));
0255       }
0256     }
0257   }
0258 
0259   size_t k = 0;
0260   for (auto& m : mvaOut) {
0261     std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
0262     edm::ValueMap<float>::Filler filler(*mvaV);
0263     filler.insert(src, m.begin(), m.end());
0264     filler.fill();
0265     iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
0266     k++;
0267   }
0268 }
0269 
0270 template <typename T>
0271 std::unique_ptr<BaseMVACache> BaseMVAValueMapProducer<T>::initializeGlobalCache(const edm::ParameterSet& cfg) {
0272   return std::make_unique<BaseMVACache>(cfg.getParameter<edm::FileInPath>("weightFile").fullPath(),
0273                                         cfg.getParameter<std::string>("backend"));
0274 }
0275 
/// End-of-job hook; intentionally empty — the BaseMVACache destructor
/// already closes the TF session, and the ONNX session is freed by its
/// unique_ptr.
template <typename T>
void BaseMVAValueMapProducer<T>::globalEndJob(const BaseMVACache* cache) {}
0278 
0279 template <typename T>
0280 edm::ParameterSetDescription BaseMVAValueMapProducer<T>::getDescription() {
0281   edm::ParameterSetDescription desc;
0282   desc.add<edm::InputTag>("src")->setComment("input physics object collection");
0283   desc.add<std::vector<std::string>>("variablesOrder")->setComment("ordered list of MVA input variable names");
0284   desc.add<std::string>("name")->setComment("output score variable name");
0285   desc.add<bool>("isClassifier")->setComment("is a classifier discriminator");
0286   edm::ParameterSetDescription variables;
0287   variables.setAllowAnything();
0288   desc.add<edm::ParameterSetDescription>("variables", variables)->setComment("list of input variable definitions");
0289   desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file");
0290   desc.add<std::string>("backend", "TMVA")->setComment("TMVA, TF or ONNX");
0291   desc.add<std::string>("inputTensorName", "")->setComment("Name of tensorflow input tensor in the model");
0292   desc.add<std::string>("outputTensorName", "")->setComment("Name of tensorflow output tensor in the model");
0293   desc.add<std::vector<std::string>>("outputNames", std::vector<std::string>())
0294       ->setComment("Names of the output values to be used in the output valuemap");
0295   desc.add<std::vector<std::string>>("outputFormulas", std::vector<std::string>())
0296       ->setComment("Formulas to be used to post process the output");
0297   desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
0298 
0299   return desc;
0300 }
0301 
0302 template <typename T>
0303 void BaseMVAValueMapProducer<T>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0304   edm::ParameterSetDescription desc = getDescription();
0305   std::string modname;
0306   if (typeid(T) == typeid(pat::Jet))
0307     modname += "Jet";
0308   else if (typeid(T) == typeid(pat::Muon))
0309     modname += "Muon";
0310   else if (typeid(T) == typeid(pat::Electron))
0311     modname += "Ele";
0312   modname += "BaseMVAValueMapProducer";
0313   descriptions.add(modname, desc);
0314 }
0315 
0316 #endif