// PhysicsTools/PatAlgos/interface/BaseMVAValueMapProducer.h
0001 #ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
0002 #define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023 #include <memory>
0024
0025
0026 #include "FWCore/Framework/interface/Frameworkfwd.h"
0027 #include "FWCore/Framework/interface/stream/EDProducer.h"
0028
0029 #include "FWCore/Framework/interface/Event.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031
0032 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0033 #include "FWCore/Utilities/interface/StreamID.h"
0034
0035 #include "TMVA/Factory.h"
0036 #include "TMVA/Reader.h"
0037
0038 #include "CommonTools/Utils/interface/StringObjectFunction.h"
0039 #include "DataFormats/Common/interface/ValueMap.h"
0040 #include "CommonTools/MVAUtils/interface/TMVAZipReader.h"
0041 #include "DataFormats/PatCandidates/interface/Jet.h"
0042 #include "DataFormats/PatCandidates/interface/Muon.h"
0043 #include "DataFormats/PatCandidates/interface/Electron.h"
0044
0045 #include "DataFormats/PatCandidates/interface/Jet.h"
0046 #include "DataFormats/PatCandidates/interface/Muon.h"
0047 #include "DataFormats/PatCandidates/interface/Electron.h"
0048 #include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
0049 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
0050
0051 #include <string>
0052
0053
0054
0055
0056 class BaseMVACache {
0057 public:
0058 BaseMVACache(const std::string& model_path, const std::string& backend, const bool disableONNXGraphOpt) {
0059 if (backend == "TF") {
0060 graph_.reset(tensorflow::loadGraphDef(model_path));
0061 tf_session_ = tensorflow::createSession(graph_.get());
0062 } else if (backend == "ONNX") {
0063 if (disableONNXGraphOpt) {
0064 Ort::SessionOptions sess_opts;
0065 sess_opts = cms::Ort::ONNXRuntime::defaultSessionOptions();
0066 sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
0067 ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path, &sess_opts);
0068 } else {
0069 ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path);
0070 }
0071 }
0072 }
0073 ~BaseMVACache() { tensorflow::closeSession(tf_session_); }
0074
0075 tensorflow::Session* getTFSession() const { return tf_session_; }
0076 const cms::Ort::ONNXRuntime& getONNXSession() const { return *ort_; }
0077
0078 private:
0079 std::shared_ptr<tensorflow::GraphDef> graph_;
0080 tensorflow::Session* tf_session_ = nullptr;
0081 std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
0082 };
0083
0084 template <typename T>
0085 class BaseMVAValueMapProducer : public edm::stream::EDProducer<edm::GlobalCache<BaseMVACache>> {
0086 public:
0087 explicit BaseMVAValueMapProducer(const edm::ParameterSet& iConfig, const BaseMVACache* cache)
0088 : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
0089 variablesOrder_(iConfig.getParameter<std::vector<std::string>>("variablesOrder")),
0090 name_(iConfig.getParameter<std::string>("name")),
0091 backend_(iConfig.getParameter<std::string>("backend")),
0092 weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
0093 isClassifier_(iConfig.getParameter<bool>("isClassifier")),
0094 tmva_(backend_ == "TMVA"),
0095 tf_(backend_ == "TF"),
0096 onnx_(backend_ == "ONNX"),
0097 batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
0098 if (!(tmva_ || tf_ || onnx_)) {
0099 throw cms::Exception("ConfigError") << "Only 'TF', 'ONNX' and 'TMVA' backends are supported\n";
0100 }
0101
0102 if (tmva_)
0103 reader_ = new TMVA::Reader();
0104 edm::ParameterSet const& varsPSet = iConfig.getParameter<edm::ParameterSet>("variables");
0105 for (const std::string& vname : varsPSet.getParameterNamesForType<std::string>()) {
0106 funcs_.emplace_back(
0107 std::pair<std::string, StringObjectFunction<T, true>>(vname, varsPSet.getParameter<std::string>(vname)));
0108 }
0109
0110 values_.resize(variablesOrder_.size());
0111 size_t i = 0;
0112 for (const auto& v : variablesOrder_) {
0113 positions_[v] = i;
0114 if (tmva_)
0115 reader_->AddVariable(v, (&values_.front()) + i);
0116 i++;
0117 }
0118
0119 if (tmva_) {
0120 reco::details::loadTMVAWeights(reader_, name_, weightfilename_);
0121 }
0122 if (tf_ || onnx_) {
0123 inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
0124 outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
0125 output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
0126 for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
0127 output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
0128 }
0129 }
0130
0131 if (tmva_)
0132 produces<edm::ValueMap<float>>();
0133 else {
0134 for (const auto& n : output_names_) {
0135 produces<edm::ValueMap<float>>(n);
0136 }
0137 }
0138 }
0139 ~BaseMVAValueMapProducer() override {}
0140
0141 void setValue(const std::string var, float val) {
0142 if (positions_.find(var) != positions_.end())
0143 values_[positions_[var]] = val;
0144 }
0145
0146 static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg);
0147 static void globalEndJob(const BaseMVACache* cache);
0148
0149 static edm::ParameterSetDescription getDescription();
0150 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0151
0152 private:
0153 void beginStream(edm::StreamID) override{};
0154 void produce(edm::Event&, const edm::EventSetup&) override;
0155 void endStream() override{};
0156
0157
0158 virtual void readAdditionalCollections(edm::Event&, const edm::EventSetup&) {}
0159 virtual void fillAdditionalVariables(const T&) {}
0160
0161 edm::EDGetTokenT<edm::View<T>> src_;
0162 std::map<std::string, size_t> positions_;
0163 std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
0164 std::vector<std::string> variablesOrder_;
0165 std::vector<float> values_;
0166 TMVA::Reader* reader_;
0167
0168 std::string name_;
0169 std::string backend_;
0170 std::string weightfilename_;
0171 bool isClassifier_;
0172 bool tmva_;
0173 bool tf_;
0174 bool onnx_;
0175 bool batch_eval_;
0176 std::string inputTensorName_;
0177 std::string outputTensorName_;
0178 std::vector<std::string> output_names_;
0179 std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
0180 };
0181
// Evaluates the MVA for every object in the input collection and puts one
// ValueMap<float> per output (a single unnamed one for TMVA) into the event.
template <typename T>
void BaseMVAValueMapProducer<T>::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
  edm::Handle<edm::View<T>> src;
  iEvent.getByToken(src_, src);
  readAdditionalCollections(iEvent, iSetup);
  // One score vector per produced map: TMVA has exactly one, TF/ONNX one per output name.
  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
  for (auto& v : mvaOut)
    v.reserve(src->size());

  if (batch_eval_) {
    // Batch mode (TF/ONNX only): flatten all objects' inputs into one row-major
    // buffer and run a single inference call over the whole collection.
    if (!src->empty()) {
      std::vector<float> data;
      data.reserve(src->size() * positions_.size());
      for (auto const& o : *src) {
        for (auto const& p : funcs_) {
          setValue(p.first, p.second(o));
        }
        fillAdditionalVariables(o);
        // values_ now holds this object's inputs in variablesOrder_ order.
        data.insert(data.end(), values_.begin(), values_.end());
      }

      std::vector<float> outputs;
      if (tf_) {
        // Shape: (number of objects) x (number of input variables).
        tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
        tensorflow::NamedTensorList input_tensors;
        input_tensors.resize(1);
        input_tensors[0] =
            tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
        for (unsigned i = 0; i < data.size(); ++i) {
          input_tensors[0].second.flat<float>()(i) = data[i];
        }
        std::vector<tensorflow::Tensor> output_tensors;
        tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &output_tensors);
        for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
          outputs.push_back(output_tensors.at(0).flat<float>()(i));
        }
      } else if (onnx_) {
        cms::Ort::FloatArrays inputs{data};
        // Last argument is the batch size; [0] selects the single requested output.
        outputs =
            globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
      }

      // Slice the flat output back into per-object chunks and apply the
      // configured post-processing formula for each named output.
      const unsigned outdim = outputs.size() / src->size();
      for (unsigned i = 0; i < src->size(); ++i) {
        std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
        for (size_t k = 0; k < output_names_.size(); k++) {
          mvaOut[k].push_back(output_formulas_[k](tmpOut));
        }
      }
    }
  } else {
    // Per-object mode: one inference call per object (all backends).
    for (auto const& o : *src) {
      for (auto const& p : funcs_) {
        setValue(p.first, p.second(o));
      }
      fillAdditionalVariables(o);
      if (tmva_) {
        // TMVA reads the inputs directly from values_ via the registered pointers.
        mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
      } else {
        std::vector<float> tmpOut;
        if (tf_) {
          // Single-row tensor: 1 x (number of input variables).
          tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
          tensorflow::NamedTensorList input_tensors;
          input_tensors.resize(1);
          input_tensors[0] =
              tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
          for (size_t j = 0; j < values_.size(); j++) {
            input_tensors[0].second.matrix<float>()(0, j) = values_[j];
          }
          std::vector<tensorflow::Tensor> outputs;
          tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &outputs);
          for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
            tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
        } else if (onnx_) {
          cms::Ort::FloatArrays inputs{values_};
          tmpOut = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];
        }
        for (size_t k = 0; k < output_names_.size(); k++)
          mvaOut[k].push_back(output_formulas_[k](tmpOut));
      }
    }
  }

  // Fill and store one ValueMap per output; TMVA uses the default (empty) label.
  size_t k = 0;
  for (auto& m : mvaOut) {
    std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
    edm::ValueMap<float>::Filler filler(*mvaV);
    filler.insert(src, m.begin(), m.end());
    filler.fill();
    iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
    k++;
  }
}
0276
0277 template <typename T>
0278 std::unique_ptr<BaseMVACache> BaseMVAValueMapProducer<T>::initializeGlobalCache(const edm::ParameterSet& cfg) {
0279 return std::make_unique<BaseMVACache>(cfg.getParameter<edm::FileInPath>("weightFile").fullPath(),
0280 cfg.getParameter<std::string>("backend"),
0281 cfg.getParameter<bool>("disableONNXGraphOpt"));
0282 }
0283
// Nothing to tear down here: the BaseMVACache destructor releases the sessions.
template <typename T>
void BaseMVAValueMapProducer<T>::globalEndJob(const BaseMVACache* cache) {}
0286
// Returns the common ParameterSetDescription shared by all specializations;
// TF/ONNX-specific parameters get permissive defaults so TMVA configs stay minimal.
template <typename T>
edm::ParameterSetDescription BaseMVAValueMapProducer<T>::getDescription() {
  edm::ParameterSetDescription desc;
  desc.add<edm::InputTag>("src")->setComment("input physics object collection");
  desc.add<std::vector<std::string>>("variablesOrder")->setComment("ordered list of MVA input variable names");
  desc.add<std::string>("name")->setComment("output score variable name");
  desc.add<bool>("isClassifier")->setComment("is a classifier discriminator");
  // "variables" allows anything: each entry is <name> = <string expression>.
  edm::ParameterSetDescription variables;
  variables.setAllowAnything();
  desc.add<edm::ParameterSetDescription>("variables", variables)->setComment("list of input variable definitions");
  desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file");
  desc.add<std::string>("backend", "TMVA")->setComment("TMVA, TF or ONNX");
  desc.add<std::string>("inputTensorName", "")->setComment("Name of tensorflow input tensor in the model");
  desc.add<std::string>("outputTensorName", "")->setComment("Name of tensorflow output tensor in the model");
  desc.add<std::vector<std::string>>("outputNames", std::vector<std::string>())
      ->setComment("Names of the output values to be used in the output valuemap");
  desc.add<std::vector<std::string>>("outputFormulas", std::vector<std::string>())
      ->setComment("Formulas to be used to post process the output");
  desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
  desc.add<bool>("disableONNXGraphOpt", false)->setComment("Disable ONNX runtime graph optimization");

  return desc;
}
0310
0311 template <typename T>
0312 void BaseMVAValueMapProducer<T>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0313 edm::ParameterSetDescription desc = getDescription();
0314 std::string modname;
0315 if (typeid(T) == typeid(pat::Jet))
0316 modname += "Jet";
0317 else if (typeid(T) == typeid(pat::Muon))
0318 modname += "Muon";
0319 else if (typeid(T) == typeid(pat::Electron))
0320 modname += "Ele";
0321 modname += "BaseMVAValueMapProducer";
0322 descriptions.add(modname, desc);
0323 }
0324
0325 #endif