// File indexing completed on 2024-09-07 04:37:20
0001 #ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
0002 #define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023 #include <memory>
0024
0025
0026 #include "FWCore/Framework/interface/Frameworkfwd.h"
0027 #include "FWCore/Framework/interface/stream/EDProducer.h"
0028
0029 #include "FWCore/Framework/interface/Event.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031
0032 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0033 #include "FWCore/Utilities/interface/StreamID.h"
0034
0035 #include "TMVA/Factory.h"
0036 #include "TMVA/Reader.h"
0037
0038 #include "CommonTools/Utils/interface/StringObjectFunction.h"
0039 #include "DataFormats/Common/interface/ValueMap.h"
0040 #include "CommonTools/MVAUtils/interface/TMVAZipReader.h"
0041 #include "DataFormats/PatCandidates/interface/Jet.h"
0042 #include "DataFormats/PatCandidates/interface/Muon.h"
0043 #include "DataFormats/PatCandidates/interface/Electron.h"
0044
0045 #include "DataFormats/PatCandidates/interface/Jet.h"
0046 #include "DataFormats/PatCandidates/interface/Muon.h"
0047 #include "DataFormats/PatCandidates/interface/Electron.h"
0048 #include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
0049 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
0050
0051 #include <string>
0052
0053
0054
0055
0056 class BaseMVACache {
0057 public:
0058 BaseMVACache(const std::string& model_path, const std::string& backend, const bool disableONNXGraphOpt) {
0059 if (backend == "TF") {
0060 graph_.reset(tensorflow::loadGraphDef(model_path));
0061 tf_session_ = tensorflow::createSession(graph_.get());
0062 } else if (backend == "ONNX") {
0063 if (disableONNXGraphOpt) {
0064 Ort::SessionOptions sess_opts;
0065 sess_opts = cms::Ort::ONNXRuntime::defaultSessionOptions();
0066 sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
0067 ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path, &sess_opts);
0068 } else {
0069 ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path);
0070 }
0071 }
0072 }
0073 ~BaseMVACache() { tensorflow::closeSession(tf_session_); }
0074
0075 tensorflow::Session* getTFSession() const { return tf_session_; }
0076 const cms::Ort::ONNXRuntime& getONNXSession() const { return *ort_; }
0077
0078 private:
0079 std::shared_ptr<tensorflow::GraphDef> graph_;
0080 tensorflow::Session* tf_session_ = nullptr;
0081 std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
0082 };
0083
0084 template <typename T>
0085 class BaseMVAValueMapProducer : public edm::stream::EDProducer<edm::GlobalCache<BaseMVACache>> {
0086 public:
0087 explicit BaseMVAValueMapProducer(const edm::ParameterSet& iConfig, const BaseMVACache* cache)
0088 : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
0089 name_(iConfig.getParameter<std::string>("name")),
0090 backend_(iConfig.getParameter<std::string>("backend")),
0091 weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
0092 tmva_(backend_ == "TMVA"),
0093 tf_(backend_ == "TF"),
0094 onnx_(backend_ == "ONNX"),
0095 batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
0096 if (tmva_) {
0097 reader_ = new TMVA::Reader();
0098 isClassifier_ = iConfig.getParameter<bool>("isClassifier");
0099 }
0100
0101 std::vector<edm::ParameterSet> const& varsPSet = iConfig.getParameter<std::vector<edm::ParameterSet>>("variables");
0102 values_.resize(varsPSet.size());
0103 size_t i = 0;
0104 for (const edm::ParameterSet& var_pset : varsPSet) {
0105 const std::string& vname = var_pset.getParameter<std::string>("name");
0106 if (var_pset.existsAs<std::string>("expr"))
0107 funcs_.emplace_back(
0108 std::pair<std::string, StringObjectFunction<T, true>>(vname, var_pset.getParameter<std::string>("expr")));
0109 positions_[vname] = i;
0110 if (tmva_)
0111 reader_->AddVariable(vname, (&values_.front()) + i);
0112 i++;
0113 }
0114
0115 if (tmva_) {
0116 reco::details::loadTMVAWeights(reader_, name_, weightfilename_);
0117 }
0118 if (tf_ || onnx_) {
0119 inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
0120 outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
0121 output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
0122 for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
0123 output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
0124 }
0125 }
0126
0127 if (tmva_)
0128 produces<edm::ValueMap<float>>();
0129 else {
0130 for (const auto& n : output_names_) {
0131 produces<edm::ValueMap<float>>(n);
0132 }
0133 }
0134 }
0135 ~BaseMVAValueMapProducer() override {}
0136
0137 void setValue(const std::string var, float val) {
0138 if (positions_.find(var) != positions_.end())
0139 values_[positions_[var]] = val;
0140 }
0141
0142 static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg);
0143 static void globalEndJob(const BaseMVACache* cache);
0144
0145 static edm::ParameterSetDescription getDescription();
0146 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0147
0148 private:
0149 void beginStream(edm::StreamID) override{};
0150 void produce(edm::Event&, const edm::EventSetup&) override;
0151 void endStream() override {}
0152
0153
0154 virtual void readAdditionalCollections(edm::Event&, const edm::EventSetup&) {}
0155 virtual void fillAdditionalVariables(const T&) {}
0156
0157 edm::EDGetTokenT<edm::View<T>> src_;
0158 std::map<std::string, size_t> positions_;
0159 std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
0160 std::vector<float> values_;
0161 TMVA::Reader* reader_;
0162
0163 std::string name_;
0164 std::string backend_;
0165 std::string weightfilename_;
0166 bool isClassifier_;
0167 bool tmva_;
0168 bool tf_;
0169 bool onnx_;
0170 bool batch_eval_;
0171 std::string inputTensorName_;
0172 std::string outputTensorName_;
0173 std::vector<std::string> output_names_;
0174 std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
0175 };
0176
// Evaluate the configured MVA for every object in the src_ collection and
// put one ValueMap<float> per output (a single unnamed map for TMVA).
template <typename T>
void BaseMVAValueMapProducer<T>::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
  edm::Handle<edm::View<T>> src;
  iEvent.getByToken(src_, src);
  readAdditionalCollections(iEvent, iSetup);
  // One score vector per product: TMVA has exactly one, TF/ONNX one per output name.
  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
  for (auto& v : mvaOut)
    v.reserve(src->size());

  if (batch_eval_) {
    // Batch mode: build one row-major (n_objects x n_variables) buffer and run a
    // single inference call. Only TF and ONNX are handled here.
    // NOTE(review): batch_eval_ with the TMVA backend would fill nothing and
    // divide by outdim computed from an empty output — presumably excluded by
    // configuration; verify.
    if (!src->empty()) {
      std::vector<float> data;
      data.reserve(src->size() * positions_.size());
      for (auto const& o : *src) {
        // Evaluate each string expression into the shared values_ buffer...
        for (auto const& p : funcs_) {
          setValue(p.first, p.second(o));
        }
        // ...let subclasses fill the remaining (non-"expr") variables...
        fillAdditionalVariables(o);
        // ...then append this object's row to the batch buffer.
        data.insert(data.end(), values_.begin(), values_.end());
      }

      std::vector<float> outputs;
      if (tf_) {
        tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
        tensorflow::NamedTensorList input_tensors;
        input_tensors.resize(1);
        input_tensors[0] =
            tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
        // Copy the flat batch buffer into the input tensor element by element.
        for (unsigned i = 0; i < data.size(); ++i) {
          input_tensors[0].second.flat<float>()(i) = data[i];
        }
        std::vector<tensorflow::Tensor> output_tensors;
        tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &output_tensors);
        for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
          outputs.push_back(output_tensors.at(0).flat<float>()(i));
        }
      } else if (onnx_) {
        cms::Ort::FloatArrays inputs{data};
        // The last argument is the batch size (number of objects).
        outputs =
            globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
      }

      // Slice the flat output back into per-object chunks and apply the
      // post-processing formulas to produce the final named scores.
      const unsigned outdim = outputs.size() / src->size();
      for (unsigned i = 0; i < src->size(); ++i) {
        std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
        for (size_t k = 0; k < output_names_.size(); k++) {
          mvaOut[k].push_back(output_formulas_[k](tmpOut));
        }
      }
    }
  } else {
    // Per-object mode: one inference call per object.
    for (auto const& o : *src) {
      for (auto const& p : funcs_) {
        setValue(p.first, p.second(o));
      }
      fillAdditionalVariables(o);
      if (tmva_) {
        // Classifier returns a single score; regression returns a vector, of
        // which only the first target is kept.
        mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
      } else {
        std::vector<float> tmpOut;
        if (tf_) {
          // Single-row input tensor of shape (1, n_variables).
          tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
          tensorflow::NamedTensorList input_tensors;
          input_tensors.resize(1);
          input_tensors[0] =
              tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
          for (size_t j = 0; j < values_.size(); j++) {
            input_tensors[0].second.matrix<float>()(0, j) = values_[j];
          }
          std::vector<tensorflow::Tensor> outputs;
          tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &outputs);
          for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
            tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
        } else if (onnx_) {
          cms::Ort::FloatArrays inputs{values_};
          tmpOut = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];
        }
        for (size_t k = 0; k < output_names_.size(); k++)
          mvaOut[k].push_back(output_formulas_[k](tmpOut));
      }
    }
  }

  // Wrap each score vector in a ValueMap keyed by the input collection.
  size_t k = 0;
  for (auto& m : mvaOut) {
    std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
    edm::ValueMap<float>::Filler filler(*mvaV);
    filler.insert(src, m.begin(), m.end());
    filler.fill();
    iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
    k++;
  }
}
0271
0272 template <typename T>
0273 std::unique_ptr<BaseMVACache> BaseMVAValueMapProducer<T>::initializeGlobalCache(const edm::ParameterSet& cfg) {
0274 std::string backend = cfg.getParameter<std::string>("backend");
0275 bool disableONNXGraphOpt = false;
0276 if (backend == "ONNX")
0277 disableONNXGraphOpt = cfg.getParameter<bool>("disableONNXGraphOpt");
0278 return std::make_unique<BaseMVACache>(
0279 cfg.getParameter<edm::FileInPath>("weightFile").fullPath(), backend, disableONNXGraphOpt);
0280 }
0281
// Nothing to release here: the cache frees its TF/ONNX sessions in its own destructor.
template <typename T>
void BaseMVAValueMapProducer<T>::globalEndJob(const BaseMVACache* cache) {}
0284
0285 template <typename T>
0286 edm::ParameterSetDescription BaseMVAValueMapProducer<T>::getDescription() {
0287 edm::ParameterSetDescription desc;
0288 desc.add<edm::InputTag>("src")->setComment("input physics object collection");
0289
0290 desc.add<std::string>("name")->setComment("output score variable name");
0291 desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file, or TF/ONNX model file");
0292 desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
0293
0294 edm::ParameterSetDescription variable;
0295 variable.add<std::string>("name")->setComment("name of the variable, either created by expr, or internally by code");
0296 variable.addOptional<std::string>("expr")->setComment(
0297 "a function to define the content of the model input, absence of it means the leaf is computed internally");
0298 variable.setComment("a PSet to define an entry to the ML model");
0299 desc.addVPSet("variables", variable);
0300
0301 auto itn = edm::ParameterDescription<std::string>(
0302 "inputTensorName", "", true, edm::Comment("Name of tensorflow input tensor in the model"));
0303 auto otn = edm::ParameterDescription<std::string>(
0304 "outputTensorName", "", true, edm::Comment("Name of tensorflow output tensor in the model"));
0305 auto on = edm::ParameterDescription<std::vector<std::string>>(
0306 "outputNames",
0307 std::vector<std::string>(),
0308 true,
0309 edm::Comment("Names of the output values to be used in the output valuemap"));
0310 auto of = edm::ParameterDescription<std::vector<std::string>>(
0311 "outputFormulas",
0312 std::vector<std::string>(),
0313 true,
0314 edm::Comment("Formulas to be used to post process the output"));
0315 auto dog = edm::ParameterDescription<bool>(
0316 "disableONNXGraphOpt", false, true, edm::Comment("Disable ONNX runtime graph optimization"));
0317
0318 desc.ifValue(edm::ParameterDescription<std::string>(
0319 "backend", "TMVA", true, edm::Comment("the backend to evaluate the model:tmva, tf or onnx")),
0320 "TMVA" >> edm::ParameterDescription<bool>(
0321 "isClassifier", true, true, edm::Comment("a classification or regression")) or
0322 "TF" >> (itn and otn and on and of) or "ONNX" >> (itn and otn and on and of and dog));
0323
0324 return desc;
0325 }
0326
0327 template <typename T>
0328 void BaseMVAValueMapProducer<T>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0329 edm::ParameterSetDescription desc = getDescription();
0330 std::string modname;
0331 if (typeid(T) == typeid(pat::Jet))
0332 modname += "Jet";
0333 else if (typeid(T) == typeid(pat::Muon))
0334 modname += "Muon";
0335 else if (typeid(T) == typeid(pat::Electron))
0336 modname += "Ele";
0337 modname += "BaseMVAValueMapProducer";
0338 descriptions.add(modname, desc);
0339 }
0340
0341 #endif