Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:28

0001 #include "FWCore/Framework/interface/Frameworkfwd.h"
0002 #include "FWCore/Framework/interface/stream/EDProducer.h"
0003 
0004 #include "FWCore/Framework/interface/Event.h"
0005 #include "FWCore/Framework/interface/MakerMacros.h"
0006 
0007 #include "FWCore/Framework/interface/makeRefToBaseProdFrom.h"
0008 
0009 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0010 #include "FWCore/Utilities/interface/StreamID.h"
0011 
0012 #include "DataFormats/BTauReco/interface/JetTag.h"
0013 
0014 #include "DataFormats/BTauReco/interface/DeepDoubleXTagInfo.h"
0015 
0016 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
0017 
0018 #include <algorithm>
0019 #include <iostream>
0020 #include <fstream>
0021 
0022 using namespace cms::Ort;
0023 
0024 class DeepDoubleXONNXJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
0025 public:
0026   explicit DeepDoubleXONNXJetTagsProducer(const edm::ParameterSet&, const ONNXRuntime*);
0027   ~DeepDoubleXONNXJetTagsProducer() override;
0028 
0029   static void fillDescriptions(edm::ConfigurationDescriptions&);
0030 
0031   static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet&);
0032   static void globalEndJob(const ONNXRuntime*);
0033 
0034 private:
0035   typedef std::vector<reco::DeepDoubleXTagInfo> TagInfoCollection;
0036   typedef reco::JetTagCollection JetTagCollection;
0037 
0038   void produce(edm::Event&, const edm::EventSetup&) override;
0039 
0040   void make_inputs(unsigned i_jet, const reco::DeepDoubleXTagInfo& taginfo);
0041 
0042   const edm::EDGetTokenT<TagInfoCollection> src_;
0043   std::vector<std::string> flav_names_;
0044   std::vector<std::string> input_names_;
0045   std::vector<std::string> output_names_;
0046   std::string version_;
0047 
0048   unsigned n_features_global_;
0049   unsigned n_npf_, n_features_npf_;
0050   unsigned n_cpf_, n_features_cpf_;
0051   unsigned n_sv_, n_features_sv_;
0052   unsigned kGlobal, kNeutralCandidates, kChargedCandidates, kVertices;
0053   std::vector<unsigned> input_sizes_;
0054 
0055   // hold the input data
0056   FloatArrays data_;
0057 
0058   bool debug_ = false;
0059 };
0060 
0061 DeepDoubleXONNXJetTagsProducer::DeepDoubleXONNXJetTagsProducer(const edm::ParameterSet& iConfig,
0062                                                                const ONNXRuntime* cache)
0063     : src_(consumes<TagInfoCollection>(iConfig.getParameter<edm::InputTag>("src"))),
0064       flav_names_(iConfig.getParameter<std::vector<std::string>>("flav_names")),
0065       input_names_(iConfig.getParameter<std::vector<std::string>>("input_names")),
0066       output_names_(iConfig.getParameter<std::vector<std::string>>("output_names")),
0067       version_(iConfig.getParameter<std::string>("version")),
0068       debug_(iConfig.getUntrackedParameter<bool>("debugMode", false)) {
0069   // get output names from flav_names
0070   for (const auto& flav_name : flav_names_) {
0071     produces<JetTagCollection>(flav_name);
0072   }
0073 
0074   if (version_ == "V2") {
0075     n_features_global_ = 5;
0076     n_npf_ = 60;
0077     n_features_npf_ = 8;
0078     n_cpf_ = 40;
0079     n_features_cpf_ = 21;
0080     n_sv_ = 5;
0081     n_features_sv_ = 7;
0082     input_sizes_ = {n_features_global_, n_npf_ * n_features_npf_, n_cpf_ * n_features_cpf_, n_sv_ * n_features_sv_};
0083     kGlobal = 0;
0084     kNeutralCandidates = 1;
0085     kChargedCandidates = 2;
0086     kVertices = 3;
0087   } else {
0088     n_features_global_ = 27;
0089     n_cpf_ = 60;
0090     n_features_cpf_ = 8;
0091     n_sv_ = 5;
0092     n_features_sv_ = 2;
0093     input_sizes_ = {n_features_global_, n_cpf_ * n_features_cpf_, n_sv_ * n_features_sv_};
0094     kGlobal = 0;
0095     kChargedCandidates = 1;
0096     kVertices = 2;
0097   }
0098 
0099   assert(input_names_.size() == input_sizes_.size());
0100 }
0101 
0102 DeepDoubleXONNXJetTagsProducer::~DeepDoubleXONNXJetTagsProducer() {}
0103 
0104 void DeepDoubleXONNXJetTagsProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0105   edm::ParameterSetDescription desc;
0106   desc.add<edm::InputTag>("src", edm::InputTag("pfDeepDoubleXTagInfos"));
0107   desc.add<std::vector<std::string>>("input_names", {"input_1", "input_2", "input_3"});
0108   desc.add<std::vector<std::string>>("output_names", {});
0109   desc.add<std::string>("version", "V1");
0110 
0111   using FIP = edm::FileInPath;
0112   using PDFIP = edm::ParameterDescription<edm::FileInPath>;
0113   using PDPSD = edm::ParameterDescription<std::vector<std::string>>;
0114   using PDCases = edm::ParameterDescriptionCases<std::string>;
0115   using PDVersion = edm::ParameterDescription<std::string>;
0116   auto flavorCases = [&]() {
0117     return "BvL" >> (PDPSD("flav_names", std::vector<std::string>{"probQCD", "probHbb"}, true) and
0118                      PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDB.onnx"), true)) or
0119            "CvL" >> (PDPSD("flav_names", std::vector<std::string>{"probQCD", "probHcc"}, true) and
0120                      PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDC.onnx"), true)) or
0121            "CvB" >> (PDPSD("flav_names", std::vector<std::string>{"probHbb", "probHcc"}, true) and
0122                      PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDCvB.onnx"), true));
0123   };
0124   auto descBvL(desc);
0125   descBvL.ifValue(edm::ParameterDescription<std::string>("flavor", "BvL", true), flavorCases());
0126   descriptions.add("pfDeepDoubleBvLJetTags", descBvL);
0127 
0128   auto descCvL(desc);
0129   descCvL.ifValue(edm::ParameterDescription<std::string>("flavor", "CvL", true), flavorCases());
0130   descriptions.add("pfDeepDoubleCvLJetTags", descCvL);
0131 
0132   auto descCvB(desc);
0133   descCvB.ifValue(edm::ParameterDescription<std::string>("flavor", "CvB", true), flavorCases());
0134   descriptions.add("pfDeepDoubleCvBJetTags", descCvB);
0135 }
0136 
0137 std::unique_ptr<ONNXRuntime> DeepDoubleXONNXJetTagsProducer::initializeGlobalCache(const edm::ParameterSet& iConfig) {
0138   return std::make_unique<ONNXRuntime>(iConfig.getParameter<edm::FileInPath>("model_path").fullPath());
0139 }
0140 
0141 void DeepDoubleXONNXJetTagsProducer::globalEndJob(const ONNXRuntime* cache) {}
0142 
0143 void DeepDoubleXONNXJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
0144   edm::Handle<TagInfoCollection> tag_infos;
0145   iEvent.getByToken(src_, tag_infos);
0146 
0147   std::vector<std::unique_ptr<JetTagCollection>> output_tags;
0148   if (!tag_infos->empty()) {
0149     // initialize output collection
0150     auto jet_ref = tag_infos->begin()->jet();
0151     auto ref2prod = edm::makeRefToBaseProdFrom(jet_ref, iEvent);
0152     for (std::size_t i = 0; i < flav_names_.size(); i++) {
0153       output_tags.emplace_back(std::make_unique<JetTagCollection>(ref2prod));
0154     }
0155 
0156     // only need to run on jets with non-empty features
0157     auto batch_size = std::count_if(
0158         tag_infos->begin(), tag_infos->end(), [](const auto& taginfo) { return !taginfo.features().empty(); });
0159 
0160     std::vector<float> etas_debug;
0161     std::vector<float> outputs;
0162     if (batch_size > 0) {
0163       // init data storage
0164       data_.clear();
0165       for (const auto& len : input_sizes_) {
0166         data_.emplace_back(batch_size * len, 0);
0167       }
0168 
0169       // convert inputs
0170       unsigned idx = 0;
0171       for (const auto& taginfo : *tag_infos) {
0172         if (!taginfo.features().empty()) {
0173           make_inputs(idx, taginfo);
0174           if (debug_) {
0175             etas_debug.push_back(taginfo.jet()->eta());
0176           }
0177           ++idx;
0178         }
0179       }
0180 
0181       std::sort(input_names_.begin(), input_names_.end());  // input_names order on input is not preserved
0182       // run prediction
0183       outputs = globalCache()->run(input_names_, data_, {}, output_names_, batch_size)[0];
0184 
0185       if (debug_) {
0186         // Dump inputs to file
0187         std::ofstream outfile;
0188         outfile.open("test.txt", std::ios_base::app);
0189         outfile << iEvent.id().event() << std::endl;
0190         outfile << batch_size << std::endl;
0191         for (float x : etas_debug)
0192           outfile << x << ' ';
0193         outfile << std::endl;
0194         int _i = 0;
0195         for (const std::vector<float>& v : data_) {
0196           outfile << "input_" << _i << std::endl;
0197           for (float x : v)
0198             outfile << x << ' ';
0199           outfile << std::endl;
0200           _i = _i + 1;
0201         }
0202         outfile << "outputs" << std::endl;
0203         for (float x : outputs)
0204           outfile << x << ' ';
0205         outfile << std::endl;
0206       }
0207 
0208       assert(outputs.size() == flav_names_.size() * batch_size);
0209     }
0210 
0211     // get the outputs
0212     unsigned i_output = 0;
0213     for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
0214       const auto& taginfo = tag_infos->at(jet_n);
0215       const auto& jet_ref = taginfo.jet();
0216       for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
0217         if (!taginfo.features().empty()) {
0218           (*(output_tags[flav_n]))[jet_ref] = outputs[i_output];
0219           ++i_output;
0220         } else {
0221           (*(output_tags[flav_n]))[jet_ref] = -1.;
0222         }
0223       }
0224     }
0225   } else {
0226     // create empty output collection
0227     for (std::size_t i = 0; i < flav_names_.size(); i++) {
0228       output_tags.emplace_back(std::make_unique<JetTagCollection>());
0229     }
0230   }
0231 
0232   // put into the event
0233   for (std::size_t flav_n = 0; flav_n < flav_names_.size(); ++flav_n) {
0234     iEvent.put(std::move(output_tags[flav_n]), flav_names_[flav_n]);
0235   }
0236 }
0237 
0238 void DeepDoubleXONNXJetTagsProducer::make_inputs(unsigned i_jet, const reco::DeepDoubleXTagInfo& taginfo) {
0239   const auto& features = taginfo.features();
0240   float* ptr = nullptr;
0241   const float* start = nullptr;
0242   unsigned offset = 0;
0243 
0244   // DoubleB features
0245   offset = i_jet * input_sizes_[kGlobal];
0246   ptr = &data_[kGlobal][offset];
0247   start = ptr;
0248   const auto& tag_info_features = features.tag_info_features;
0249   *ptr = tag_info_features.jetNTracks;
0250   if (version_ == "V1") {
0251     *(++ptr) = tag_info_features.jetNSecondaryVertices;
0252   }
0253   *(++ptr) = tag_info_features.tau1_trackEtaRel_0;
0254   *(++ptr) = tag_info_features.tau1_trackEtaRel_1;
0255   if (version_ == "V1") {
0256     *(++ptr) = tag_info_features.tau1_trackEtaRel_2;
0257     *(++ptr) = tag_info_features.tau2_trackEtaRel_0;
0258     *(++ptr) = tag_info_features.tau2_trackEtaRel_1;
0259     *(++ptr) = tag_info_features.tau2_trackEtaRel_2;
0260     *(++ptr) = tag_info_features.tau1_flightDistance2dSig;
0261     *(++ptr) = tag_info_features.tau2_flightDistance2dSig;
0262   }
0263   *(++ptr) = tag_info_features.tau1_vertexDeltaR;
0264   *(++ptr) = tag_info_features.tau1_vertexEnergyRatio;
0265   if (version_ == "V1") {
0266     *(++ptr) = tag_info_features.tau2_vertexEnergyRatio;
0267     *(++ptr) = tag_info_features.tau1_vertexMass;
0268     *(++ptr) = tag_info_features.tau2_vertexMass;
0269     *(++ptr) = tag_info_features.trackSip2dSigAboveBottom_0;
0270     *(++ptr) = tag_info_features.trackSip2dSigAboveBottom_1;
0271     *(++ptr) = tag_info_features.trackSip2dSigAboveCharm;
0272     *(++ptr) = tag_info_features.trackSip3dSig_0;
0273     *(++ptr) = tag_info_features.tau1_trackSip3dSig_0;
0274     *(++ptr) = tag_info_features.tau1_trackSip3dSig_1;
0275     *(++ptr) = tag_info_features.trackSip3dSig_1;
0276     *(++ptr) = tag_info_features.tau2_trackSip3dSig_0;
0277     *(++ptr) = tag_info_features.tau2_trackSip3dSig_1;
0278     *(++ptr) = tag_info_features.trackSip3dSig_2;
0279     *(++ptr) = tag_info_features.trackSip3dSig_3;
0280     *(++ptr) = tag_info_features.z_ratio;
0281   }
0282   assert(start + n_features_global_ - 1 == ptr);
0283 
0284   // c_pf candidates
0285   auto max_c_pf_n = std::min(features.c_pf_features.size(), (std::size_t)n_cpf_);
0286   offset = i_jet * input_sizes_[kChargedCandidates];
0287   for (std::size_t c_pf_n = 0; c_pf_n < max_c_pf_n; c_pf_n++) {
0288     const auto& c_pf_features = features.c_pf_features.at(c_pf_n);
0289     ptr = &data_[kChargedCandidates][offset + c_pf_n * n_features_cpf_];
0290     start = ptr;
0291     *ptr = c_pf_features.btagPf_trackEtaRel;
0292     if (version_ == "V1") {
0293       *(++ptr) = c_pf_features.btagPf_trackPtRatio;
0294       *(++ptr) = c_pf_features.btagPf_trackPParRatio;
0295       *(++ptr) = c_pf_features.btagPf_trackSip2dVal;
0296       *(++ptr) = c_pf_features.btagPf_trackSip2dSig;
0297       *(++ptr) = c_pf_features.btagPf_trackSip3dVal;
0298       *(++ptr) = c_pf_features.btagPf_trackSip3dSig;
0299       *(++ptr) = c_pf_features.btagPf_trackJetDistVal;
0300     } else {
0301       *(++ptr) = c_pf_features.btagPf_trackJetDistVal;
0302       *(++ptr) = c_pf_features.btagPf_trackPParRatio;
0303       *(++ptr) = c_pf_features.btagPf_trackPtRatio;
0304       *(++ptr) = c_pf_features.btagPf_trackSip2dSig;
0305       *(++ptr) = c_pf_features.btagPf_trackSip2dVal;
0306       *(++ptr) = c_pf_features.btagPf_trackSip3dSig;
0307       *(++ptr) = c_pf_features.btagPf_trackSip3dVal;
0308       *(++ptr) = c_pf_features.deltaR;
0309       *(++ptr) = c_pf_features.drminsv;
0310       *(++ptr) = c_pf_features.drsubjet1;
0311       *(++ptr) = c_pf_features.drsubjet2;
0312       *(++ptr) = c_pf_features.dxy;
0313       *(++ptr) = c_pf_features.dxysig;
0314       *(++ptr) = c_pf_features.dz;
0315       *(++ptr) = c_pf_features.dzsig;
0316       *(++ptr) = c_pf_features.erel;
0317       *(++ptr) = c_pf_features.etarel;
0318       *(++ptr) = c_pf_features.chi2;
0319       *(++ptr) = c_pf_features.ptrel_noclip;
0320       *(++ptr) = c_pf_features.quality;
0321     }
0322 
0323     assert(start + n_features_cpf_ - 1 == ptr);
0324   }
0325 
0326   if (version_ == "V2") {
0327     // n_pf candidates
0328     auto max_n_pf_n = std::min(features.n_pf_features.size(), (std::size_t)n_cpf_);
0329     offset = i_jet * input_sizes_[kNeutralCandidates];
0330     for (std::size_t n_pf_n = 0; n_pf_n < max_n_pf_n; n_pf_n++) {
0331       const auto& n_pf_features = features.n_pf_features.at(n_pf_n);
0332       ptr = &data_[kNeutralCandidates][offset + n_pf_n * n_features_npf_];
0333       start = ptr;
0334       *ptr = n_pf_features.deltaR_noclip;
0335       *(++ptr) = n_pf_features.drminsv;
0336       *(++ptr) = n_pf_features.drsubjet1;
0337       *(++ptr) = n_pf_features.drsubjet2;
0338       *(++ptr) = n_pf_features.erel;
0339       *(++ptr) = n_pf_features.hadFrac;
0340       *(++ptr) = n_pf_features.ptrel_noclip;
0341       *(++ptr) = n_pf_features.puppiw;
0342       assert(start + n_features_npf_ - 1 == ptr);
0343     }
0344   }
0345 
0346   // sv candidates
0347   auto max_sv_n = std::min(features.sv_features.size(), (std::size_t)n_sv_);
0348   offset = i_jet * input_sizes_[kVertices];
0349   for (std::size_t sv_n = 0; sv_n < max_sv_n; sv_n++) {
0350     const auto& sv_features = features.sv_features.at(sv_n);
0351     ptr = &data_[kVertices][offset + sv_n * n_features_sv_];
0352     start = ptr;
0353     if (version_ == "V1") {
0354       *ptr = sv_features.d3d;
0355       *(++ptr) = sv_features.d3dsig;
0356     } else {
0357       *ptr = sv_features.costhetasvpv;
0358       *(++ptr) = sv_features.deltaR;
0359       *(++ptr) = sv_features.dxysig;
0360       *(++ptr) = sv_features.mass;
0361       *(++ptr) = sv_features.ntracks;
0362       *(++ptr) = sv_features.pt;
0363       *(++ptr) = sv_features.ptrel;
0364     }
0365     assert(start + n_features_sv_ - 1 == ptr);
0366   }
0367 }
0368 
0369 //define this as a plug-in
0370 DEFINE_FWK_MODULE(DeepDoubleXONNXJetTagsProducer);