Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-16 23:01:11

0001 #include "FWCore/Framework/interface/Frameworkfwd.h"
0002 #include "FWCore/Framework/interface/stream/EDProducer.h"
0003 
0004 #include "FWCore/Framework/interface/Event.h"
0005 #include "FWCore/Framework/interface/MakerMacros.h"
0006 
0007 #include "FWCore/Framework/interface/makeRefToBaseProdFrom.h"
0008 
0009 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0010 #include "FWCore/Utilities/interface/StreamID.h"
0011 
0012 #include "DataFormats/BTauReco/interface/JetTag.h"
0013 
0014 #include "DataFormats/BTauReco/interface/UnifiedParticleTransformerAK4TagInfo.h"
0015 #include "DataFormats/BTauReco/interface/UnifiedParticleTransformerAK4Features.h"
0016 
0017 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
0018 
0019 using namespace cms::Ort;
0020 
0021 class UnifiedParticleTransformerAK4ONNXJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
0022 public:
0023   explicit UnifiedParticleTransformerAK4ONNXJetTagsProducer(const edm::ParameterSet&, const ONNXRuntime*);
0024   ~UnifiedParticleTransformerAK4ONNXJetTagsProducer() override = default;
0025 
0026   static void fillDescriptions(edm::ConfigurationDescriptions&);
0027 
0028   static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet&);
0029   static void globalEndJob(const ONNXRuntime*);
0030 
0031 private:
0032   typedef std::vector<reco::UnifiedParticleTransformerAK4TagInfo> TagInfoCollection;
0033   typedef reco::JetTagCollection JetTagCollection;
0034 
0035   void produce(edm::Event&, const edm::EventSetup&) override;
0036 
0037   void make_inputs(btagbtvdeep::UnifiedParticleTransformerAK4Features features);
0038   void get_input_sizes(const reco::FeaturesTagInfo<btagbtvdeep::UnifiedParticleTransformerAK4Features> taginfo);
0039 
0040   const edm::EDGetTokenT<TagInfoCollection> src_;
0041   std::vector<std::string> flav_names_;
0042   std::vector<std::string> input_names_;
0043   std::vector<std::string> output_names_;
0044 
0045   enum InputIndexes {
0046     kChargedCandidates = 0,
0047     kLostTracks = 1,
0048     kNeutralCandidates = 2,
0049     kVertices = 3,
0050     kChargedCandidates4Vec = 4,
0051     kLostTracks4Vec = 5,
0052     kNeutralCandidates4Vec = 6,
0053     kVertices4Vec = 7
0054   };
0055   unsigned n_cpf_;
0056   constexpr static unsigned n_features_cpf_ = 25;
0057   constexpr static unsigned n_pairwise_features_cpf_ = 4;
0058   unsigned n_lt_;
0059   constexpr static unsigned n_features_lt_ = 18;
0060   constexpr static unsigned n_pairwise_features_lt_ = 4;
0061   unsigned n_npf_;
0062   constexpr static unsigned n_features_npf_ = 8;
0063   constexpr static unsigned n_pairwise_features_npf_ = 4;
0064   unsigned n_sv_;
0065   constexpr static unsigned n_features_sv_ = 14;
0066   constexpr static unsigned n_pairwise_features_sv_ = 4;
0067   std::vector<unsigned> input_sizes_;
0068   std::vector<std::vector<int64_t>> input_shapes_;  // shapes of each input group (-1 for dynamic axis)
0069 
0070   // hold the input data
0071   FloatArrays data_;
0072 };
0073 
0074 UnifiedParticleTransformerAK4ONNXJetTagsProducer::UnifiedParticleTransformerAK4ONNXJetTagsProducer(
0075     const edm::ParameterSet& iConfig, const ONNXRuntime* cache)
0076     : src_(consumes<TagInfoCollection>(iConfig.getParameter<edm::InputTag>("src"))),
0077       flav_names_(iConfig.getParameter<std::vector<std::string>>("flav_names")),
0078       input_names_(iConfig.getParameter<std::vector<std::string>>("input_names")),
0079       output_names_(iConfig.getParameter<std::vector<std::string>>("output_names")) {
0080   // get output names from flav_names
0081   for (const auto& flav_name : flav_names_) {
0082     produces<JetTagCollection>(flav_name);
0083   }
0084 }
0085 
0086 void UnifiedParticleTransformerAK4ONNXJetTagsProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0087   // pfUnifiedParticleTransformerAK4JetTags
0088   edm::ParameterSetDescription desc;
0089   desc.add<edm::InputTag>("src", edm::InputTag("pfUnifiedParticleTransformerAK4TagInfos"));
0090   desc.add<std::vector<std::string>>(
0091       "input_names", {"input_1", "input_2", "input_3", "input_4", "input_5", "input_6", "input_7", "input_8"});
0092   desc.add<edm::FileInPath>("model_path", edm::FileInPath("RecoBTag/Combined/data/UParTAK4/PUPPI/V00/UParTAK4.onnx"));
0093   desc.add<std::vector<std::string>>("output_names", {"softmax"});
0094   desc.add<std::vector<std::string>>(
0095       "flav_names",
0096       std::vector<std::string>{"probb",        "probbb",       "problepb",     "probc",         "probs",
0097                                "probu",        "probd",        "probg",        "probele",       "probmu",
0098                                "probtaup1h0p", "probtaup1h1p", "probtaup1h2p", "probtaup3h0p",  "probtaup3h1p",
0099                                "probtaum1h0p", "probtaum1h1p", "probtaum1h2p", "probtaum3h0p",  "probtaum3h1p",
0100                                "ptcorr",       "ptreshigh",    "ptreslow",     "ptnu",          "probemudata",
0101                                "probemumc",    "probdimudata", "probdimumc",   "probmutaudata", "probmutaumc"});
0102 
0103   descriptions.add("pfUnifiedParticleTransformerAK4JetTags", desc);
0104 }
0105 
0106 std::unique_ptr<ONNXRuntime> UnifiedParticleTransformerAK4ONNXJetTagsProducer::initializeGlobalCache(
0107     const edm::ParameterSet& iConfig) {
0108   return std::make_unique<ONNXRuntime>(iConfig.getParameter<edm::FileInPath>("model_path").fullPath());
0109 }
0110 
0111 void UnifiedParticleTransformerAK4ONNXJetTagsProducer::globalEndJob(const ONNXRuntime* cache) {}
0112 
0113 void UnifiedParticleTransformerAK4ONNXJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
0114   edm::Handle<TagInfoCollection> tag_infos;
0115   iEvent.getByToken(src_, tag_infos);
0116 
0117   // initialize output collection
0118   std::vector<std::unique_ptr<JetTagCollection>> output_tags;
0119   if (!tag_infos->empty()) {
0120     auto jet_ref = tag_infos->begin()->jet();
0121     auto ref2prod = edm::makeRefToBaseProdFrom(jet_ref, iEvent);
0122     for (std::size_t i = 0; i < flav_names_.size(); i++) {
0123       output_tags.emplace_back(std::make_unique<JetTagCollection>(ref2prod));
0124     }
0125   } else {
0126     for (std::size_t i = 0; i < flav_names_.size(); i++) {
0127       output_tags.emplace_back(std::make_unique<JetTagCollection>());
0128     }
0129   }
0130 
0131   for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
0132     const auto& taginfo = (*tag_infos)[jet_n];
0133     std::vector<float> outputs(flav_names_.size(), -1.0);
0134     if (taginfo.features().is_filled) {
0135       get_input_sizes(taginfo);
0136 
0137       // run prediction with dynamic batch size per event
0138       input_shapes_ = {{(int64_t)1, (int64_t)n_cpf_, (int64_t)n_features_cpf_},
0139                        {(int64_t)1, (int64_t)n_lt_, (int64_t)n_features_lt_},
0140                        {(int64_t)1, (int64_t)n_npf_, (int64_t)n_features_npf_},
0141                        {(int64_t)1, (int64_t)n_sv_, (int64_t)n_features_sv_},
0142                        {(int64_t)1, (int64_t)n_cpf_, (int64_t)n_pairwise_features_cpf_},
0143                        {(int64_t)1, (int64_t)n_lt_, (int64_t)n_pairwise_features_lt_},
0144                        {(int64_t)1, (int64_t)n_npf_, (int64_t)n_pairwise_features_npf_},
0145                        {(int64_t)1, (int64_t)n_sv_, (int64_t)n_pairwise_features_sv_}};
0146 
0147       outputs = globalCache()->run(input_names_, data_, input_shapes_, output_names_, 1)[0];
0148       assert(outputs.size() == flav_names_.size());
0149     }
0150 
0151     const auto& jet_ref = tag_infos->at(jet_n).jet();
0152     for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
0153       (*(output_tags[flav_n]))[jet_ref] = outputs[flav_n];
0154     }
0155   }
0156 
0157   // put into the event
0158   for (std::size_t flav_n = 0; flav_n < flav_names_.size(); ++flav_n) {
0159     iEvent.put(std::move(output_tags[flav_n]), flav_names_[flav_n]);
0160   }
0161 }
0162 
0163 void UnifiedParticleTransformerAK4ONNXJetTagsProducer::get_input_sizes(
0164     const reco::FeaturesTagInfo<btagbtvdeep::UnifiedParticleTransformerAK4Features> taginfo) {
0165   const auto& features = taginfo.features();
0166 
0167   /// We require a fixed size due to an ONNX conversion issue (to be improved in the future ?) ///
0168   n_cpf_ = (unsigned int)29;
0169   n_lt_ = (unsigned int)5;
0170   n_npf_ = (unsigned int)25;
0171   n_sv_ = (unsigned int)5;
0172 
0173   input_sizes_ = {
0174       n_cpf_ * n_features_cpf_,
0175       n_lt_ * n_features_lt_,
0176       n_npf_ * n_features_npf_,
0177       n_sv_ * n_features_sv_,
0178       n_cpf_ * n_pairwise_features_cpf_,
0179       n_lt_ * n_pairwise_features_lt_,
0180       n_npf_ * n_pairwise_features_npf_,
0181       n_sv_ * n_pairwise_features_sv_,
0182   };
0183   // init data storage
0184   data_.clear();
0185   for (const auto& len : input_sizes_) {
0186     data_.emplace_back(1 * len, 0);
0187   }
0188 
0189   make_inputs(features);
0190 }
0191 
0192 void UnifiedParticleTransformerAK4ONNXJetTagsProducer::make_inputs(
0193     btagbtvdeep::UnifiedParticleTransformerAK4Features features) {
0194   float* ptr = nullptr;
0195   const float* start = nullptr;
0196   unsigned offset = 0;
0197 
0198   // c_pf candidates
0199   auto max_c_pf_n = std::min(features.c_pf_features.size(), (std::size_t)n_cpf_);
0200   for (std::size_t c_pf_n = 0; c_pf_n < max_c_pf_n; c_pf_n++) {
0201     const auto& c_pf_features = features.c_pf_features.at(c_pf_n);
0202     ptr = &data_[kChargedCandidates][offset + c_pf_n * n_features_cpf_];
0203     start = ptr;
0204     *ptr = c_pf_features.btagPf_trackEtaRel;
0205     *(++ptr) = c_pf_features.btagPf_trackPtRel;
0206     *(++ptr) = c_pf_features.btagPf_trackPPar;
0207     *(++ptr) = c_pf_features.btagPf_trackDeltaR;
0208     *(++ptr) = c_pf_features.btagPf_trackPParRatio;
0209     *(++ptr) = c_pf_features.btagPf_trackSip2dVal;
0210     *(++ptr) = c_pf_features.btagPf_trackSip2dSig;
0211     *(++ptr) = c_pf_features.btagPf_trackSip3dVal;
0212     *(++ptr) = c_pf_features.btagPf_trackSip3dSig;
0213     *(++ptr) = c_pf_features.btagPf_trackJetDistVal;
0214     *(++ptr) = c_pf_features.ptrel;
0215     *(++ptr) = c_pf_features.drminsv;
0216     *(++ptr) = c_pf_features.vtx_ass;
0217     *(++ptr) = c_pf_features.puppiw;
0218     *(++ptr) = c_pf_features.chi2;
0219     *(++ptr) = c_pf_features.quality;
0220     *(++ptr) = c_pf_features.charge;
0221     *(++ptr) = c_pf_features.dz;
0222     *(++ptr) = c_pf_features.btagPf_trackDecayLen;
0223     *(++ptr) = c_pf_features.HadFrac;
0224     *(++ptr) = c_pf_features.CaloFrac;
0225     *(++ptr) = c_pf_features.pdgID;
0226     *(++ptr) = c_pf_features.lostInnerHits;
0227     *(++ptr) = c_pf_features.numberOfPixelHits;
0228     *(++ptr) = c_pf_features.numberOfStripHits;
0229 
0230     assert(start + n_features_cpf_ - 1 == ptr);
0231   }
0232 
0233   // n_lt candidates
0234   auto max_lt_n = std::min(features.lt_features.size(), (std::size_t)n_lt_);
0235   for (std::size_t lt_n = 0; lt_n < max_lt_n; lt_n++) {
0236     const auto& lt_features = features.lt_features.at(lt_n);
0237     ptr = &data_[kLostTracks][offset + lt_n * n_features_lt_];
0238     start = ptr;
0239     *ptr = lt_features.btagPf_trackEtaRel;
0240     *(++ptr) = lt_features.btagPf_trackPtRel;
0241     *(++ptr) = lt_features.btagPf_trackPPar;
0242     *(++ptr) = lt_features.btagPf_trackDeltaR;
0243     *(++ptr) = lt_features.btagPf_trackPParRatio;
0244     *(++ptr) = lt_features.btagPf_trackSip2dVal;
0245     *(++ptr) = lt_features.btagPf_trackSip2dSig;
0246     *(++ptr) = lt_features.btagPf_trackSip3dVal;
0247     *(++ptr) = lt_features.btagPf_trackSip3dSig;
0248     *(++ptr) = lt_features.btagPf_trackJetDistVal;
0249     *(++ptr) = lt_features.drminsv;
0250     *(++ptr) = lt_features.charge;
0251     *(++ptr) = lt_features.puppiw;
0252     *(++ptr) = lt_features.chi2;
0253     *(++ptr) = lt_features.quality;
0254     *(++ptr) = lt_features.lostInnerHits;
0255     *(++ptr) = lt_features.numberOfPixelHits;
0256     *(++ptr) = lt_features.numberOfStripHits;
0257     assert(start + n_features_lt_ - 1 == ptr);
0258   }
0259 
0260   // n_pf candidates
0261   auto max_n_pf_n = std::min(features.n_pf_features.size(), (std::size_t)n_npf_);
0262   for (std::size_t n_pf_n = 0; n_pf_n < max_n_pf_n; n_pf_n++) {
0263     const auto& n_pf_features = features.n_pf_features.at(n_pf_n);
0264     ptr = &data_[kNeutralCandidates][offset + n_pf_n * n_features_npf_];
0265     start = ptr;
0266     *ptr = n_pf_features.ptrel;
0267     *(++ptr) = n_pf_features.etarel;
0268     *(++ptr) = n_pf_features.phirel;
0269     *(++ptr) = n_pf_features.deltaR;
0270     *(++ptr) = n_pf_features.isGamma;
0271     *(++ptr) = n_pf_features.hadFrac;
0272     *(++ptr) = n_pf_features.drminsv;
0273     *(++ptr) = n_pf_features.puppiw;
0274     assert(start + n_features_npf_ - 1 == ptr);
0275   }
0276 
0277   // sv candidates
0278   auto max_sv_n = std::min(features.sv_features.size(), (std::size_t)n_sv_);
0279   for (std::size_t sv_n = 0; sv_n < max_sv_n; sv_n++) {
0280     const auto& sv_features = features.sv_features.at(sv_n);
0281     ptr = &data_[kVertices][offset + sv_n * n_features_sv_];
0282     start = ptr;
0283     *ptr = sv_features.pt;
0284     *(++ptr) = sv_features.deltaR;
0285     *(++ptr) = sv_features.mass;
0286     *(++ptr) = sv_features.etarel;
0287     *(++ptr) = sv_features.phirel;
0288     *(++ptr) = sv_features.ntracks;
0289     *(++ptr) = sv_features.chi2;
0290     *(++ptr) = sv_features.normchi2;
0291     *(++ptr) = sv_features.dxy;
0292     *(++ptr) = sv_features.dxysig;
0293     *(++ptr) = sv_features.d3d;
0294     *(++ptr) = sv_features.d3dsig;
0295     *(++ptr) = sv_features.costhetasvpv;
0296     *(++ptr) = sv_features.enratio;
0297     assert(start + n_features_sv_ - 1 == ptr);
0298   }
0299 
0300   // cpf pairwise features (4-vectors)
0301   auto max_cpf_n = std::min(features.c_pf_features.size(), (std::size_t)n_cpf_);
0302   for (std::size_t cpf_n = 0; cpf_n < max_cpf_n; cpf_n++) {
0303     const auto& cpf_pairwise_features = features.c_pf_features.at(cpf_n);
0304     ptr = &data_[kChargedCandidates4Vec][offset + cpf_n * n_pairwise_features_cpf_];
0305     start = ptr;
0306     *ptr = cpf_pairwise_features.px;
0307     *(++ptr) = cpf_pairwise_features.py;
0308     *(++ptr) = cpf_pairwise_features.pz;
0309     *(++ptr) = cpf_pairwise_features.e;
0310 
0311     assert(start + n_pairwise_features_cpf_ - 1 == ptr);
0312   }
0313 
0314   // lt pairwise features (4-vectors) specific case requiring (pt,eta,phi,e)
0315   auto max_lt_N = std::min(features.lt_features.size(), (std::size_t)n_lt_);
0316   for (std::size_t lt_N = 0; lt_N < max_lt_N; lt_N++) {
0317     const auto& lt_pairwise_features = features.lt_features.at(lt_N);
0318     ptr = &data_[kLostTracks4Vec][offset + lt_N * n_pairwise_features_lt_];
0319     start = ptr;
0320     *ptr = lt_pairwise_features.pt;
0321     *(++ptr) = lt_pairwise_features.eta;
0322     *(++ptr) = lt_pairwise_features.phi;
0323     *(++ptr) = lt_pairwise_features.e;
0324 
0325     assert(start + n_pairwise_features_lt_ - 1 == ptr);
0326   }
0327 
0328   // npf pairwise features (4-vectors)
0329   auto max_npf_n = std::min(features.n_pf_features.size(), (std::size_t)n_npf_);
0330   for (std::size_t npf_n = 0; npf_n < max_npf_n; npf_n++) {
0331     const auto& npf_pairwise_features = features.n_pf_features.at(npf_n);
0332     ptr = &data_[kNeutralCandidates4Vec][offset + npf_n * n_pairwise_features_npf_];
0333     start = ptr;
0334     *ptr = npf_pairwise_features.px;
0335     *(++ptr) = npf_pairwise_features.py;
0336     *(++ptr) = npf_pairwise_features.pz;
0337     *(++ptr) = npf_pairwise_features.e;
0338 
0339     assert(start + n_pairwise_features_npf_ - 1 == ptr);
0340   }
0341 
0342   // sv pairwise features (4-vectors)
0343   auto max_sv_N = std::min(features.sv_features.size(), (std::size_t)n_sv_);
0344   for (std::size_t sv_N = 0; sv_N < max_sv_N; sv_N++) {
0345     const auto& sv_pairwise_features = features.sv_features.at(sv_N);
0346     ptr = &data_[kVertices4Vec][offset + sv_N * n_pairwise_features_sv_];
0347     start = ptr;
0348     *ptr = sv_pairwise_features.px;
0349     *(++ptr) = sv_pairwise_features.py;
0350     *(++ptr) = sv_pairwise_features.pz;
0351     *(++ptr) = sv_pairwise_features.e;
0352 
0353     assert(start + n_pairwise_features_sv_ - 1 == ptr);
0354   }
0355 }
0356 
0357 //define this as a plug-in
0358 DEFINE_FWK_MODULE(UnifiedParticleTransformerAK4ONNXJetTagsProducer);