Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:28

0001 #include "FWCore/Framework/interface/Frameworkfwd.h"
0002 
0003 #include "FWCore/Framework/interface/Event.h"
0004 #include "FWCore/Framework/interface/MakerMacros.h"
0005 
0006 #include "FWCore/Framework/interface/makeRefToBaseProdFrom.h"
0007 
0008 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0009 #include "FWCore/Utilities/interface/StreamID.h"
0010 
0011 #include "DataFormats/BTauReco/interface/JetTag.h"
0012 
0013 #include "DataFormats/BTauReco/interface/DeepBoostedJetTagInfo.h"
0014 
0015 #include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h"
0016 
0017 #include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
0018 
0019 #include "RecoBTag/FeatureTools/interface/deep_helpers.h"
0020 
0021 #include <iostream>
0022 #include <fstream>
0023 #include <algorithm>
0024 #include <numeric>
0025 #include <nlohmann/json.hpp>
0026 
0027 using namespace btagbtvdeep;
0028 
0029 class ParticleNetSonicJetTagsProducer : public TritonEDProducer<> {
0030 public:
0031   explicit ParticleNetSonicJetTagsProducer(const edm::ParameterSet &);
0032   ~ParticleNetSonicJetTagsProducer() override;
0033 
0034   void acquire(edm::Event const &iEvent, edm::EventSetup const &iSetup, Input &iInput) override;
0035   void produce(edm::Event &iEvent, edm::EventSetup const &iSetup, Output const &iOutput) override;
0036   static void fillDescriptions(edm::ConfigurationDescriptions &);
0037 
0038 private:
0039   typedef std::vector<reco::DeepBoostedJetTagInfo> TagInfoCollection;
0040   typedef reco::JetTagCollection JetTagCollection;
0041 
0042   void make_inputs(const reco::DeepBoostedJetTagInfo &taginfo);
0043 
0044   const edm::EDGetTokenT<TagInfoCollection> src_;
0045   std::vector<std::string> flav_names_;             // names of the output scores
0046   std::vector<std::string> input_names_;            // names of each input group - the ordering is important!
0047   std::vector<std::vector<int64_t>> input_shapes_;  // shapes of each input group (-1 for dynamic axis)
0048   std::vector<unsigned> input_sizes_;               // total length of each input vector
0049   std::unordered_map<std::string, PreprocessParams> prep_info_map_;  // preprocessing info for each input group
0050   bool debug_ = false;
0051   bool skippedInference_ = false;
0052   std::vector<bool> emptyJets_;
0053   unsigned numParticleGroups_ = 0;
0054   unsigned numVertexGroups_ = 0;
0055   unsigned numLostTrackGroups_ = 0;
0056   bool countedInputs = false;
0057   std::string particleNameExample_;
0058   std::string vertexNameExample_;
0059   std::string losttrackNameExample_;
0060 };
0061 
0062 ParticleNetSonicJetTagsProducer::ParticleNetSonicJetTagsProducer(const edm::ParameterSet &iConfig)
0063     : TritonEDProducer<>(iConfig),
0064       src_(consumes<TagInfoCollection>(iConfig.getParameter<edm::InputTag>("src"))),
0065       flav_names_(iConfig.getParameter<std::vector<std::string>>("flav_names")),
0066       debug_(iConfig.getUntrackedParameter<bool>("debugMode", false)) {
0067   ParticleNetConstructor(iConfig, false, input_names_, prep_info_map_, input_shapes_, input_sizes_, nullptr);
0068 
0069   if (debug_) {
0070     LogDebug("ParticleNetSonicJetTagsProducer") << "<ParticleNetSonicJetTagsProducer::produce>:" << std::endl;
0071     for (unsigned i = 0; i < input_names_.size(); ++i) {
0072       const auto &group_name = input_names_.at(i);
0073       if (!input_shapes_.empty()) {
0074         LogDebug("ParticleNetSonicJetTagsProducer") << group_name << "\nshapes: ";
0075         for (const auto &x : input_shapes_.at(i)) {
0076           LogDebug("ParticleNetSonicJetTagsProducer") << x << ", ";
0077         }
0078       }
0079       LogDebug("ParticleNetSonicJetTagsProducer") << "\nvariables: ";
0080       for (const auto &x : prep_info_map_.at(group_name).var_names) {
0081         LogDebug("ParticleNetSonicJetTagsProducer") << x << ", ";
0082       }
0083       LogDebug("ParticleNetSonicJetTagsProducer") << "\n";
0084     }
0085     LogDebug("ParticleNetSonicJetTagsProducer") << "flav_names: ";
0086     for (const auto &flav_name : flav_names_) {
0087       LogDebug("ParticleNetSonicJetTagsProducer") << flav_name << ", ";
0088     }
0089     LogDebug("ParticleNetSonicJetTagsProducer") << "\n";
0090   }
0091 
0092   // get output names from flav_names
0093   for (const auto &flav_name : flav_names_) {
0094     produces<JetTagCollection>(flav_name);
0095   }
0096 
0097   if (!countedInputs) {
0098     int model_input_size = input_names_.size();
0099     for (int n = 0; n < model_input_size; n++) {
0100       if (prep_info_map_.at(input_names_.at(n)).var_names.at(0).find("pf") != std::string::npos) {
0101         if (numParticleGroups_ == 0) {
0102           particleNameExample_ = prep_info_map_.at(input_names_.at(n)).var_names.at(0);
0103         }
0104         numParticleGroups_++;
0105       } else if (prep_info_map_.at(input_names_.at(n)).var_names.at(0).find("sv") != std::string::npos) {
0106         if (numVertexGroups_ == 0) {
0107           vertexNameExample_ = prep_info_map_.at(input_names_.at(n)).var_names.at(0);
0108         }
0109         numVertexGroups_++;
0110       } else if (prep_info_map_.at(input_names_.at(n)).var_names.at(0).find("lt") != std::string::npos) {
0111         if (numLostTrackGroups_ == 0) {
0112           losttrackNameExample_ = prep_info_map_.at(input_names_.at(n)).var_names.at(0);
0113         }
0114         numLostTrackGroups_++;
0115       }
0116     }
0117 
0118     countedInputs = true;
0119   }
0120 }
0121 
0122 ParticleNetSonicJetTagsProducer::~ParticleNetSonicJetTagsProducer() {}
0123 
0124 void ParticleNetSonicJetTagsProducer::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
0125   // pfDeepBoostedJetTags
0126   edm::ParameterSetDescription desc;
0127   TritonClient::fillPSetDescription(desc);
0128   desc.add<edm::InputTag>("src", edm::InputTag("pfDeepBoostedJetTagInfos"));
0129   desc.add<std::string>("preprocess_json", "");
0130   // `preprocessParams` is deprecated -- use the preprocessing json file instead
0131   edm::ParameterSetDescription preprocessParams;
0132   preprocessParams.setAllowAnything();
0133   preprocessParams.setComment("`preprocessParams` is deprecated, please use `preprocess_json` instead.");
0134   desc.addOptional<edm::ParameterSetDescription>("preprocessParams", preprocessParams);
0135   desc.add<std::vector<std::string>>("flav_names",
0136                                      std::vector<std::string>{
0137                                          "probTbcq",
0138                                          "probTbqq",
0139                                          "probTbc",
0140                                          "probTbq",
0141                                          "probWcq",
0142                                          "probWqq",
0143                                          "probZbb",
0144                                          "probZcc",
0145                                          "probZqq",
0146                                          "probHbb",
0147                                          "probHcc",
0148                                          "probHqqqq",
0149                                          "probQCDbb",
0150                                          "probQCDcc",
0151                                          "probQCDb",
0152                                          "probQCDc",
0153                                          "probQCDothers",
0154                                      });
0155   desc.addOptionalUntracked<bool>("debugMode", false);
0156 
0157   descriptions.addWithDefaultLabel(desc);
0158 }
0159 
0160 void ParticleNetSonicJetTagsProducer::acquire(edm::Event const &iEvent, edm::EventSetup const &iSetup, Input &iInput) {
0161   edm::Handle<TagInfoCollection> tag_infos;
0162   iEvent.getByToken(src_, tag_infos);
0163   client_->setBatchSize(tag_infos->size());
0164   skippedInference_ = false;
0165 
0166   emptyJets_.clear();
0167 
0168   if (!tag_infos->empty()) {
0169     emptyJets_.reserve(tag_infos->size());
0170     unsigned int maxParticles = 0;
0171     unsigned int maxVertices = 0;
0172     unsigned int maxLT = 0;
0173     unsigned int numParticles;
0174     unsigned int numVertices;
0175     for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
0176       numParticles = static_cast<unsigned int>(((*tag_infos)[jet_n]).features().get(particleNameExample_).size());
0177       numVertices = static_cast<unsigned int>(((*tag_infos)[jet_n]).features().get(vertexNameExample_).size());
0178       maxParticles = std::max(maxParticles, numParticles);
0179       maxVertices = std::max(maxVertices, numVertices);
0180 
0181       if (numParticles == 0 && numVertices == 0) {
0182         emptyJets_.push_back(true);
0183       } else {
0184         emptyJets_.push_back(false);
0185       }
0186 
0187       if (!(losttrackNameExample_.empty()) && numParticles > 0) {
0188         maxLT = std::max(maxLT,
0189                          static_cast<unsigned int>(((*tag_infos)[jet_n]).features().get(losttrackNameExample_).size()));
0190       }
0191     }
0192 
0193     if (maxParticles == 0 && maxVertices == 0 && maxLT == 0) {
0194       client_->setBatchSize(0);
0195       skippedInference_ = true;
0196       return;
0197     }
0198 
0199     unsigned int minPartFromJSON = prep_info_map_.at(input_names_[0]).min_length;
0200     unsigned int maxPartFromJSON = prep_info_map_.at(input_names_[0]).max_length;
0201     unsigned int minVertFromJSON = prep_info_map_.at(input_names_[numParticleGroups_]).min_length;
0202     unsigned int maxVertFromJSON = prep_info_map_.at(input_names_[numParticleGroups_]).max_length;
0203     maxParticles = std::clamp(maxParticles, minPartFromJSON, maxPartFromJSON);
0204     maxVertices = std::clamp(maxVertices, minVertFromJSON, maxVertFromJSON);
0205     if (!(losttrackNameExample_.empty())) {
0206       unsigned int minLTFromJSON = prep_info_map_.at(input_names_[numParticleGroups_ + numVertexGroups_]).min_length;
0207       unsigned int maxLTFromJSON = prep_info_map_.at(input_names_[numParticleGroups_ + numVertexGroups_]).max_length;
0208       maxLT = std::clamp(maxLT, minLTFromJSON, maxLTFromJSON);
0209     }
0210 
0211     for (unsigned igroup = 0; igroup < input_names_.size(); ++igroup) {
0212       const auto &group_name = input_names_[igroup];
0213       auto &input = iInput.at(group_name);
0214       unsigned target;
0215       if (igroup < numParticleGroups_) {
0216         input.setShape(1, maxParticles);
0217         target = maxParticles;
0218       } else if (igroup < (numParticleGroups_ + numVertexGroups_)) {
0219         input.setShape(1, maxVertices);
0220         target = maxVertices;
0221       } else {
0222         input.setShape(1, maxLT);
0223         target = maxLT;
0224       }
0225       auto tdata = input.allocate<float>(true);
0226       for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
0227         const auto &taginfo = (*tag_infos)[jet_n];
0228         auto &vdata = (*tdata)[jet_n];
0229         const auto &prep_params = prep_info_map_.at(group_name);
0230         unsigned curr_pos = 0;
0231         // transform/pad
0232         for (unsigned i = 0; i < prep_params.var_names.size(); ++i) {
0233           const auto &varname = prep_params.var_names[i];
0234           std::vector<float> bare(0);
0235           std::vector<float> raw_value;
0236           if (!emptyJets_.at(jet_n)) {
0237             raw_value = taginfo.features().get(varname);
0238           } else {
0239             raw_value = bare;
0240           }
0241           const auto &info = prep_params.info(varname);
0242           int insize = center_norm_pad_halfRagged(raw_value,
0243                                                   info.center,
0244                                                   info.norm_factor,
0245                                                   target,
0246                                                   vdata,
0247                                                   curr_pos,
0248                                                   info.pad,
0249                                                   info.replace_inf_value,
0250                                                   info.lower_bound,
0251                                                   info.upper_bound);
0252           curr_pos += insize;
0253           if (i == 0 && (!input_shapes_.empty())) {
0254             input_shapes_[igroup][2] = insize;
0255           }
0256 
0257           if (debug_) {
0258             LogDebug("acquire") << "<ParticleNetSonicJetTagsProducer::produce>:" << std::endl
0259                                 << " -- var=" << varname << ", center=" << info.center << ", scale=" << info.norm_factor
0260                                 << ", replace=" << info.replace_inf_value << ", pad=" << info.pad << std::endl;
0261             for (unsigned i = curr_pos - insize; i < curr_pos; i++) {
0262               LogDebug("acquire") << vdata[i] << ",";
0263             }
0264             LogDebug("acquire") << std::endl;
0265           }
0266         }
0267       }
0268       input.toServer(tdata);
0269     }
0270   }
0271 }
0272 
0273 void ParticleNetSonicJetTagsProducer::produce(edm::Event &iEvent,
0274                                               const edm::EventSetup &iSetup,
0275                                               Output const &iOutput) {
0276   edm::Handle<TagInfoCollection> tag_infos;
0277   iEvent.getByToken(src_, tag_infos);
0278 
0279   // initialize output collection
0280   std::vector<std::unique_ptr<JetTagCollection>> output_tags;
0281   if (!tag_infos->empty()) {
0282     auto jet_ref = tag_infos->begin()->jet();
0283     auto ref2prod = edm::makeRefToBaseProdFrom(jet_ref, iEvent);
0284     for (std::size_t i = 0; i < flav_names_.size(); i++) {
0285       output_tags.emplace_back(std::make_unique<JetTagCollection>(ref2prod));
0286     }
0287   } else {
0288     for (std::size_t i = 0; i < flav_names_.size(); i++) {
0289       output_tags.emplace_back(std::make_unique<JetTagCollection>());
0290     }
0291   }
0292 
0293   if (!tag_infos->empty()) {
0294     if (!skippedInference_) {
0295       const auto &output1 = iOutput.begin()->second;
0296       const auto &outputs_from_server = output1.fromServer<float>();
0297 
0298       for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
0299         const auto &taginfo = (*tag_infos)[jet_n];
0300         const auto &jet_ref = tag_infos->at(jet_n).jet();
0301 
0302         if (emptyJets_.at(jet_n)) {
0303           for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
0304             (*(output_tags[flav_n]))[jet_ref] = 0.;
0305           }
0306         } else if (!taginfo.features().empty()) {
0307           for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
0308             (*(output_tags[flav_n]))[jet_ref] = outputs_from_server[jet_n][flav_n];
0309           }
0310         } else {
0311           for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
0312             (*(output_tags[flav_n]))[jet_ref] = 0.;
0313           }
0314         }
0315       }
0316     } else {
0317       for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
0318         const auto &jet_ref = tag_infos->at(jet_n).jet();
0319         for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
0320           (*(output_tags[flav_n]))[jet_ref] = 0.;
0321         }
0322       }
0323     }
0324   }
0325 
0326   if (debug_) {
0327     LogDebug("produce") << "<ParticleNetSonicJetTagsProducer::produce>:" << std::endl
0328                         << "=== " << iEvent.id().run() << ":" << iEvent.id().luminosityBlock() << ":"
0329                         << iEvent.id().event() << " ===" << std::endl;
0330     for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
0331       const auto &jet_ref = tag_infos->at(jet_n).jet();
0332       LogDebug("produce") << " - Jet #" << jet_n << ", pt=" << jet_ref->pt() << ", eta=" << jet_ref->eta()
0333                           << ", phi=" << jet_ref->phi() << std::endl;
0334       for (std::size_t flav_n = 0; flav_n < flav_names_.size(); ++flav_n) {
0335         LogDebug("produce") << "    " << flav_names_.at(flav_n) << " = " << (*(output_tags.at(flav_n)))[jet_ref]
0336                             << std::endl;
0337       }
0338     }
0339   }
0340 
0341   // put into the event
0342   for (std::size_t flav_n = 0; flav_n < flav_names_.size(); ++flav_n) {
0343     iEvent.put(std::move(output_tags[flav_n]), flav_names_[flav_n]);
0344   }
0345 }
0346 
0347 //define this as a plug-in
0348 DEFINE_FWK_MODULE(ParticleNetSonicJetTagsProducer);