File indexing completed on 2024-04-06 12:24:28
0001 #include "FWCore/Framework/interface/Frameworkfwd.h"
0002 #include "FWCore/Framework/interface/stream/EDProducer.h"
0003
0004 #include "FWCore/Framework/interface/Event.h"
0005 #include "FWCore/Framework/interface/MakerMacros.h"
0006
0007 #include "FWCore/Framework/interface/makeRefToBaseProdFrom.h"
0008
0009 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0010 #include "FWCore/Utilities/interface/StreamID.h"
0011
0012 #include "DataFormats/BTauReco/interface/JetTag.h"
0013
0014 #include "DataFormats/BTauReco/interface/DeepDoubleXTagInfo.h"
0015
0016 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
0017
0018 #include <algorithm>
0019 #include <iostream>
0020 #include <fstream>
0021
0022 using namespace cms::Ort;
0023
0024 class DeepDoubleXONNXJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
0025 public:
0026 explicit DeepDoubleXONNXJetTagsProducer(const edm::ParameterSet&, const ONNXRuntime*);
0027 ~DeepDoubleXONNXJetTagsProducer() override;
0028
0029 static void fillDescriptions(edm::ConfigurationDescriptions&);
0030
0031 static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet&);
0032 static void globalEndJob(const ONNXRuntime*);
0033
0034 private:
0035 typedef std::vector<reco::DeepDoubleXTagInfo> TagInfoCollection;
0036 typedef reco::JetTagCollection JetTagCollection;
0037
0038 void produce(edm::Event&, const edm::EventSetup&) override;
0039
0040 void make_inputs(unsigned i_jet, const reco::DeepDoubleXTagInfo& taginfo);
0041
0042 const edm::EDGetTokenT<TagInfoCollection> src_;
0043 std::vector<std::string> flav_names_;
0044 std::vector<std::string> input_names_;
0045 std::vector<std::string> output_names_;
0046 std::string version_;
0047
0048 unsigned n_features_global_;
0049 unsigned n_npf_, n_features_npf_;
0050 unsigned n_cpf_, n_features_cpf_;
0051 unsigned n_sv_, n_features_sv_;
0052 unsigned kGlobal, kNeutralCandidates, kChargedCandidates, kVertices;
0053 std::vector<unsigned> input_sizes_;
0054
0055
0056 FloatArrays data_;
0057
0058 bool debug_ = false;
0059 };
0060
0061 DeepDoubleXONNXJetTagsProducer::DeepDoubleXONNXJetTagsProducer(const edm::ParameterSet& iConfig,
0062 const ONNXRuntime* cache)
0063 : src_(consumes<TagInfoCollection>(iConfig.getParameter<edm::InputTag>("src"))),
0064 flav_names_(iConfig.getParameter<std::vector<std::string>>("flav_names")),
0065 input_names_(iConfig.getParameter<std::vector<std::string>>("input_names")),
0066 output_names_(iConfig.getParameter<std::vector<std::string>>("output_names")),
0067 version_(iConfig.getParameter<std::string>("version")),
0068 debug_(iConfig.getUntrackedParameter<bool>("debugMode", false)) {
0069
0070 for (const auto& flav_name : flav_names_) {
0071 produces<JetTagCollection>(flav_name);
0072 }
0073
0074 if (version_ == "V2") {
0075 n_features_global_ = 5;
0076 n_npf_ = 60;
0077 n_features_npf_ = 8;
0078 n_cpf_ = 40;
0079 n_features_cpf_ = 21;
0080 n_sv_ = 5;
0081 n_features_sv_ = 7;
0082 input_sizes_ = {n_features_global_, n_npf_ * n_features_npf_, n_cpf_ * n_features_cpf_, n_sv_ * n_features_sv_};
0083 kGlobal = 0;
0084 kNeutralCandidates = 1;
0085 kChargedCandidates = 2;
0086 kVertices = 3;
0087 } else {
0088 n_features_global_ = 27;
0089 n_cpf_ = 60;
0090 n_features_cpf_ = 8;
0091 n_sv_ = 5;
0092 n_features_sv_ = 2;
0093 input_sizes_ = {n_features_global_, n_cpf_ * n_features_cpf_, n_sv_ * n_features_sv_};
0094 kGlobal = 0;
0095 kChargedCandidates = 1;
0096 kVertices = 2;
0097 }
0098
0099 assert(input_names_.size() == input_sizes_.size());
0100 }
0101
0102 DeepDoubleXONNXJetTagsProducer::~DeepDoubleXONNXJetTagsProducer() {}
0103
0104 void DeepDoubleXONNXJetTagsProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0105 edm::ParameterSetDescription desc;
0106 desc.add<edm::InputTag>("src", edm::InputTag("pfDeepDoubleXTagInfos"));
0107 desc.add<std::vector<std::string>>("input_names", {"input_1", "input_2", "input_3"});
0108 desc.add<std::vector<std::string>>("output_names", {});
0109 desc.add<std::string>("version", "V1");
0110
0111 using FIP = edm::FileInPath;
0112 using PDFIP = edm::ParameterDescription<edm::FileInPath>;
0113 using PDPSD = edm::ParameterDescription<std::vector<std::string>>;
0114 using PDCases = edm::ParameterDescriptionCases<std::string>;
0115 using PDVersion = edm::ParameterDescription<std::string>;
0116 auto flavorCases = [&]() {
0117 return "BvL" >> (PDPSD("flav_names", std::vector<std::string>{"probQCD", "probHbb"}, true) and
0118 PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDB.onnx"), true)) or
0119 "CvL" >> (PDPSD("flav_names", std::vector<std::string>{"probQCD", "probHcc"}, true) and
0120 PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDC.onnx"), true)) or
0121 "CvB" >> (PDPSD("flav_names", std::vector<std::string>{"probHbb", "probHcc"}, true) and
0122 PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDCvB.onnx"), true));
0123 };
0124 auto descBvL(desc);
0125 descBvL.ifValue(edm::ParameterDescription<std::string>("flavor", "BvL", true), flavorCases());
0126 descriptions.add("pfDeepDoubleBvLJetTags", descBvL);
0127
0128 auto descCvL(desc);
0129 descCvL.ifValue(edm::ParameterDescription<std::string>("flavor", "CvL", true), flavorCases());
0130 descriptions.add("pfDeepDoubleCvLJetTags", descCvL);
0131
0132 auto descCvB(desc);
0133 descCvB.ifValue(edm::ParameterDescription<std::string>("flavor", "CvB", true), flavorCases());
0134 descriptions.add("pfDeepDoubleCvBJetTags", descCvB);
0135 }
0136
0137 std::unique_ptr<ONNXRuntime> DeepDoubleXONNXJetTagsProducer::initializeGlobalCache(const edm::ParameterSet& iConfig) {
0138 return std::make_unique<ONNXRuntime>(iConfig.getParameter<edm::FileInPath>("model_path").fullPath());
0139 }
0140
0141 void DeepDoubleXONNXJetTagsProducer::globalEndJob(const ONNXRuntime* cache) {}
0142
0143 void DeepDoubleXONNXJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
0144 edm::Handle<TagInfoCollection> tag_infos;
0145 iEvent.getByToken(src_, tag_infos);
0146
0147 std::vector<std::unique_ptr<JetTagCollection>> output_tags;
0148 if (!tag_infos->empty()) {
0149
0150 auto jet_ref = tag_infos->begin()->jet();
0151 auto ref2prod = edm::makeRefToBaseProdFrom(jet_ref, iEvent);
0152 for (std::size_t i = 0; i < flav_names_.size(); i++) {
0153 output_tags.emplace_back(std::make_unique<JetTagCollection>(ref2prod));
0154 }
0155
0156
0157 auto batch_size = std::count_if(
0158 tag_infos->begin(), tag_infos->end(), [](const auto& taginfo) { return !taginfo.features().empty(); });
0159
0160 std::vector<float> etas_debug;
0161 std::vector<float> outputs;
0162 if (batch_size > 0) {
0163
0164 data_.clear();
0165 for (const auto& len : input_sizes_) {
0166 data_.emplace_back(batch_size * len, 0);
0167 }
0168
0169
0170 unsigned idx = 0;
0171 for (const auto& taginfo : *tag_infos) {
0172 if (!taginfo.features().empty()) {
0173 make_inputs(idx, taginfo);
0174 if (debug_) {
0175 etas_debug.push_back(taginfo.jet()->eta());
0176 }
0177 ++idx;
0178 }
0179 }
0180
0181 std::sort(input_names_.begin(), input_names_.end());
0182
0183 outputs = globalCache()->run(input_names_, data_, {}, output_names_, batch_size)[0];
0184
0185 if (debug_) {
0186
0187 std::ofstream outfile;
0188 outfile.open("test.txt", std::ios_base::app);
0189 outfile << iEvent.id().event() << std::endl;
0190 outfile << batch_size << std::endl;
0191 for (float x : etas_debug)
0192 outfile << x << ' ';
0193 outfile << std::endl;
0194 int _i = 0;
0195 for (const std::vector<float>& v : data_) {
0196 outfile << "input_" << _i << std::endl;
0197 for (float x : v)
0198 outfile << x << ' ';
0199 outfile << std::endl;
0200 _i = _i + 1;
0201 }
0202 outfile << "outputs" << std::endl;
0203 for (float x : outputs)
0204 outfile << x << ' ';
0205 outfile << std::endl;
0206 }
0207
0208 assert(outputs.size() == flav_names_.size() * batch_size);
0209 }
0210
0211
0212 unsigned i_output = 0;
0213 for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
0214 const auto& taginfo = tag_infos->at(jet_n);
0215 const auto& jet_ref = taginfo.jet();
0216 for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
0217 if (!taginfo.features().empty()) {
0218 (*(output_tags[flav_n]))[jet_ref] = outputs[i_output];
0219 ++i_output;
0220 } else {
0221 (*(output_tags[flav_n]))[jet_ref] = -1.;
0222 }
0223 }
0224 }
0225 } else {
0226
0227 for (std::size_t i = 0; i < flav_names_.size(); i++) {
0228 output_tags.emplace_back(std::make_unique<JetTagCollection>());
0229 }
0230 }
0231
0232
0233 for (std::size_t flav_n = 0; flav_n < flav_names_.size(); ++flav_n) {
0234 iEvent.put(std::move(output_tags[flav_n]), flav_names_[flav_n]);
0235 }
0236 }
0237
0238 void DeepDoubleXONNXJetTagsProducer::make_inputs(unsigned i_jet, const reco::DeepDoubleXTagInfo& taginfo) {
0239 const auto& features = taginfo.features();
0240 float* ptr = nullptr;
0241 const float* start = nullptr;
0242 unsigned offset = 0;
0243
0244
0245 offset = i_jet * input_sizes_[kGlobal];
0246 ptr = &data_[kGlobal][offset];
0247 start = ptr;
0248 const auto& tag_info_features = features.tag_info_features;
0249 *ptr = tag_info_features.jetNTracks;
0250 if (version_ == "V1") {
0251 *(++ptr) = tag_info_features.jetNSecondaryVertices;
0252 }
0253 *(++ptr) = tag_info_features.tau1_trackEtaRel_0;
0254 *(++ptr) = tag_info_features.tau1_trackEtaRel_1;
0255 if (version_ == "V1") {
0256 *(++ptr) = tag_info_features.tau1_trackEtaRel_2;
0257 *(++ptr) = tag_info_features.tau2_trackEtaRel_0;
0258 *(++ptr) = tag_info_features.tau2_trackEtaRel_1;
0259 *(++ptr) = tag_info_features.tau2_trackEtaRel_2;
0260 *(++ptr) = tag_info_features.tau1_flightDistance2dSig;
0261 *(++ptr) = tag_info_features.tau2_flightDistance2dSig;
0262 }
0263 *(++ptr) = tag_info_features.tau1_vertexDeltaR;
0264 *(++ptr) = tag_info_features.tau1_vertexEnergyRatio;
0265 if (version_ == "V1") {
0266 *(++ptr) = tag_info_features.tau2_vertexEnergyRatio;
0267 *(++ptr) = tag_info_features.tau1_vertexMass;
0268 *(++ptr) = tag_info_features.tau2_vertexMass;
0269 *(++ptr) = tag_info_features.trackSip2dSigAboveBottom_0;
0270 *(++ptr) = tag_info_features.trackSip2dSigAboveBottom_1;
0271 *(++ptr) = tag_info_features.trackSip2dSigAboveCharm;
0272 *(++ptr) = tag_info_features.trackSip3dSig_0;
0273 *(++ptr) = tag_info_features.tau1_trackSip3dSig_0;
0274 *(++ptr) = tag_info_features.tau1_trackSip3dSig_1;
0275 *(++ptr) = tag_info_features.trackSip3dSig_1;
0276 *(++ptr) = tag_info_features.tau2_trackSip3dSig_0;
0277 *(++ptr) = tag_info_features.tau2_trackSip3dSig_1;
0278 *(++ptr) = tag_info_features.trackSip3dSig_2;
0279 *(++ptr) = tag_info_features.trackSip3dSig_3;
0280 *(++ptr) = tag_info_features.z_ratio;
0281 }
0282 assert(start + n_features_global_ - 1 == ptr);
0283
0284
0285 auto max_c_pf_n = std::min(features.c_pf_features.size(), (std::size_t)n_cpf_);
0286 offset = i_jet * input_sizes_[kChargedCandidates];
0287 for (std::size_t c_pf_n = 0; c_pf_n < max_c_pf_n; c_pf_n++) {
0288 const auto& c_pf_features = features.c_pf_features.at(c_pf_n);
0289 ptr = &data_[kChargedCandidates][offset + c_pf_n * n_features_cpf_];
0290 start = ptr;
0291 *ptr = c_pf_features.btagPf_trackEtaRel;
0292 if (version_ == "V1") {
0293 *(++ptr) = c_pf_features.btagPf_trackPtRatio;
0294 *(++ptr) = c_pf_features.btagPf_trackPParRatio;
0295 *(++ptr) = c_pf_features.btagPf_trackSip2dVal;
0296 *(++ptr) = c_pf_features.btagPf_trackSip2dSig;
0297 *(++ptr) = c_pf_features.btagPf_trackSip3dVal;
0298 *(++ptr) = c_pf_features.btagPf_trackSip3dSig;
0299 *(++ptr) = c_pf_features.btagPf_trackJetDistVal;
0300 } else {
0301 *(++ptr) = c_pf_features.btagPf_trackJetDistVal;
0302 *(++ptr) = c_pf_features.btagPf_trackPParRatio;
0303 *(++ptr) = c_pf_features.btagPf_trackPtRatio;
0304 *(++ptr) = c_pf_features.btagPf_trackSip2dSig;
0305 *(++ptr) = c_pf_features.btagPf_trackSip2dVal;
0306 *(++ptr) = c_pf_features.btagPf_trackSip3dSig;
0307 *(++ptr) = c_pf_features.btagPf_trackSip3dVal;
0308 *(++ptr) = c_pf_features.deltaR;
0309 *(++ptr) = c_pf_features.drminsv;
0310 *(++ptr) = c_pf_features.drsubjet1;
0311 *(++ptr) = c_pf_features.drsubjet2;
0312 *(++ptr) = c_pf_features.dxy;
0313 *(++ptr) = c_pf_features.dxysig;
0314 *(++ptr) = c_pf_features.dz;
0315 *(++ptr) = c_pf_features.dzsig;
0316 *(++ptr) = c_pf_features.erel;
0317 *(++ptr) = c_pf_features.etarel;
0318 *(++ptr) = c_pf_features.chi2;
0319 *(++ptr) = c_pf_features.ptrel_noclip;
0320 *(++ptr) = c_pf_features.quality;
0321 }
0322
0323 assert(start + n_features_cpf_ - 1 == ptr);
0324 }
0325
0326 if (version_ == "V2") {
0327
0328 auto max_n_pf_n = std::min(features.n_pf_features.size(), (std::size_t)n_cpf_);
0329 offset = i_jet * input_sizes_[kNeutralCandidates];
0330 for (std::size_t n_pf_n = 0; n_pf_n < max_n_pf_n; n_pf_n++) {
0331 const auto& n_pf_features = features.n_pf_features.at(n_pf_n);
0332 ptr = &data_[kNeutralCandidates][offset + n_pf_n * n_features_npf_];
0333 start = ptr;
0334 *ptr = n_pf_features.deltaR_noclip;
0335 *(++ptr) = n_pf_features.drminsv;
0336 *(++ptr) = n_pf_features.drsubjet1;
0337 *(++ptr) = n_pf_features.drsubjet2;
0338 *(++ptr) = n_pf_features.erel;
0339 *(++ptr) = n_pf_features.hadFrac;
0340 *(++ptr) = n_pf_features.ptrel_noclip;
0341 *(++ptr) = n_pf_features.puppiw;
0342 assert(start + n_features_npf_ - 1 == ptr);
0343 }
0344 }
0345
0346
0347 auto max_sv_n = std::min(features.sv_features.size(), (std::size_t)n_sv_);
0348 offset = i_jet * input_sizes_[kVertices];
0349 for (std::size_t sv_n = 0; sv_n < max_sv_n; sv_n++) {
0350 const auto& sv_features = features.sv_features.at(sv_n);
0351 ptr = &data_[kVertices][offset + sv_n * n_features_sv_];
0352 start = ptr;
0353 if (version_ == "V1") {
0354 *ptr = sv_features.d3d;
0355 *(++ptr) = sv_features.d3dsig;
0356 } else {
0357 *ptr = sv_features.costhetasvpv;
0358 *(++ptr) = sv_features.deltaR;
0359 *(++ptr) = sv_features.dxysig;
0360 *(++ptr) = sv_features.mass;
0361 *(++ptr) = sv_features.ntracks;
0362 *(++ptr) = sv_features.pt;
0363 *(++ptr) = sv_features.ptrel;
0364 }
0365 assert(start + n_features_sv_ - 1 == ptr);
0366 }
0367 }
0368
0369
0370 DEFINE_FWK_MODULE(DeepDoubleXONNXJetTagsProducer);