Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:25

0001 
0002 #include "FWCore/Framework/interface/Frameworkfwd.h"
0003 #include "FWCore/Framework/interface/stream/EDProducer.h"
0004 
0005 #include "FWCore/Framework/interface/Event.h"
0006 #include "FWCore/Framework/interface/MakerMacros.h"
0007 
0008 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0009 #include "FWCore/Utilities/interface/StreamID.h"
0010 #include "FWCore/Utilities/interface/ESGetToken.h"
0011 
0012 #include "DataFormats/Candidate/interface/Candidate.h"
0013 
0014 #include "DataFormats/PatCandidates/interface/Jet.h"
0015 #include "DataFormats/PatCandidates/interface/PackedCandidate.h"
0016 
0017 #include "DataFormats/BTauReco/interface/BoostedDoubleSVTagInfo.h"
0018 
0019 #include "TrackingTools/Records/interface/TransientTrackRecord.h"
0020 #include "TrackingTools/TransientTrack/interface/TransientTrackBuilder.h"
0021 
0022 #include "DataFormats/BTauReco/interface/DeepDoubleXFeatures.h"
0023 #include "DataFormats/BTauReco/interface/DeepDoubleXTagInfo.h"
0024 
0025 #include "RecoBTag/FeatureTools/interface/BoostedDoubleSVTagInfoConverter.h"
0026 #include "RecoBTag/FeatureTools/interface/NeutralCandidateConverter.h"
0027 #include "RecoBTag/FeatureTools/interface/ChargedCandidateConverter.h"
0028 #include "RecoBTag/FeatureTools/interface/JetConverter.h"
0029 #include "RecoBTag/FeatureTools/interface/SecondaryVertexConverter.h"
0030 
0031 #include "RecoBTag/FeatureTools/interface/TrackInfoBuilder.h"
0032 #include "RecoBTag/FeatureTools/interface/sorting_modules.h"
0033 
0034 #include "DataFormats/Candidate/interface/VertexCompositePtrCandidate.h"
0035 #include "DataFormats/VertexReco/interface/VertexFwd.h"
0036 
0037 #include "RecoBTag/FeatureTools/interface/deep_helpers.h"
0038 
0039 class DeepDoubleXTagInfoProducer : public edm::stream::EDProducer<> {
0040 public:
0041   explicit DeepDoubleXTagInfoProducer(const edm::ParameterSet&);
0042   ~DeepDoubleXTagInfoProducer() override;
0043 
0044   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0045 
0046 private:
0047   typedef std::vector<reco::DeepDoubleXTagInfo> DeepDoubleXTagInfoCollection;
0048   typedef reco::VertexCompositePtrCandidateCollection SVCollection;
0049   typedef reco::VertexCollection VertexCollection;
0050   typedef edm::View<reco::BoostedDoubleSVTagInfo> BoostedDoubleSVTagInfoCollection;
0051 
0052   void beginStream(edm::StreamID) override {}
0053   void produce(edm::Event&, const edm::EventSetup&) override;
0054   void endStream() override {}
0055 
0056   const double jet_radius_;
0057   const double min_jet_pt_;
0058   const double min_candidate_pt_;
0059 
0060   edm::EDGetTokenT<edm::View<reco::Jet>> jet_token_;
0061   edm::EDGetTokenT<VertexCollection> vtx_token_;
0062   edm::EDGetTokenT<SVCollection> sv_token_;
0063   edm::EDGetTokenT<BoostedDoubleSVTagInfoCollection> shallow_tag_info_token_;
0064   edm::EDGetTokenT<edm::ValueMap<float>> puppi_value_map_token_;
0065   edm::ESGetToken<TransientTrackBuilder, TransientTrackRecord> track_builder_token_;
0066 
0067   bool use_puppi_value_map_;
0068   bool fallback_puppi_weight_;
0069   bool is_weighted_jet_;
0070 };
0071 
0072 DeepDoubleXTagInfoProducer::DeepDoubleXTagInfoProducer(const edm::ParameterSet& iConfig)
0073     : jet_radius_(iConfig.getParameter<double>("jet_radius")),
0074       min_jet_pt_(iConfig.getParameter<double>("min_jet_pt")),
0075       min_candidate_pt_(iConfig.getParameter<double>("min_candidate_pt")),
0076       jet_token_(consumes<edm::View<reco::Jet>>(iConfig.getParameter<edm::InputTag>("jets"))),
0077       vtx_token_(consumes<VertexCollection>(iConfig.getParameter<edm::InputTag>("vertices"))),
0078       sv_token_(consumes<SVCollection>(iConfig.getParameter<edm::InputTag>("secondary_vertices"))),
0079       shallow_tag_info_token_(
0080           consumes<BoostedDoubleSVTagInfoCollection>(iConfig.getParameter<edm::InputTag>("shallow_tag_infos"))),
0081       track_builder_token_(
0082           esConsumes<TransientTrackBuilder, TransientTrackRecord>(edm::ESInputTag("", "TransientTrackBuilder"))),
0083       use_puppi_value_map_(false),
0084       fallback_puppi_weight_(iConfig.getParameter<bool>("fallback_puppi_weight")),
0085       is_weighted_jet_(iConfig.getParameter<bool>("is_weighted_jet")) {
0086   produces<DeepDoubleXTagInfoCollection>();
0087 
0088   const auto& puppi_value_map_tag = iConfig.getParameter<edm::InputTag>("puppi_value_map");
0089   if (!puppi_value_map_tag.label().empty()) {
0090     puppi_value_map_token_ = consumes<edm::ValueMap<float>>(puppi_value_map_tag);
0091     use_puppi_value_map_ = true;
0092   } else if (is_weighted_jet_) {
0093     throw edm::Exception(edm::errors::Configuration,
0094                          "puppi_value_map is not set but jet is weighted. Must set puppi_value_map.");
0095   }
0096 }
0097 
0098 DeepDoubleXTagInfoProducer::~DeepDoubleXTagInfoProducer() {}
0099 
0100 void DeepDoubleXTagInfoProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0101   // pfDeepDoubleXTagInfos
0102   edm::ParameterSetDescription desc;
0103   desc.add<edm::InputTag>("shallow_tag_infos", edm::InputTag("pfBoostedDoubleSVAK8TagInfos"));
0104   desc.add<double>("jet_radius", 0.8);
0105   desc.add<double>("min_jet_pt", 150);
0106   desc.add<double>("min_candidate_pt", 0.95);
0107   desc.add<edm::InputTag>("vertices", edm::InputTag("offlinePrimaryVertices"));
0108   desc.add<edm::InputTag>("puppi_value_map", edm::InputTag("puppi"));
0109   desc.add<edm::InputTag>("secondary_vertices", edm::InputTag("inclusiveCandidateSecondaryVertices"));
0110   desc.add<edm::InputTag>("jets", edm::InputTag("ak8PFJetsPuppi"));
0111   desc.add<bool>("fallback_puppi_weight", false);
0112   desc.add<bool>("is_weighted_jet", true);
0113   descriptions.add("pfDeepDoubleXTagInfos", desc);
0114 }
0115 
0116 void DeepDoubleXTagInfoProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
0117   auto output_tag_infos = std::make_unique<DeepDoubleXTagInfoCollection>();
0118 
0119   edm::Handle<edm::View<reco::Jet>> jets;
0120   iEvent.getByToken(jet_token_, jets);
0121 
0122   edm::Handle<VertexCollection> vtxs;
0123   iEvent.getByToken(vtx_token_, vtxs);
0124   if (vtxs->empty()) {
0125     // produce empty TagInfos in case no primary vertex
0126     iEvent.put(std::move(output_tag_infos));
0127     return;  // exit event
0128   }
0129   // reference to primary vertex
0130   const auto& pv = vtxs->at(0);
0131 
0132   edm::Handle<SVCollection> svs;
0133   iEvent.getByToken(sv_token_, svs);
0134 
0135   edm::Handle<BoostedDoubleSVTagInfoCollection> shallow_tag_infos;
0136   iEvent.getByToken(shallow_tag_info_token_, shallow_tag_infos);
0137 
0138   edm::Handle<edm::ValueMap<float>> puppi_value_map;
0139   if (use_puppi_value_map_) {
0140     iEvent.getByToken(puppi_value_map_token_, puppi_value_map);
0141   }
0142 
0143   edm::ESHandle<TransientTrackBuilder> track_builder = iSetup.getHandle(track_builder_token_);
0144 
0145   for (std::size_t jet_n = 0; jet_n < jets->size(); jet_n++) {
0146     // create data containing structure
0147     btagbtvdeep::DeepDoubleXFeatures features;
0148 
0149     // reco jet reference (use as much as possible)
0150     const auto& jet = jets->at(jet_n);
0151     const auto* pf_jet = dynamic_cast<const reco::PFJet*>(&jet);
0152     const auto* pat_jet = dynamic_cast<const pat::Jet*>(&jet);
0153     if (!pat_jet)
0154       throw edm::Exception(edm::errors::InvalidReference) << "Input is not a pat::Jet.";
0155 
0156     edm::RefToBase<reco::Jet> jet_ref(jets, jet_n);
0157     if (jet.pt() > min_jet_pt_) {
0158       features.filled();
0159       // TagInfoCollection not in an associative container so search for matchs
0160       const edm::View<reco::BoostedDoubleSVTagInfo>& taginfos = *shallow_tag_infos;
0161       edm::Ptr<reco::BoostedDoubleSVTagInfo> match;
0162       // Try first by 'same index'
0163       if ((jet_n < taginfos.size()) && (taginfos[jet_n].jet() == jet_ref)) {
0164         match = taginfos.ptrAt(jet_n);
0165       } else {
0166         // otherwise fall back to a simple search
0167         for (auto itTI = taginfos.begin(), edTI = taginfos.end(); itTI != edTI; ++itTI) {
0168           if (itTI->jet() == jet_ref) {
0169             match = taginfos.ptrAt(itTI - taginfos.begin());
0170             break;
0171           }
0172         }
0173       }
0174       reco::BoostedDoubleSVTagInfo tag_info;
0175       if (match.isNonnull()) {
0176         tag_info = *match;
0177       }  // will be default values otherwise
0178 
0179       // fill basic jet features
0180       btagbtvdeep::JetConverter::jetToFeatures(jet, features.jet_features);
0181 
0182       // fill number of pv
0183       features.npv = vtxs->size();
0184 
0185       // fill features from BoostedDoubleSVTagInfo
0186       const auto& tag_info_vars = tag_info.taggingVariables();
0187       btagbtvdeep::doubleBTagToFeatures(tag_info_vars, features.tag_info_features);
0188 
0189       // copy which will be sorted
0190       auto svs_sorted = *svs;
0191       // sort by dxy
0192       std::sort(svs_sorted.begin(), svs_sorted.end(), [&pv](const auto& sva, const auto& svb) {
0193         return btagbtvdeep::sv_vertex_comparator(sva, svb, pv);
0194       });
0195       // fill features from secondary vertices
0196       for (const auto& sv : svs_sorted) {
0197         if (reco::deltaR(sv, jet) > jet_radius_)
0198           continue;
0199         else {
0200           features.sv_features.emplace_back();
0201           // in C++17 could just get from emplace_back output
0202           auto& sv_features = features.sv_features.back();
0203           btagbtvdeep::svToFeatures(sv, pv, jet, sv_features);
0204         }
0205       }
0206 
0207       // stuff required for dealing with pf candidates
0208       math::XYZVector jet_dir = jet.momentum().Unit();
0209       GlobalVector jet_ref_track_dir(jet.px(), jet.py(), jet.pz());
0210 
0211       std::vector<btagbtvdeep::SortingClass<size_t>> c_sorted, n_sorted;
0212       std::vector<int> n_indexes;
0213 
0214       // to cache the TrackInfo
0215       std::map<unsigned int, btagbtvdeep::TrackInfoBuilder> trackinfos;
0216 
0217       // unsorted reference to sv
0218       const auto& svs_unsorted = *svs;
0219       // fill collection, from DeepTNtuples plus some styling
0220       // std::vector<const pat::PackedCandidate*> daughters;
0221       std::vector<const reco::Candidate*> daughters;
0222       for (unsigned int i = 0; i < jet.numberOfDaughters(); i++) {
0223         auto const* cand = jet.daughter(i);
0224         auto packed_cand = dynamic_cast<const pat::PackedCandidate*>(cand);
0225         auto reco_cand = dynamic_cast<const reco::PFCandidate*>(cand);
0226         if (packed_cand) {
0227           if (cand->numberOfDaughters() > 0) {
0228             for (unsigned int k = 0; k < cand->numberOfDaughters(); k++) {
0229               daughters.push_back(dynamic_cast<const pat::PackedCandidate*>(cand->daughter(k)));
0230             }
0231           } else {
0232             auto packed_cand = dynamic_cast<const pat::PackedCandidate*>(cand);
0233             daughters.push_back(packed_cand);
0234           }
0235         } else if (reco_cand) {
0236           // need some edm::Ptr or edm::Ref if reco candidates
0237           // dynamical casting to pointers, null if not possible
0238           daughters.push_back(reco_cand);
0239         }
0240       }
0241 
0242       std::sort(daughters.begin(), daughters.end(), [](const auto& a, const auto& b) { return a->pt() > b->pt(); });
0243       for (unsigned int i = 0; i < daughters.size(); i++) {
0244         auto const* cand = daughters.at(i);
0245         if (cand) {
0246           // candidates under 950MeV (configurable) are not considered
0247           // might change if we use also white-listing
0248           if (cand->pt() < min_candidate_pt_)
0249             continue;
0250           if (cand->charge() != 0) {
0251             auto& trackinfo = trackinfos.emplace(i, track_builder).first->second;
0252             trackinfo.buildTrackInfo(cand, jet_dir, jet_ref_track_dir, pv);
0253             c_sorted.emplace_back(i,
0254                                   trackinfo.getTrackSip2dSig(),
0255                                   -btagbtvdeep::mindrsvpfcand(svs_unsorted, cand, jet_radius_),
0256                                   cand->pt() / jet.pt());
0257           } else {
0258             n_sorted.emplace_back(
0259                 i, -1, -btagbtvdeep::mindrsvpfcand(svs_unsorted, cand, jet_radius_), cand->pt() / jet.pt());
0260             n_indexes.push_back(i);
0261           }
0262         }
0263       }
0264 
0265       // sort collections in added order of priority
0266       std::sort(c_sorted.begin(), c_sorted.end(), btagbtvdeep::SortingClass<std::size_t>::compareByABCInv);
0267       std::sort(n_sorted.begin(), n_sorted.end(), btagbtvdeep::SortingClass<std::size_t>::compareByABCInv);
0268 
0269       std::vector<size_t> c_sortedindices, n_sortedindices;
0270 
0271       // this puts 0 everywhere and the right position in ind
0272       c_sortedindices = btagbtvdeep::invertSortingVector(c_sorted);
0273       n_sortedindices = btagbtvdeep::invertSortingVector(n_sorted);
0274 
0275       // set right size to vectors
0276       features.c_pf_features.clear();
0277       features.c_pf_features.resize(c_sorted.size());
0278       features.n_pf_features.clear();
0279       features.n_pf_features.resize(n_sorted.size());
0280 
0281       for (unsigned int i = 0; i < daughters.size(); i++) {
0282         auto const* cand = daughters.at(i);
0283         if (cand) {
0284           // candidates under 950MeV are not considered
0285           // might change if we use also white-listing
0286           if (cand->pt() < min_candidate_pt_)
0287             continue;
0288           auto packed_cand = dynamic_cast<const pat::PackedCandidate*>(cand);
0289           auto reco_cand = dynamic_cast<const reco::PFCandidate*>(cand);
0290 
0291           reco::PFCandidatePtr reco_ptr;
0292           if (pf_jet) {
0293             reco_ptr = pf_jet->getPFConstituent(i);
0294           }
0295 
0296           reco::CandidatePtr cand_ptr;
0297           if (pat_jet) {
0298             cand_ptr = pat_jet->sourceCandidatePtr(i);
0299           }
0300 
0301           //
0302           // Access puppi weight from ValueMap.
0303           //
0304           float puppiw = 1.0;  // Set to fallback value
0305 
0306           if (reco_cand) {
0307             if (use_puppi_value_map_)
0308               puppiw = (*puppi_value_map)[reco_ptr];
0309             else if (!fallback_puppi_weight_) {
0310               throw edm::Exception(edm::errors::InvalidReference, "PUPPI value map missing")
0311                   << "use fallback_puppi_weight option to use " << puppiw << " for reco_cand as default";
0312             }
0313           } else if (packed_cand) {
0314             if (use_puppi_value_map_)
0315               puppiw = (*puppi_value_map)[cand_ptr];
0316             else if (!fallback_puppi_weight_) {
0317               throw edm::Exception(edm::errors::InvalidReference, "PUPPI value map missing")
0318                   << "use fallback_puppi_weight option to use " << puppiw << " for packed_cand as default";
0319             }
0320           } else {
0321             throw edm::Exception(edm::errors::InvalidReference)
0322                 << "Cannot convert to either reco::PFCandidate or pat::PackedCandidate";
0323           }
0324 
0325           float drminpfcandsv = btagbtvdeep::mindrsvpfcand(svs_unsorted, cand, jet_radius_);
0326           if (cand->charge() != 0) {
0327             // is charged candidate
0328             auto entry = c_sortedindices.at(i);
0329             // get cached track info
0330             auto& trackinfo = trackinfos.at(i);
0331             // get_ref to vector element
0332             auto& c_pf_features = features.c_pf_features.at(entry);
0333             if (packed_cand) {
0334               btagbtvdeep::packedCandidateToFeatures(packed_cand,
0335                                                      *pat_jet,
0336                                                      trackinfo,
0337                                                      is_weighted_jet_,
0338                                                      drminpfcandsv,
0339                                                      static_cast<float>(jet_radius_),
0340                                                      puppiw,
0341                                                      c_pf_features);
0342             } else if (reco_cand) {
0343               // get vertex association quality
0344               int pv_ass_quality = 0;  // fallback value
0345               // getting the PV as PackedCandidatesProducer
0346               // but using not the slimmed but original vertices
0347               auto ctrack = reco_cand->bestTrack();
0348               int pvi = -1;
0349               float dist = 1e99;
0350               for (size_t ii = 0; ii < vtxs->size(); ii++) {
0351                 float dz = (ctrack) ? std::abs(ctrack->dz(((*vtxs)[ii]).position())) : 0;
0352                 if (dz < dist) {
0353                   pvi = ii;
0354                   dist = dz;
0355                 }
0356               }
0357               auto pv = reco::VertexRef(vtxs, pvi);
0358               btagbtvdeep::recoCandidateToFeatures(reco_cand,
0359                                                    jet,
0360                                                    trackinfo,
0361                                                    is_weighted_jet_,
0362                                                    drminpfcandsv,
0363                                                    static_cast<float>(jet_radius_),
0364                                                    puppiw,
0365                                                    pv_ass_quality,
0366                                                    pv,
0367                                                    c_pf_features);
0368             }
0369           } else {
0370             // is neutral candidate
0371             auto entry = n_sortedindices.at(i);
0372             // get_ref to vector element
0373             auto& n_pf_features = features.n_pf_features.at(entry);
0374             // // fill feature structure
0375             if (packed_cand) {
0376               btagbtvdeep::packedCandidateToFeatures(packed_cand,
0377                                                      *pat_jet,
0378                                                      is_weighted_jet_,
0379                                                      drminpfcandsv,
0380                                                      static_cast<float>(jet_radius_),
0381                                                      puppiw,
0382                                                      n_pf_features);
0383             } else if (reco_cand) {
0384               btagbtvdeep::recoCandidateToFeatures(reco_cand,
0385                                                    jet,
0386                                                    is_weighted_jet_,
0387                                                    drminpfcandsv,
0388                                                    static_cast<float>(jet_radius_),
0389                                                    puppiw,
0390                                                    n_pf_features);
0391             }
0392           }
0393         }
0394       }
0395     }
0396     output_tag_infos->emplace_back(features, jet_ref);
0397   }
0398 
0399   iEvent.put(std::move(output_tag_infos));
0400 }
0401 
0402 // define this as a plug-in
0403 DEFINE_FWK_MODULE(DeepDoubleXTagInfoProducer);