Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:23

0001 // -*- C++ -*-
0002 //
0003 // Package:    RecoBTag/SecondaryVertex
0004 // Class:      BTagProbabilityToDiscriminator
0005 //
0006 /**
0007  *
0008  * Description: EDProducer that performs simple arithmetic on the
0009  * multi-classifier probabilities to compute simple discriminators
0010  *
0011  * Implementation:
0012  *    A collection of output discriminators is defined in a VPSet, each
0013  * containing the output name, the input probabilities and the normalization
0014  * (empty vInputTag if none). The output is computed as
0015  *         sum(INPUTS)/sum(normalizations)
0016  */
0017 //
0018 // Original Author:  Mauro Verzetti (CERN)
0019 //
0020 //
0021 
0022 // system include files
0023 #include <memory>
0024 
0025 // user include files
0026 #include "FWCore/Framework/interface/Frameworkfwd.h"
0027 #include "FWCore/Framework/interface/stream/EDProducer.h"
0028 
0029 #include "FWCore/Framework/interface/Event.h"
0030 #include "FWCore/Framework/interface/MakerMacros.h"
0031 
0032 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0033 #include "FWCore/Utilities/interface/StreamID.h"
0034 
0035 #include "DataFormats/BTauReco/interface/JetTag.h"
0036 #include "DataFormats/Common/interface/RefToBase.h"
0037 #include "FWCore/Framework/interface/makeRefToBaseProdFrom.h"
0038 
0039 // from lwtnn
0040 #include <fstream>
0041 #include <iostream>
0042 #include <map>
0043 #include <set>
0044 #include <string>
0045 #include <vector>
0046 
0047 #include <boost/algorithm/string.hpp>
0048 #include <unordered_map>
0049 using namespace std;
0050 using namespace reco;
0051 //
0052 // class declaration
0053 //
0054 
0055 class BTagProbabilityToDiscriminator : public edm::stream::EDProducer<> {
0056 public:
0057   explicit BTagProbabilityToDiscriminator(const edm::ParameterSet &);
0058   ~BTagProbabilityToDiscriminator() override {}
0059 
0060   static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);
0061 
0062 private:
0063   typedef std::vector<edm::InputTag> vInputTag;
0064   typedef std::vector<std::string> vstring;
0065   typedef std::vector<edm::ParameterSet> vPSet;
0066   struct Discriminator {
0067     std::string name;  // needed?
0068     vstring numerator;
0069     vstring denominator;
0070   };
0071 
0072   void beginStream(edm::StreamID) override {}
0073   void produce(edm::Event &, const edm::EventSetup &) override;
0074   void endStream() override {}
0075 
0076   // ----------member data ---------------------------
0077   std::vector<Discriminator> discrims_;
0078   std::unordered_map<std::string, edm::EDGetTokenT<JetTagCollection>> jet_tags_;  // caches jet tags to avoid repetitions
0079 };
0080 
0081 BTagProbabilityToDiscriminator::BTagProbabilityToDiscriminator(const edm::ParameterSet &iConfig) {
0082   for (const auto &discriminator : iConfig.getParameter<vPSet>("discriminators")) {
0083     Discriminator current;
0084     current.name = discriminator.getParameter<std::string>("name");
0085     produces<JetTagCollection>(current.name);
0086 
0087     for (const auto &intag : discriminator.getParameter<vInputTag>("numerator")) {
0088       if (jet_tags_.find(intag.encode()) == jet_tags_.end()) {  // new
0089                                                                 // probability
0090         jet_tags_[intag.encode()] = consumes<JetTagCollection>(intag);
0091       }
0092       current.numerator.push_back(intag.encode());
0093     }
0094 
0095     for (const auto &intag : discriminator.getParameter<vInputTag>("denominator")) {
0096       if (jet_tags_.find(intag.encode()) == jet_tags_.end()) {  // new
0097                                                                 // probability
0098         jet_tags_[intag.encode()] = consumes<JetTagCollection>(intag);
0099       }
0100       current.denominator.push_back(intag.encode());
0101     }
0102     discrims_.push_back(current);
0103   }
0104 
0105   if (jet_tags_.empty()) {
0106     throw cms::Exception("RuntimeError") << "The module BTagProbabilityToDiscriminator is run without any input "
0107                                             "probability to work on!"
0108                                          << std::endl;
0109   }
0110 }
0111 
0112 void BTagProbabilityToDiscriminator::produce(edm::Event &iEvent, const edm::EventSetup &iSetup) {
0113   std::unordered_map<std::string, edm::Handle<JetTagCollection>> tags;  // caches jet tags to avoid repetitions
0114   size_t size = 0;
0115   bool first = true;
0116   for (const auto &entry : jet_tags_) {
0117     edm::Handle<JetTagCollection> tmp;
0118     iEvent.getByToken(entry.second, tmp);
0119     tags[entry.first] = tmp;
0120     if (first)
0121       size = tmp->size();
0122     else {
0123       if (tmp->size() != size) {
0124         throw cms::Exception("RuntimeError") << "The length of one of the input jet tag collections does not "
0125                                                 "match "
0126                                              << "with the others, this is probably due to the probabilities "
0127                                                 "belonging to different jet collections, which is forbidden!"
0128                                              << std::endl;
0129       }
0130     }
0131     first = false;
0132   }
0133 
0134   // create the output collection
0135   // which is a "map" RefToBase<Jet> --> float
0136   vector<std::unique_ptr<JetTagCollection>> output_tags;
0137   output_tags.reserve(discrims_.size());
0138   for (size_t i = 0; i < discrims_.size(); ++i) {
0139     output_tags.push_back(
0140         std::make_unique<JetTagCollection>(*(tags.begin()->second))  // clone from the first element, will change
0141                                                                      // the content later on
0142     );
0143   }
0144 
0145   // loop over jets
0146   for (size_t idx = 0; idx < output_tags[0]->size(); idx++) {
0147     auto key = output_tags[0]->key(idx);  // use key only for writing
0148     // loop over new discriminators to produce
0149     for (size_t disc_idx = 0; disc_idx < output_tags.size(); disc_idx++) {
0150       float numerator = 0;
0151       for (auto &num : discrims_[disc_idx].numerator)
0152         numerator += (*tags[num])[idx].second;
0153       float denominator = !discrims_[disc_idx].denominator.empty() ? 0 : 1;
0154       for (auto &den : discrims_[disc_idx].denominator)
0155         denominator += (*tags[den])[idx].second;
0156       //protect against 0 denominator and undefined jet values (numerator probability < 0)
0157       float new_value = (denominator != 0 && numerator >= 0) ? numerator / denominator : -10.;
0158       (*output_tags[disc_idx])[key] = new_value;
0159     }
0160   }
0161 
0162   // put the output in the event
0163   for (size_t i = 0; i < output_tags.size(); ++i) {
0164     iEvent.put(std::move(output_tags[i]), discrims_[i].name);
0165   }
0166 }
0167 
0168 // ------------ method fills 'descriptions' with the allowed parameters for the
0169 // module  ------------
0170 void BTagProbabilityToDiscriminator::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
0171   edm::ParameterSetDescription desc;
0172   {
0173     edm::ParameterSetDescription vpsd1;
0174     vpsd1.add<std::vector<edm::InputTag>>("denominator", {});
0175     vpsd1.add<std::vector<edm::InputTag>>("numerator",
0176                                           {
0177                                               edm::InputTag("pfDeepCSVJetTags", "probb"),
0178                                               edm::InputTag("pfDeepCSVJetTags", "probbb"),
0179                                           });
0180     vpsd1.add<std::string>("name", "BvsAll");
0181     std::vector<edm::ParameterSet> temp1;
0182     temp1.reserve(3);
0183     {
0184       edm::ParameterSet temp2;
0185       temp2.addParameter<std::vector<edm::InputTag>>("denominator", {});
0186       temp2.addParameter<std::vector<edm::InputTag>>("numerator",
0187                                                      {
0188                                                          edm::InputTag("pfDeepCSVJetTags", "probb"),
0189                                                          edm::InputTag("pfDeepCSVJetTags", "probbb"),
0190                                                      });
0191       temp2.addParameter<std::string>("name", "BvsAll");
0192       temp1.push_back(temp2);
0193     }
0194     {
0195       edm::ParameterSet temp2;
0196       temp2.addParameter<std::vector<edm::InputTag>>("denominator",
0197                                                      {
0198                                                          edm::InputTag("pfDeepCSVJetTags", "probc"),
0199                                                          edm::InputTag("pfDeepCSVJetTags", "probb"),
0200                                                          edm::InputTag("pfDeepCSVJetTags", "probbb"),
0201                                                      });
0202       temp2.addParameter<std::vector<edm::InputTag>>("numerator",
0203                                                      {
0204                                                          edm::InputTag("pfDeepCSVJetTags", "probc"),
0205                                                      });
0206       temp2.addParameter<std::string>("name", "CvsB");
0207       temp1.push_back(temp2);
0208     }
0209     {
0210       edm::ParameterSet temp2;
0211       temp2.addParameter<std::vector<edm::InputTag>>("denominator",
0212                                                      {
0213                                                          edm::InputTag("pfDeepCSVJetTags", "probudsg"),
0214                                                          edm::InputTag("pfDeepCSVJetTags", "probc"),
0215                                                      });
0216       temp2.addParameter<std::vector<edm::InputTag>>("numerator",
0217                                                      {
0218                                                          edm::InputTag("pfDeepCSVJetTags", "probc"),
0219                                                      });
0220       temp2.addParameter<std::string>("name", "CvsL");
0221       temp1.push_back(temp2);
0222     }
0223     desc.addVPSet("discriminators", vpsd1, temp1);
0224   }
0225   descriptions.addDefault(desc);
0226 }
0227 
0228 // Register BTagProbabilityToDiscriminator with the framework as a plug-in module
0229 DEFINE_FWK_MODULE(BTagProbabilityToDiscriminator);