Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 13:33:36

0001 // -*- C++ -*-
0002 //
0003 // Package:    ​RecoBTag/​SecondaryVertex
0004 // Class:      DeepFlavourJetTagsProducer
0005 //
0006 /**\class DeepFlavourJetTagsProducer DeepFlavourJetTagsProducer.cc ​RecoBTag/DeepFlavour/plugins/DeepFlavourJetTagsProducer.cc
0007  *
0008  * Description: EDProducer that evaluates an lwtnn neural network on ShallowTagInfos and produces JetTag collections
0009  *
0010  * Implementation:
0011  *    A collection of ShallowTagInfos is taken as input; for each of them the tagging variables are fed to the neural network
0012  *    read from the NNConfig JSON file, and one JetTagCollection per (non-merged) network output is produced as output.
0013  */
0014 //
0015 // Original Author:  Mauro Verzetti (U. Rochester)
0016 //
0017 //
0018 
0019 // system include files
0020 #include <memory>
0021 
0022 // user include files
0023 #include "FWCore/Framework/interface/Frameworkfwd.h"
0024 #include "FWCore/Framework/interface/stream/EDProducer.h"
0025 
0026 #include "FWCore/Framework/interface/Event.h"
0027 #include "FWCore/Framework/interface/MakerMacros.h"
0028 
0029 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0030 #include "FWCore/Utilities/interface/StreamID.h"
0031 
0032 #include "FWCore/Framework/interface/makeRefToBaseProdFrom.h"
0033 #include "DataFormats/Common/interface/RefToBase.h"
0034 #include "DataFormats/BTauReco/interface/ShallowTagInfo.h"
0035 #include "DataFormats/BTauReco/interface/TaggingVariable.h"
0036 #include "DataFormats/BTauReco/interface/JetTag.h"
0037 
0038 //from lwtnn
0039 #include "lwtnn/LightweightNeuralNetwork.hh"
0040 #include "lwtnn/parse_json.hh"
0041 
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>
0048 
0049 #include <boost/algorithm/string.hpp>
0050 using namespace std;
0051 using namespace reco;
0052 //
0053 // class declaration
0054 //
0055 
0056 namespace {
0057 
0058   struct MVAVar {
0059     std::string name;
0060     reco::btau::TaggingVariableName id;
0061     int index;
0062     double default_value;
0063   };
0064 
0065   class NeuralNetworkAndConstants {
0066   public:
0067     NeuralNetworkAndConstants(const edm::ParameterSet&);
0068 
0069     std::unique_ptr<const lwt::LightweightNeuralNetwork> const& neural_network() const { return neural_network_; }
0070     vector<string> const& outputs() const { return outputs_; }
0071     bool check_sv_for_defaults() const { return check_sv_for_defaults_; }
0072     map<string, string> const& toadd() const { return toadd_; }
0073     vector<MVAVar> const& variables() const { return variables_; }
0074 
0075   private:
0076     std::unique_ptr<const lwt::LightweightNeuralNetwork> neural_network_;
0077     vector<string> outputs_;
0078     bool check_sv_for_defaults_;
0079     map<string, string> toadd_;
0080     vector<MVAVar> variables_;
0081   };
0082 
0083   class DeepFlavourJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCache<NeuralNetworkAndConstants>> {
0084   public:
0085     explicit DeepFlavourJetTagsProducer(const edm::ParameterSet&, NeuralNetworkAndConstants const*);
0086     ~DeepFlavourJetTagsProducer() override;
0087 
0088     static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0089 
0090     static std::unique_ptr<NeuralNetworkAndConstants> initializeGlobalCache(const edm::ParameterSet& iConfig) {
0091       return std::make_unique<NeuralNetworkAndConstants>(iConfig);
0092     }
0093 
0094     static void globalEndJob(NeuralNetworkAndConstants*) {}
0095 
0096   private:
0097     typedef std::vector<reco::ShallowTagInfo> INFOS;
0098     void beginStream(edm::StreamID) override {}
0099     void produce(edm::Event&, const edm::EventSetup&) override;
0100     void endStream() override {}
0101 
0102     // ----------member data ---------------------------
0103     const edm::EDGetTokenT<INFOS> src_;
0104     lwt::ValueMap inputs_;  //typedef of unordered_map<string, float>
0105   };
0106 
0107   //
0108   // constants, enums and typedefs
0109   //
0110 
0111   //
0112   // static data member definitions
0113   //
0114 
0115   //
0116   // constructors and destructor
0117   //
0118 
0119   NeuralNetworkAndConstants::NeuralNetworkAndConstants(const edm::ParameterSet& iConfig)
0120       : check_sv_for_defaults_(iConfig.getParameter<bool>("checkSVForDefaults")) {
0121     bool mean_padding = iConfig.getParameter<bool>("meanPadding");
0122 
0123     //parse json
0124     edm::FileInPath nnconfig = iConfig.getParameter<edm::FileInPath>("NNConfig");
0125     ifstream jsonfile(nnconfig.fullPath());
0126     auto config = lwt::parse_json(jsonfile);
0127 
0128     //create NN and store the output names for the future
0129     neural_network_ =
0130         std::make_unique<const lwt::LightweightNeuralNetwork>(config.inputs, config.layers, config.outputs);
0131 
0132     outputs_ = config.outputs;
0133     set<string> outset(outputs_.begin(), outputs_.end());
0134 
0135     //in case we want to merge some different outputs together
0136     edm::ParameterSet toaddPSet = iConfig.getParameter<edm::ParameterSet>("toAdd");
0137     for (auto const& output : toaddPSet.getParameterNamesForType<string>()) {
0138       string target = toaddPSet.getParameter<string>(output);
0139       if (outset.find(output) == outset.end())
0140         throw cms::Exception("RuntimeError") << "The required output: " << output << " to be added to " << target
0141                                              << " could not be found among the NN outputs" << endl;
0142       if (outset.find(target) == outset.end())
0143         throw cms::Exception("RuntimeError") << "The required output: " << target << ", target of addition of "
0144                                              << output << " could not be found among the NN outputs" << endl;
0145       toadd_[output] = target;
0146     }
0147 
0148     //get the set-up for the inputs
0149     for (auto const& input : config.inputs) {
0150       MVAVar var;
0151       var.name = input.name;
0152       //two paradigms
0153       vector<string> tokens;
0154       if (var.name != "Jet_JP" && var.name != "Jet_JBP" && var.name != "Jet_SoftMu" && var.name != "Jet_SoftEl") {
0155         boost::split(tokens, var.name, boost::is_any_of("_"));
0156       } else {
0157         tokens.push_back(var.name);
0158       }
0159       if (tokens.empty()) {
0160         throw cms::Exception("RuntimeError")
0161             << "I could not parse properly " << input.name << " as input feature" << std::endl;
0162       }
0163       var.id = reco::getTaggingVariableName(tokens.at(0));
0164       //die grafully if the tagging variable is not found!
0165       if (var.id == reco::btau::lastTaggingVariable) {
0166         throw cms::Exception("ValueError")
0167             << "I could not find the TaggingVariable named " << tokens.at(0)
0168             << " from the NN input variable: " << input.name << ". Please check the spelling" << std::endl;
0169       }
0170       var.index = (tokens.size() == 2) ? stoi(tokens.at(1)) : -1;
0171       var.default_value =
0172           (mean_padding)
0173               ? 0.
0174               : -1 * input.offset;  //set default to -offset so that when scaling (val+offset)*scale the outcome is 0
0175       //for mean padding it is set to zero so that undefined values are assigned -mean/scale
0176 
0177       variables_.push_back(var);
0178     }
0179   }
0180 
0181   DeepFlavourJetTagsProducer::DeepFlavourJetTagsProducer(const edm::ParameterSet& iConfig,
0182                                                          NeuralNetworkAndConstants const* gc)
0183       : src_(consumes<INFOS>(iConfig.getParameter<edm::InputTag>("src"))), inputs_() {
0184     //produce one output kind per node
0185     for (auto const& outnode : gc->outputs()) {
0186       if (gc->toadd().find(outnode) == gc->toadd().end()) {  //produce output only if does not get added
0187         produces<JetTagCollection>(outnode);
0188       }
0189     }
0190   }
0191 
0192   DeepFlavourJetTagsProducer::~DeepFlavourJetTagsProducer() {}
0193 
0194   //
0195   // member functions
0196   //
0197 
0198   // ------------ method called to produce the data  ------------
0199   void DeepFlavourJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
0200     NeuralNetworkAndConstants const* gc = globalCache();
0201     vector<string> const& outputs = gc->outputs();
0202     map<string, string> const& toadd = gc->toadd();
0203 
0204     // get input TagInfos
0205     edm::Handle<INFOS> taginfos;
0206     iEvent.getByToken(src_, taginfos);
0207 
0208     // create the output collection
0209     // which is a "map" RefToBase<Jet> --> float
0210     vector<std::unique_ptr<JetTagCollection>> output_tags;
0211     output_tags.reserve(outputs.size());
0212     for (size_t i = 0; i < outputs.size(); ++i) {
0213       if (!taginfos->empty()) {
0214         edm::RefToBase<Jet> jj = taginfos->begin()->jet();
0215         output_tags.push_back(std::make_unique<JetTagCollection>(edm::makeRefToBaseProdFrom(jj, iEvent)));
0216       } else {
0217         output_tags.push_back(std::make_unique<JetTagCollection>());
0218       }
0219     }
0220 
0221     int naninput = 0;
0222     int nanoutput = 0;
0223 
0224     // loop over TagInfos
0225     for (auto& info : *(taginfos)) {
0226       //convert the taginfo into the value map in the appropriate way
0227       TaggingVariableList vars = info.taggingVariables();
0228       //if there are no tracks there's no point in doing it
0229       bool notracks = (vars.get(reco::btau::jetNSelectedTracks) == 0);
0230       bool novtx = (vars.get(reco::btau::jetNSecondaryVertices) == 0);
0231       bool defaulted = (gc->check_sv_for_defaults()) ? (notracks && novtx) : notracks;
0232       lwt::ValueMap nnout;  //returned value
0233 
0234       if (!defaulted) {
0235         for (auto const& var : gc->variables()) {
0236           if (var.index >= 0) {
0237             std::vector<float> vals = vars.getList(var.id, false);
0238             inputs_[var.name] = (((int)vals.size()) > var.index) ? vals.at(var.index) : var.default_value;
0239           }
0240           //single value tagging var
0241           else {
0242             inputs_[var.name] = vars.get(var.id, var.default_value);
0243           }
0244 
0245           //count if the input is nan
0246           if (std::isnan(inputs_[var.name])) {
0247             naninput++;
0248           }
0249         }
0250 
0251         //compute NN output(s)
0252         nnout = gc->neural_network()->compute(inputs_);
0253 
0254         //merge outputs
0255         for (auto const& entry : toadd) {
0256           nnout[entry.second] += nnout[entry.first];
0257         }
0258 
0259         //count if the output is nan
0260         for (const auto& entry : nnout) {
0261           if (std::isnan(entry.second)) {
0262             nanoutput++;
0263           }
0264         }
0265       }
0266 
0267       //ket the maps key
0268       edm::RefToBase<Jet> key = info.jet();
0269 
0270       //dump the NN output(s)
0271       for (size_t i = 0; i < outputs.size(); ++i) {
0272         (*output_tags[i])[key] = (defaulted) ? -1 : nnout[outputs[i]];
0273       }
0274     }
0275 
0276     if (naninput + nanoutput > 0) {
0277       edm::LogWarning("ValueError") << "The NN encountered " << naninput << " nan input TagInfo values and produced "
0278                                     << nanoutput << " nan output values";
0279     }
0280 
0281     // put the output in the event
0282     for (size_t i = 0; i < outputs.size(); ++i) {
0283       if (toadd.find(outputs[i]) == toadd.end()) {
0284         iEvent.put(std::move(output_tags[i]), outputs[i]);
0285       }
0286     }
0287   }
0288 
0289   // ------------ method fills 'descriptions' with the allowed parameters for the module  ------------
0290   void DeepFlavourJetTagsProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0291     //The following says we do not know what parameters are allowed so do no validation
0292     // Please change this to state exactly what you do use, even if it is no parameters
0293     edm::ParameterSetDescription desc;
0294     desc.setUnknown();
0295     descriptions.addDefault(desc);
0296   }
0297 }  // end unnamed namespace
0298 
0299 //define this as a plug-in
0300 DEFINE_FWK_MODULE(DeepFlavourJetTagsProducer);