File indexing completed on 2025-01-08 03:36:25
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020 #include <memory>
0021
0022
0023 #include "FWCore/Framework/interface/Frameworkfwd.h"
0024 #include "FWCore/Framework/interface/stream/EDProducer.h"
0025
0026 #include "FWCore/Framework/interface/Event.h"
0027 #include "FWCore/Framework/interface/MakerMacros.h"
0028
0029 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0030 #include "FWCore/Utilities/interface/StreamID.h"
0031
0032 #include "FWCore/Framework/interface/makeRefToBaseProdFrom.h"
0033 #include "DataFormats/Common/interface/RefToBase.h"
0034 #include "DataFormats/BTauReco/interface/ShallowTagInfo.h"
0035 #include "DataFormats/BTauReco/interface/TaggingVariable.h"
0036 #include "DataFormats/BTauReco/interface/JetTag.h"
0037
0038
0039 #include "lwtnn/LightweightNeuralNetwork.hh"
0040 #include "lwtnn/parse_json.hh"
0041
#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>
0048
0049 #include <boost/algorithm/string.hpp>
0050 using namespace std;
0051 using namespace reco;
0052
0053
0054
0055
0056 namespace {
0057
0058 struct MVAVar {
0059 std::string name;
0060 reco::btau::TaggingVariableName id;
0061 int index;
0062 double default_value;
0063 };
0064
0065 class NeuralNetworkAndConstants {
0066 public:
0067 NeuralNetworkAndConstants(const edm::ParameterSet&);
0068
0069 std::unique_ptr<const lwt::LightweightNeuralNetwork> const& neural_network() const { return neural_network_; }
0070 vector<string> const& outputs() const { return outputs_; }
0071 bool check_sv_for_defaults() const { return check_sv_for_defaults_; }
0072 map<string, string> const& toadd() const { return toadd_; }
0073 vector<MVAVar> const& variables() const { return variables_; }
0074
0075 private:
0076 std::unique_ptr<const lwt::LightweightNeuralNetwork> neural_network_;
0077 vector<string> outputs_;
0078 bool check_sv_for_defaults_;
0079 map<string, string> toadd_;
0080 vector<MVAVar> variables_;
0081 };
0082
0083 class DeepFlavourJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCache<NeuralNetworkAndConstants>> {
0084 public:
0085 explicit DeepFlavourJetTagsProducer(const edm::ParameterSet&, NeuralNetworkAndConstants const*);
0086 ~DeepFlavourJetTagsProducer() override;
0087
0088 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0089
0090 static std::unique_ptr<NeuralNetworkAndConstants> initializeGlobalCache(const edm::ParameterSet& iConfig) {
0091 return std::make_unique<NeuralNetworkAndConstants>(iConfig);
0092 }
0093
0094 static void globalEndJob(NeuralNetworkAndConstants*) {}
0095
0096 private:
0097 typedef std::vector<reco::ShallowTagInfo> INFOS;
0098 void beginStream(edm::StreamID) override {}
0099 void produce(edm::Event&, const edm::EventSetup&) override;
0100 void endStream() override {}
0101
0102
0103 const edm::EDGetTokenT<INFOS> src_;
0104 lwt::ValueMap inputs_;
0105 };
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119 NeuralNetworkAndConstants::NeuralNetworkAndConstants(const edm::ParameterSet& iConfig)
0120 : check_sv_for_defaults_(iConfig.getParameter<bool>("checkSVForDefaults")) {
0121 bool mean_padding = iConfig.getParameter<bool>("meanPadding");
0122
0123
0124 edm::FileInPath nnconfig = iConfig.getParameter<edm::FileInPath>("NNConfig");
0125 ifstream jsonfile(nnconfig.fullPath());
0126 auto config = lwt::parse_json(jsonfile);
0127
0128
0129 neural_network_ =
0130 std::make_unique<const lwt::LightweightNeuralNetwork>(config.inputs, config.layers, config.outputs);
0131
0132 outputs_ = config.outputs;
0133 set<string> outset(outputs_.begin(), outputs_.end());
0134
0135
0136 edm::ParameterSet toaddPSet = iConfig.getParameter<edm::ParameterSet>("toAdd");
0137 for (auto const& output : toaddPSet.getParameterNamesForType<string>()) {
0138 string target = toaddPSet.getParameter<string>(output);
0139 if (outset.find(output) == outset.end())
0140 throw cms::Exception("RuntimeError") << "The required output: " << output << " to be added to " << target
0141 << " could not be found among the NN outputs" << endl;
0142 if (outset.find(target) == outset.end())
0143 throw cms::Exception("RuntimeError") << "The required output: " << target << ", target of addition of "
0144 << output << " could not be found among the NN outputs" << endl;
0145 toadd_[output] = target;
0146 }
0147
0148
0149 for (auto const& input : config.inputs) {
0150 MVAVar var;
0151 var.name = input.name;
0152
0153 vector<string> tokens;
0154 if (var.name != "Jet_JP" && var.name != "Jet_JBP" && var.name != "Jet_SoftMu" && var.name != "Jet_SoftEl") {
0155 boost::split(tokens, var.name, boost::is_any_of("_"));
0156 } else {
0157 tokens.push_back(var.name);
0158 }
0159 if (tokens.empty()) {
0160 throw cms::Exception("RuntimeError")
0161 << "I could not parse properly " << input.name << " as input feature" << std::endl;
0162 }
0163 var.id = reco::getTaggingVariableName(tokens.at(0));
0164
0165 if (var.id == reco::btau::lastTaggingVariable) {
0166 throw cms::Exception("ValueError")
0167 << "I could not find the TaggingVariable named " << tokens.at(0)
0168 << " from the NN input variable: " << input.name << ". Please check the spelling" << std::endl;
0169 }
0170 var.index = (tokens.size() == 2) ? stoi(tokens.at(1)) : -1;
0171 var.default_value =
0172 (mean_padding)
0173 ? 0.
0174 : -1 * input.offset;
0175
0176
0177 variables_.push_back(var);
0178 }
0179 }
0180
0181 DeepFlavourJetTagsProducer::DeepFlavourJetTagsProducer(const edm::ParameterSet& iConfig,
0182 NeuralNetworkAndConstants const* gc)
0183 : src_(consumes<INFOS>(iConfig.getParameter<edm::InputTag>("src"))), inputs_() {
0184
0185 for (auto const& outnode : gc->outputs()) {
0186 if (gc->toadd().find(outnode) == gc->toadd().end()) {
0187 produces<JetTagCollection>(outnode);
0188 }
0189 }
0190 }
0191
0192 DeepFlavourJetTagsProducer::~DeepFlavourJetTagsProducer() {}
0193
0194
0195
0196
0197
0198
0199 void DeepFlavourJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
0200 NeuralNetworkAndConstants const* gc = globalCache();
0201 vector<string> const& outputs = gc->outputs();
0202 map<string, string> const& toadd = gc->toadd();
0203
0204
0205 edm::Handle<INFOS> taginfos;
0206 iEvent.getByToken(src_, taginfos);
0207
0208
0209
0210 vector<std::unique_ptr<JetTagCollection>> output_tags;
0211 output_tags.reserve(outputs.size());
0212 for (size_t i = 0; i < outputs.size(); ++i) {
0213 if (!taginfos->empty()) {
0214 edm::RefToBase<Jet> jj = taginfos->begin()->jet();
0215 output_tags.push_back(std::make_unique<JetTagCollection>(edm::makeRefToBaseProdFrom(jj, iEvent)));
0216 } else {
0217 output_tags.push_back(std::make_unique<JetTagCollection>());
0218 }
0219 }
0220
0221 int naninput = 0;
0222 int nanoutput = 0;
0223
0224
0225 for (auto& info : *(taginfos)) {
0226
0227 TaggingVariableList vars = info.taggingVariables();
0228
0229 bool notracks = (vars.get(reco::btau::jetNSelectedTracks) == 0);
0230 bool novtx = (vars.get(reco::btau::jetNSecondaryVertices) == 0);
0231 bool defaulted = (gc->check_sv_for_defaults()) ? (notracks && novtx) : notracks;
0232 lwt::ValueMap nnout;
0233
0234 if (!defaulted) {
0235 for (auto const& var : gc->variables()) {
0236 if (var.index >= 0) {
0237 std::vector<float> vals = vars.getList(var.id, false);
0238 inputs_[var.name] = (((int)vals.size()) > var.index) ? vals.at(var.index) : var.default_value;
0239 }
0240
0241 else {
0242 inputs_[var.name] = vars.get(var.id, var.default_value);
0243 }
0244
0245
0246 if (std::isnan(inputs_[var.name])) {
0247 naninput++;
0248 }
0249 }
0250
0251
0252 nnout = gc->neural_network()->compute(inputs_);
0253
0254
0255 for (auto const& entry : toadd) {
0256 nnout[entry.second] += nnout[entry.first];
0257 }
0258
0259
0260 for (const auto& entry : nnout) {
0261 if (std::isnan(entry.second)) {
0262 nanoutput++;
0263 }
0264 }
0265 }
0266
0267
0268 edm::RefToBase<Jet> key = info.jet();
0269
0270
0271 for (size_t i = 0; i < outputs.size(); ++i) {
0272 (*output_tags[i])[key] = (defaulted) ? -1 : nnout[outputs[i]];
0273 }
0274 }
0275
0276 if (naninput + nanoutput > 0) {
0277 edm::LogWarning("ValueError") << "The NN encountered " << naninput << " nan input TagInfo values and produced "
0278 << nanoutput << " nan output values";
0279 }
0280
0281
0282 for (size_t i = 0; i < outputs.size(); ++i) {
0283 if (toadd.find(outputs[i]) == toadd.end()) {
0284 iEvent.put(std::move(output_tags[i]), outputs[i]);
0285 }
0286 }
0287 }
0288
0289
0290 void DeepFlavourJetTagsProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0291 edm::ParameterSetDescription desc;
0292
0293
0294 desc.add<edm::InputTag>("src", edm::InputTag("pfDeepCSVTagInfos"))
0295 ->setComment("InputTag for the source tag info collection.");
0296 desc.add<bool>("checkSVForDefaults", false)->setComment("Flag to check secondary vertex defaults.");
0297 desc.add<bool>("meanPadding", false)->setComment("Enable or disable mean padding for input features.");
0298 desc.add<edm::FileInPath>("NNConfig", edm::FileInPath("RecoBTag/Combined/data/DeepFlavourNoSL.json"))
0299 ->setComment("Path to the JSON file containing the neural network configuration.");
0300
0301
0302 edm::ParameterSetDescription toAddDesc;
0303 toAddDesc.setAllowAnything();
0304 desc.add<edm::ParameterSetDescription>("toAdd", toAddDesc)
0305 ->setComment(
0306 "ParameterSet for merging different NN outputs together. "
0307 "Each key is an output to merge, and its value is the target output.");
0308
0309
0310 descriptions.addWithDefaultLabel(desc);
0311 }
0312 }
0313
0314
0315 DEFINE_FWK_MODULE(DeepFlavourJetTagsProducer);