#include "RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/Utilities/interface/FileInPath.h"

#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>

using namespace egammaTools;

EgammaDNNHelper::EgammaDNNHelper(const DNNConfiguration& cfg,
                                 const ModelSelector& modelSelector,
                                 const std::vector<std::string>& availableVars)
    : cfg_(cfg),
      modelSelector_(modelSelector),
      nModels_(cfg_.modelsFiles.size()),
      tf_sessions_cache_(cfg_.modelsFiles.size()) {
  initTensorFlowSessions();
  initScalerFiles(availableVars);
}

void EgammaDNNHelper::initTensorFlowSessions() {
  LogDebug("EgammaDNNHelper") << "Loading " << nModels_ << " graphs and sessions";
  size_t i = 0;
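  // Build one cached TensorFlow session per configured model file,
  // resolving each file name through edm::FileInPath.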
  for (auto& model_file : cfg_.modelsFiles) {
    tf_sessions_cache_[i] = std::make_unique<tensorflow::SessionCache>(edm::FileInPath(model_file).fullPath());
    i++;
  }
  LogDebug("EgammaDNNHelper") << "TF sessions initialized";
}

void EgammaDNNHelper::initScalerFiles(const std::vector<std::string>& availableVars) {
  for (const auto& scaler_file : cfg_.scalersFiles) {
    std::vector<ScalerConfiguration> features;
    std::ifstream inputfile_scaler{edm::FileInPath(scaler_file).fullPath()};
    int ninputs = 0;
    if (inputfile_scaler.fail()) {
      throw cms::Exception("MissingFile") << "Scaler file for PFid DNN not found";
    } else {
      float par1, par2;
      std::string varName, type_str;
      uint type;
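      // Each line of the scaler file is expected to provide:
      //   <variable name> <scaler type> <par1> <par2>
      // The type string selects the transformation applied later in getScaledInputs().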
      while (inputfile_scaler >> varName >> type_str >> par1 >> par2) {
        if (type_str == "stdscale")
          type = 1;
        else if (type_str == "minmax")
          type = 2;
        else if (type_str == "custom1")
          type = 3;
        else
          type = 0;
        features.push_back(ScalerConfiguration{.varName = varName, .type = type, .par1 = par1, .par2 = par2});

        auto match = std::find(availableVars.begin(), availableVars.end(), varName);
        if (match == std::end(availableVars)) {
          throw cms::Exception("MissingVariable")
              << "Requested variable (" << varName << ") not available among the DNN inputs";
        }
        ninputs += 1;
      }
    }
    inputfile_scaler.close();
    featuresMap_.push_back(features);
    nInputs_.push_back(ninputs);
  }
}

std::pair<uint, std::vector<float>> EgammaDNNHelper::getScaledInputs(
    const std::map<std::string, float>& variables) const {
  const auto modelIndex = modelSelector_(variables);
  std::vector<float> inputs;

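  // Apply the scaler parametrisation of the selected model to each input variable:
  //   type 1 ("stdscale"): (x - par1) / par2
  //   type 2 ("minmax"):   (x - par1) / (par2 - par1)
  //   type 3 ("custom1"):  2 * (x - par1) / (par2 - par1) - 1
  //   any other type:      the variable is passed through unchanged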
  for (auto& [varName, type, par1, par2] : featuresMap_[modelIndex]) {
    if (type == 1)
      inputs.push_back((variables.at(varName) - par1) / par2);
    else if (type == 2)
      inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
    else if (type == 3)
      inputs.push_back(2 * (variables.at(varName) - par1) / (par2 - par1) - 1.);
    else {
      inputs.push_back(variables.at(varName));
    }
  }
  return std::make_pair(modelIndex, inputs);
}

std::vector<std::pair<uint, std::vector<float>>> EgammaDNNHelper::evaluate(
    const std::vector<std::map<std::string, float>>& candidates) const {
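  /* Evaluate the DNNs on all the candidates:
       1) for each candidate, select the model with modelSelector_ and scale its inputs;
       2) group the candidates by model and fill one input tensor per model;
       3) run each TensorFlow session on its batch of candidates;
       4) collect the outputs and restore the original candidate ordering.
  */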
  size_t nCandidates = candidates.size();
  std::vector<std::vector<uint>> indexMap(nModels_);
  std::vector<std::vector<float>> inputsVectors(nCandidates);
  std::vector<uint> counts(nModels_);

  LogDebug("EgammaDNNHelper") << "Working on " << nCandidates << " candidates";

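  // Scale the inputs of each candidate and record which model it has been assigned to,
  // keeping track of the original candidate index.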
  uint icand = 0;
  for (auto& candidate : candidates) {
    LogDebug("EgammaDNNHelper") << "Working on candidate: " << icand;
    const auto& [model_index, inputs] = getScaledInputs(candidate);
    counts[model_index] += 1;
    indexMap[model_index].push_back(icand);
    inputsVectors[icand] = inputs;
    icand++;
  }

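  // Allocate one input tensor per model with shape {assigned candidates, number of inputs},
  // and keep a raw pointer to each tensor buffer for fast filling.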
  std::vector<tensorflow::Tensor> input_tensors(nModels_);
  std::vector<float*> input_tensors_pointer(nModels_);
  for (size_t i = 0; i < nModels_; i++) {
    LogDebug("EgammaDNNHelper") << "Initializing TF input " << i << " with rows:" << counts[i]
                                << " and cols:" << nInputs_[i];
    input_tensors[i] = tensorflow::Tensor{tensorflow::DT_FLOAT, {counts[i], nInputs_[i]}};
    input_tensors_pointer[i] = input_tensors[i].flat<float>().data();
  }

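  // Copy the scaled inputs of the candidates assigned to each model into its tensor, row by row.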
  for (size_t m = 0; m < nModels_; m++) {
    LogDebug("EgammaDNNHelper") << "Loading TF input tensor for model: " << m;
    float* T = input_tensors_pointer[m];
    for (size_t cand_index : indexMap[m]) {
      for (size_t k = 0; k < nInputs_[m]; k++, T++) {
        *T = inputsVectors[cand_index][k];
      }
    }
  }

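  // Run each model on its batch (skipping models with no candidates) and store,
  // for every candidate, the pair (candidate index, (model index, output scores)).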
  std::vector<std::pair<uint, std::pair<uint, std::vector<float>>>> outputs;

  for (size_t m = 0; m < nModels_; m++) {
    if (counts[m] == 0)
      continue;
    std::vector<tensorflow::Tensor> output;
    LogDebug("EgammaDNNHelper") << "Run model: " << m << " with " << counts[m] << " objects";
    tensorflow::run(tf_sessions_cache_[m]->getSession(),
                    {{cfg_.inputTensorName, input_tensors[m]}},
                    {cfg_.outputTensorName},
                    &output);

    const auto& r = output[0].tensor<float, 2>();

    LogDebug("EgammaDNNHelper") << "Model " << m << " has " << cfg_.outputDim[m] << " nodes!";
    for (uint b = 0; b < counts[m]; b++) {
      std::vector<float> result(cfg_.outputDim[m]);
      for (size_t k = 0; k < cfg_.outputDim[m]; k++) {
        result[k] = r(b, k);
        LogDebug("EgammaDNNHelper") << "For Object " << b + 1 << " : Node " << k + 1 << " score = " << r(b, k);
      }
      const auto cand_index = indexMap[m][b];
      outputs.push_back(std::make_pair(cand_index, std::make_pair(m, result)));
    }
  }

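  // Sort by candidate index to restore the input ordering, then drop the index
  // and keep only (model index, output scores) for each candidate.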
  std::sort(outputs.begin(), outputs.end());
  std::vector<std::pair<uint, std::vector<float>>> final_outputs(outputs.size());
  std::transform(outputs.begin(), outputs.end(), final_outputs.begin(), [](auto a) { return a.second; });

  return final_outputs;
}