File indexing completed on 2023-05-26 22:38:23
0001
0002 #include "RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h"
0003 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0004 #include "FWCore/Utilities/interface/FileInPath.h"
0005 #include <iostream>
0006 #include <fstream>
0007 using namespace egammaTools;
0008
0009 EgammaDNNHelper::EgammaDNNHelper(const DNNConfiguration& cfg,
0010 const ModelSelector& modelSelector,
0011 const std::vector<std::string>& availableVars)
0012 : cfg_(cfg), modelSelector_(modelSelector), nModels_(cfg_.modelsFiles.size()), graphDefs_(cfg_.modelsFiles.size()) {
0013 initTensorFlowGraphs();
0014 initScalerFiles(availableVars);
0015 }
0016
0017 void EgammaDNNHelper::initTensorFlowGraphs() {
0018
0019 LogDebug("EgammaDNNHelper") << "Loading " << nModels_ << " graphs";
0020 size_t i = 0;
0021 for (const auto& model_file : cfg_.modelsFiles) {
0022 graphDefs_[i] =
0023 std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(model_file).fullPath()));
0024 i++;
0025 }
0026 }
0027
0028 std::vector<tensorflow::Session*> EgammaDNNHelper::getSessions() const {
0029 std::vector<tensorflow::Session*> sessions;
0030 LogDebug("EgammaDNNHelper") << "Starting " << nModels_ << " TF sessions";
0031 sessions.reserve(graphDefs_.size());
0032 for (const auto& graphDef : graphDefs_) {
0033 sessions.push_back(tensorflow::createSession(graphDef.get()));
0034 }
0035 LogDebug("EgammaDNNHelper") << "TF sessions started";
0036 return sessions;
0037 }
0038
0039 void EgammaDNNHelper::initScalerFiles(const std::vector<std::string>& availableVars) {
0040 for (const auto& scaler_file : cfg_.scalersFiles) {
0041
0042 std::vector<ScalerConfiguration> features;
0043 std::ifstream inputfile_scaler{edm::FileInPath(scaler_file).fullPath()};
0044 int ninputs = 0;
0045 if (inputfile_scaler.fail()) {
0046 throw cms::Exception("MissingFile") << "Scaler file for PFid DNN not found";
0047 } else {
0048
0049 float par1, par2;
0050 std::string varName, type_str;
0051 uint type;
0052 while (inputfile_scaler >> varName >> type_str >> par1 >> par2) {
0053 if (type_str == "stdscale")
0054 type = 1;
0055 else if (type_str == "minmax")
0056 type = 2;
0057 else if (type_str == "custom1")
0058 type = 3;
0059 else
0060 type = 0;
0061 features.push_back(ScalerConfiguration{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
0062
0063 auto match = std::find(availableVars.begin(), availableVars.end(), varName);
0064 if (match == std::end(availableVars)) {
0065 throw cms::Exception("MissingVariable")
0066 << "Requested variable (" << varName << ") not available between DNN inputs";
0067 }
0068 ninputs += 1;
0069 }
0070 }
0071 inputfile_scaler.close();
0072 featuresMap_.push_back(features);
0073 nInputs_.push_back(ninputs);
0074 }
0075 }
0076
0077 std::pair<uint, std::vector<float>> EgammaDNNHelper::getScaledInputs(
0078 const std::map<std::string, float>& variables) const {
0079
0080
0081 const auto modelIndex = modelSelector_(variables);
0082 std::vector<float> inputs;
0083
0084
0085 for (auto& [varName, type, par1, par2] : featuresMap_[modelIndex]) {
0086 if (type == 1)
0087 inputs.push_back((variables.at(varName) - par1) / par2);
0088 else if (type == 2)
0089 inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
0090 else if (type == 3)
0091 inputs.push_back(2 * (variables.at(varName) - par1) / (par2 - par1) - 1.);
0092 else {
0093 inputs.push_back(variables.at(varName));
0094 }
0095
0096
0097 }
0098 return std::make_pair(modelIndex, inputs);
0099 }
0100
0101 std::vector<std::pair<uint, std::vector<float>>> EgammaDNNHelper::evaluate(
0102 const std::vector<std::map<std::string, float>>& candidates,
0103 const std::vector<tensorflow::Session*>& sessions) const {
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117 size_t nCandidates = candidates.size();
0118 std::vector<std::vector<uint>> indexMap(nModels_);
0119 std::vector<std::vector<float>> inputsVectors(nCandidates);
0120 std::vector<uint> counts(nModels_);
0121
0122 LogDebug("EgammaDNNHelper") << "Working on " << nCandidates << " candidates";
0123
0124 uint icand = 0;
0125 for (auto& candidate : candidates) {
0126 LogDebug("EgammaDNNHelper") << "Working on candidate: " << icand;
0127 const auto& [model_index, inputs] = getScaledInputs(candidate);
0128 counts[model_index] += 1;
0129 indexMap[model_index].push_back(icand);
0130 inputsVectors[icand] = inputs;
0131 icand++;
0132 }
0133
0134
0135 std::vector<tensorflow::Tensor> input_tensors(nModels_);
0136
0137 std::vector<float*> input_tensors_pointer(nModels_);
0138 for (size_t i = 0; i < nModels_; i++) {
0139 LogDebug("EgammaDNNHelper") << "Initializing TF input " << i << " with rows:" << counts[i]
0140 << " and cols:" << nInputs_[i];
0141 input_tensors[i] = tensorflow::Tensor{tensorflow::DT_FLOAT, {counts[i], nInputs_[i]}};
0142 input_tensors_pointer[i] = input_tensors[i].flat<float>().data();
0143 }
0144
0145
0146 for (size_t m = 0; m < nModels_; m++) {
0147 LogDebug("EgammaDNNHelper") << "Loading TF input tensor for model: " << m;
0148 float* T = input_tensors_pointer[m];
0149 for (size_t cand_index : indexMap[m]) {
0150 for (size_t k = 0; k < nInputs_[m]; k++, T++) {
0151 *T = inputsVectors[cand_index][k];
0152 }
0153 }
0154 }
0155
0156
0157
0158 std::vector<std::pair<uint, std::pair<uint, std::vector<float>>>> outputs;
0159
0160 for (size_t m = 0; m < nModels_; m++) {
0161 if (counts[m] == 0)
0162 continue;
0163 std::vector<tensorflow::Tensor> output;
0164 LogDebug("EgammaDNNHelper") << "Run model: " << m << " with " << counts[m] << "objects";
0165 tensorflow::run(sessions[m], {{cfg_.inputTensorName, input_tensors[m]}}, {cfg_.outputTensorName}, &output);
0166
0167 const auto& r = output[0].tensor<float, 2>();
0168
0169 LogDebug("EgammaDNNHelper") << "Model " << m << " has " << cfg_.outputDim[m] << " nodes!";
0170 for (uint b = 0; b < counts[m]; b++) {
0171
0172 std::vector<float> result(cfg_.outputDim[m]);
0173 for (size_t k = 0; k < cfg_.outputDim[m]; k++) {
0174 result[k] = r(b, k);
0175 LogDebug("EgammaDNNHelper") << "For Object " << b + 1 << " : Node " << k + 1 << " score = " << r(b, k);
0176 }
0177
0178 const auto cand_index = indexMap[m][b];
0179 outputs.push_back(std::make_pair(cand_index, std::make_pair(m, result)));
0180 }
0181 }
0182
0183 std::sort(outputs.begin(), outputs.end());
0184 std::vector<std::pair<uint, std::vector<float>>> final_outputs(outputs.size());
0185 std::transform(outputs.begin(), outputs.end(), final_outputs.begin(), [](auto a) { return a.second; });
0186
0187 return final_outputs;
0188 }