File indexing completed on 2024-09-24 22:51:35
0001 #include "RecoEcal/EgammaCoreTools/interface/DeepSCGraphEvaluation.h"
0002 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0003 #include "FWCore/ParameterSet/interface/FileInPath.h"
0004 #include "TMath.h"
0005 #include <iostream>
0006 #include <fstream>
0007 using namespace reco;
0008
0009 const std::vector<std::string> DeepSCGraphEvaluation::availableClusterInputs = {"cl_energy",
0010 "cl_et",
0011 "cl_eta",
0012 "cl_phi",
0013 "cl_ieta",
0014 "cl_iphi",
0015 "cl_iz",
0016 "cl_seed_dEta",
0017 "cl_seed_dPhi",
0018 "cl_seed_dEnergy",
0019 "cl_seed_dEt",
0020 "cl_nxtals"};
0021 const std::vector<std::string> DeepSCGraphEvaluation::availableWindowInputs = {
0022 "max_cl_energy", "max_cl_et", "max_cl_eta", "max_cl_phi", "max_cl_ieta", "max_cl_iphi",
0023 "max_cl_iz", "max_cl_seed_dEta", "max_cl_seed_dPhi", "max_cl_seed_dEnergy", "max_cl_seed_dEt", "max_cl_nxtals",
0024 "min_cl_energy", "min_cl_et", "min_cl_eta", "min_cl_phi", "min_cl_ieta", "min_cl_iphi",
0025 "min_cl_iz", "min_cl_seed_dEta", "min_cl_seed_dPhi", "min_cl_seed_dEnergy", "min_cl_seed_dEt", "min_cl_nxtals",
0026 "avg_cl_energy", "avg_cl_et", "avg_cl_eta", "avg_cl_phi", "avg_cl_ieta", "avg_cl_iphi",
0027 "avg_cl_iz", "avg_cl_seed_dEta", "avg_cl_seed_dPhi", "avg_cl_seed_dEnergy", "avg_cl_seed_dEt", "avg_cl_nxtals"};
0028 const std::vector<std::string> DeepSCGraphEvaluation::availableHitsInputs = {"ieta", "iphi", "iz", "en_withfrac"};
0029
0030 DeepSCGraphEvaluation::DeepSCGraphEvaluation(const DeepSCConfiguration& cfg) : cfg_(cfg) {
0031
0032 initTensorFlowGraphAndSession();
0033
0034 inputFeaturesClusters =
0035 readInputFeaturesConfig(cfg_.configFileClusterFeatures, DeepSCGraphEvaluation::availableClusterInputs);
0036 if (inputFeaturesClusters.size() != cfg_.nClusterFeatures) {
0037 throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0038 << "parameters in the scaler file.";
0039 }
0040 inputFeaturesWindows =
0041 readInputFeaturesConfig(cfg_.configFileWindowFeatures, DeepSCGraphEvaluation::availableWindowInputs);
0042 if (inputFeaturesWindows.size() != cfg_.nWindowFeatures) {
0043 throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0044 << "parameters in the scaler file.";
0045 }
0046 inputFeaturesHits = readInputFeaturesConfig(cfg_.configFileHitsFeatures, DeepSCGraphEvaluation::availableHitsInputs);
0047 if (inputFeaturesHits.size() != cfg_.nHitsFeatures) {
0048 throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0049 << "parameters in the scaler file.";
0050 }
0051 }
0052
0053 DeepSCGraphEvaluation::~DeepSCGraphEvaluation() {
0054 if (session_ != nullptr)
0055 tensorflow::closeSession(session_);
0056 }
0057
0058 void DeepSCGraphEvaluation::initTensorFlowGraphAndSession() {
0059
0060 LogDebug("DeepSCGraphEvaluation") << "Loading graph";
0061 graphDef_ =
0062 std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(cfg_.modelFile).fullPath()));
0063 LogDebug("DeepSCGraphEvaluation") << "Starting TF sessions";
0064 session_ = tensorflow::createSession(graphDef_.get());
0065 LogDebug("DeepSCGraphEvaluation") << "TF ready";
0066 }
0067
0068 DeepSCInputs::InputConfigs DeepSCGraphEvaluation::readInputFeaturesConfig(
0069 std::string file, const std::vector<std::string>& availableInputs) const {
0070 DeepSCInputs::InputConfigs features;
0071 LogDebug("DeepSCGraphEvaluation") << "Reading scaler file: " << edm::FileInPath(file).fullPath();
0072 std::ifstream inputfile{edm::FileInPath(file).fullPath()};
0073 if (inputfile.fail()) {
0074 throw cms::Exception("MissingFile") << "Input features config file not found: " << file;
0075 } else {
0076
0077 float par1, par2;
0078 std::string varName, type_str;
0079 DeepSCInputs::ScalerType type;
0080 while (inputfile >> varName >> type_str >> par1 >> par2) {
0081 if (type_str == "MeanRms")
0082 type = DeepSCInputs::ScalerType::MeanRms;
0083 else if (type_str == "MinMax")
0084 type = DeepSCInputs::ScalerType::MinMax;
0085 else
0086 type = DeepSCInputs::ScalerType::None;
0087 features.push_back(DeepSCInputs::InputConfig{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
0088
0089 auto match = std::find(availableInputs.begin(), availableInputs.end(), varName);
0090 if (match == std::end(availableInputs)) {
0091 throw cms::Exception("MissingInput") << "Requested input (" << varName << ") not available between DNN inputs";
0092 }
0093 LogDebug("DeepSCGraphEvalutation") << "Registered input feature: " << varName << ", scaler=" << type_str;
0094 }
0095 }
0096 return features;
0097 }
0098
0099 std::vector<float> DeepSCGraphEvaluation::getScaledInputs(const DeepSCInputs::FeaturesMap& variables,
0100 const DeepSCInputs::InputConfigs& config) const {
0101 std::vector<float> inputs;
0102 inputs.reserve(config.size());
0103
0104
0105 for (auto& [varName, type, par1, par2] : config) {
0106 if (type == DeepSCInputs::ScalerType::MeanRms)
0107 inputs.push_back((variables.at(varName) - par1) / par2);
0108 else if (type == DeepSCInputs::ScalerType::MinMax)
0109 inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
0110 else if (type == DeepSCInputs::ScalerType::None) {
0111 inputs.push_back(variables.at(varName));
0112 }
0113
0114
0115 }
0116 return inputs;
0117 }
0118
0119 std::vector<std::vector<float>> DeepSCGraphEvaluation::evaluate(const DeepSCInputs::Inputs& inputs) const {
0120 LogDebug("DeepSCGraphEvaluation") << "Starting evaluation";
0121
0122
0123 std::vector<std::vector<float>> outputs_clustering;
0124
0125
0126
0127 size_t nInputs = inputs.clustersX.size();
0128 uint iB = -1;
0129 while (nInputs > 0) {
0130 iB++;
0131 size_t nItems;
0132 if (nInputs >= cfg_.batchSize) {
0133 nItems = cfg_.batchSize;
0134 nInputs -= cfg_.batchSize;
0135 } else {
0136 nItems = nInputs;
0137 nInputs = 0;
0138 }
0139
0140
0141 tensorflow::Tensor clsX_{tensorflow::DT_FLOAT,
0142 {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.nClusterFeatures}};
0143 tensorflow::Tensor windX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.nWindowFeatures}};
0144 tensorflow::Tensor hitsX_{tensorflow::DT_FLOAT,
0145 {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.maxNRechits, cfg_.nHitsFeatures}};
0146 tensorflow::Tensor isSeedX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.maxNClusters, 1}};
0147 tensorflow::Tensor nClsSize_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems)}};
0148
0149
0150 for (size_t b = 0; b < nItems; b++) {
0151 const auto& cls_data = inputs.clustersX[iB * cfg_.batchSize + b];
0152
0153 for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0154
0155 for (size_t z = 0; z < cfg_.nClusterFeatures; z++) {
0156 if (k < cls_data.size()) {
0157 clsX_.tensor<float, 3>()(b, k, z) = float(cls_data[k][z]);
0158 } else {
0159 clsX_.tensor<float, 3>()(b, k, z) = 0.;
0160 }
0161 }
0162 }
0163 }
0164
0165
0166 for (size_t b = 0; b < nItems; b++) {
0167 const auto& wind_features = inputs.windowX[iB * cfg_.batchSize + b];
0168
0169 for (size_t k = 0; k < cfg_.nWindowFeatures; k++) {
0170 windX_.matrix<float>()(b, k) = float(wind_features[k]);
0171 }
0172 }
0173
0174
0175 for (size_t b = 0; b < nItems; b++) {
0176 const auto& hits_data = inputs.hitsX[iB * cfg_.batchSize + b];
0177 size_t ncls_in_window = hits_data.size();
0178
0179 for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0180
0181 size_t nhits_in_cluster;
0182 if (k < ncls_in_window)
0183 nhits_in_cluster = hits_data[k].size();
0184 else
0185 nhits_in_cluster = 0;
0186
0187
0188 for (size_t j = 0; j < cfg_.maxNRechits; j++) {
0189
0190 bool ok = j < nhits_in_cluster;
0191
0192 for (size_t z = 0; z < cfg_.nHitsFeatures; z++) {
0193 if (ok)
0194 hitsX_.tensor<float, 4>()(b, k, j, z) = float(hits_data[k][j][z]);
0195 else
0196 hitsX_.tensor<float, 4>()(b, k, j, z) = 0.;
0197 }
0198 }
0199 }
0200 }
0201
0202
0203 for (size_t b = 0; b < nItems; b++) {
0204 const auto& isSeed_data = inputs.isSeed[iB * cfg_.batchSize + b];
0205
0206 for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0207 if (k < isSeed_data.size()) {
0208 isSeedX_.tensor<float, 3>()(b, k, 0) = float(isSeed_data[k]);
0209 } else {
0210 isSeedX_.tensor<float, 3>()(b, k, 0) = 0.;
0211 }
0212 }
0213 }
0214
0215 for (size_t b = 0; b < nItems; b++) {
0216 nClsSize_.vec<float>()(b) = float(inputs.clustersX[iB * cfg_.batchSize + b].size());
0217 }
0218
0219 std::vector<std::pair<std::string, tensorflow::Tensor>> feed_dict = {
0220 {"input_1", clsX_}, {"input_2", windX_}, {"input_3", hitsX_}, {"input_4", isSeedX_}, {"input_5", nClsSize_}};
0221
0222
0223 std::vector<tensorflow::Tensor> outputs_tf;
0224
0225
0226 LogDebug("DeepSCGraphEvaluation") << "Run model";
0227 tensorflow::run(session_, feed_dict, {"cl_class", "wind_class"}, &outputs_tf);
0228
0229 const auto& y_cl = outputs_tf[0].tensor<float, 3>();
0230
0231 for (size_t b = 0; b < nItems; b++) {
0232 uint ncls = inputs.clustersX[iB * cfg_.batchSize + b].size();
0233 std::vector<float> cl_output(ncls);
0234 for (size_t iC = 0; iC < ncls; iC++) {
0235 if (iC < cfg_.maxNClusters) {
0236 float y = y_cl(b, iC, 0);
0237
0238 cl_output[iC] = 1 / (1 + std::exp(-y));
0239 } else {
0240
0241 cl_output[iC] = 0;
0242 }
0243 }
0244 outputs_clustering.push_back(cl_output);
0245 }
0246 }
0247
0248 return outputs_clustering;
0249 }