File indexing completed on 2024-04-06 12:24:45
0001 #include "RecoEcal/EgammaCoreTools/interface/DeepSCGraphEvaluation.h"
0002 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0003 #include "FWCore/ParameterSet/interface/FileInPath.h"
0004 #include "TMath.h"
0005 #include <iostream>
0006 #include <fstream>
0007 using namespace reco;
0008
0009 const std::vector<std::string> DeepSCGraphEvaluation::availableClusterInputs = {"cl_energy",
0010 "cl_et",
0011 "cl_eta",
0012 "cl_phi",
0013 "cl_ieta",
0014 "cl_iphi",
0015 "cl_iz",
0016 "cl_seed_dEta",
0017 "cl_seed_dPhi",
0018 "cl_seed_dEnergy",
0019 "cl_seed_dEt",
0020 "cl_nxtals"};
0021 const std::vector<std::string> DeepSCGraphEvaluation::availableWindowInputs = {
0022 "max_cl_energy", "max_cl_et", "max_cl_eta", "max_cl_phi", "max_cl_ieta", "max_cl_iphi",
0023 "max_cl_iz", "max_cl_seed_dEta", "max_cl_seed_dPhi", "max_cl_seed_dEnergy", "max_cl_seed_dEt", "max_cl_nxtals",
0024 "min_cl_energy", "min_cl_et", "min_cl_eta", "min_cl_phi", "min_cl_ieta", "min_cl_iphi",
0025 "min_cl_iz", "min_cl_seed_dEta", "min_cl_seed_dPhi", "min_cl_seed_dEnergy", "min_cl_seed_dEt", "min_cl_nxtals",
0026 "avg_cl_energy", "avg_cl_et", "avg_cl_eta", "avg_cl_phi", "avg_cl_ieta", "avg_cl_iphi",
0027 "avg_cl_iz", "avg_cl_seed_dEta", "avg_cl_seed_dPhi", "avg_cl_seed_dEnergy", "avg_cl_seed_dEt", "avg_cl_nxtals"};
0028 const std::vector<std::string> DeepSCGraphEvaluation::availableHitsInputs = {"ieta", "iphi", "iz", "en_withfrac"};
0029
0030 DeepSCGraphEvaluation::DeepSCGraphEvaluation(const DeepSCConfiguration& cfg) : cfg_(cfg) {
0031 tensorflow::setLogging("0");
0032
0033 initTensorFlowGraphAndSession();
0034
0035 inputFeaturesClusters =
0036 readInputFeaturesConfig(cfg_.configFileClusterFeatures, DeepSCGraphEvaluation::availableClusterInputs);
0037 if (inputFeaturesClusters.size() != cfg_.nClusterFeatures) {
0038 throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0039 << "parameters in the scaler file.";
0040 }
0041 inputFeaturesWindows =
0042 readInputFeaturesConfig(cfg_.configFileWindowFeatures, DeepSCGraphEvaluation::availableWindowInputs);
0043 if (inputFeaturesWindows.size() != cfg_.nWindowFeatures) {
0044 throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0045 << "parameters in the scaler file.";
0046 }
0047 inputFeaturesHits = readInputFeaturesConfig(cfg_.configFileHitsFeatures, DeepSCGraphEvaluation::availableHitsInputs);
0048 if (inputFeaturesHits.size() != cfg_.nHitsFeatures) {
0049 throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0050 << "parameters in the scaler file.";
0051 }
0052 }
0053
0054 DeepSCGraphEvaluation::~DeepSCGraphEvaluation() {
0055 if (session_ != nullptr)
0056 tensorflow::closeSession(session_);
0057 }
0058
0059 void DeepSCGraphEvaluation::initTensorFlowGraphAndSession() {
0060
0061 LogDebug("DeepSCGraphEvaluation") << "Loading graph";
0062 graphDef_ =
0063 std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(cfg_.modelFile).fullPath()));
0064 LogDebug("DeepSCGraphEvaluation") << "Starting TF sessions";
0065 session_ = tensorflow::createSession(graphDef_.get());
0066 LogDebug("DeepSCGraphEvaluation") << "TF ready";
0067 }
0068
0069 DeepSCInputs::InputConfigs DeepSCGraphEvaluation::readInputFeaturesConfig(
0070 std::string file, const std::vector<std::string>& availableInputs) const {
0071 DeepSCInputs::InputConfigs features;
0072 LogDebug("DeepSCGraphEvaluation") << "Reading scaler file: " << edm::FileInPath(file).fullPath();
0073 std::ifstream inputfile{edm::FileInPath(file).fullPath()};
0074 if (inputfile.fail()) {
0075 throw cms::Exception("MissingFile") << "Input features config file not found: " << file;
0076 } else {
0077
0078 float par1, par2;
0079 std::string varName, type_str;
0080 DeepSCInputs::ScalerType type;
0081 while (inputfile >> varName >> type_str >> par1 >> par2) {
0082 if (type_str == "MeanRms")
0083 type = DeepSCInputs::ScalerType::MeanRms;
0084 else if (type_str == "MinMax")
0085 type = DeepSCInputs::ScalerType::MinMax;
0086 else
0087 type = DeepSCInputs::ScalerType::None;
0088 features.push_back(DeepSCInputs::InputConfig{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
0089
0090 auto match = std::find(availableInputs.begin(), availableInputs.end(), varName);
0091 if (match == std::end(availableInputs)) {
0092 throw cms::Exception("MissingInput") << "Requested input (" << varName << ") not available between DNN inputs";
0093 }
0094 LogDebug("DeepSCGraphEvalutation") << "Registered input feature: " << varName << ", scaler=" << type_str;
0095 }
0096 }
0097 return features;
0098 }
0099
0100 std::vector<float> DeepSCGraphEvaluation::getScaledInputs(const DeepSCInputs::FeaturesMap& variables,
0101 const DeepSCInputs::InputConfigs& config) const {
0102 std::vector<float> inputs;
0103 inputs.reserve(config.size());
0104
0105
0106 for (auto& [varName, type, par1, par2] : config) {
0107 if (type == DeepSCInputs::ScalerType::MeanRms)
0108 inputs.push_back((variables.at(varName) - par1) / par2);
0109 else if (type == DeepSCInputs::ScalerType::MinMax)
0110 inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
0111 else if (type == DeepSCInputs::ScalerType::None) {
0112 inputs.push_back(variables.at(varName));
0113 }
0114
0115
0116 }
0117 return inputs;
0118 }
0119
0120 std::vector<std::vector<float>> DeepSCGraphEvaluation::evaluate(const DeepSCInputs::Inputs& inputs) const {
0121 LogDebug("DeepSCGraphEvaluation") << "Starting evaluation";
0122
0123
0124 std::vector<std::vector<float>> outputs_clustering;
0125
0126
0127
0128 size_t nInputs = inputs.clustersX.size();
0129 uint iB = -1;
0130 while (nInputs > 0) {
0131 iB++;
0132 size_t nItems;
0133 if (nInputs >= cfg_.batchSize) {
0134 nItems = cfg_.batchSize;
0135 nInputs -= cfg_.batchSize;
0136 } else {
0137 nItems = nInputs;
0138 nInputs = 0;
0139 }
0140
0141
0142 tensorflow::Tensor clsX_{tensorflow::DT_FLOAT,
0143 {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.nClusterFeatures}};
0144 tensorflow::Tensor windX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.nWindowFeatures}};
0145 tensorflow::Tensor hitsX_{tensorflow::DT_FLOAT,
0146 {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.maxNRechits, cfg_.nHitsFeatures}};
0147 tensorflow::Tensor isSeedX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.maxNClusters, 1}};
0148 tensorflow::Tensor nClsSize_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems)}};
0149
0150
0151 for (size_t b = 0; b < nItems; b++) {
0152 const auto& cls_data = inputs.clustersX[iB * cfg_.batchSize + b];
0153
0154 for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0155
0156 for (size_t z = 0; z < cfg_.nClusterFeatures; z++) {
0157 if (k < cls_data.size()) {
0158 clsX_.tensor<float, 3>()(b, k, z) = float(cls_data[k][z]);
0159 } else {
0160 clsX_.tensor<float, 3>()(b, k, z) = 0.;
0161 }
0162 }
0163 }
0164 }
0165
0166
0167 for (size_t b = 0; b < nItems; b++) {
0168 const auto& wind_features = inputs.windowX[iB * cfg_.batchSize + b];
0169
0170 for (size_t k = 0; k < cfg_.nWindowFeatures; k++) {
0171 windX_.matrix<float>()(b, k) = float(wind_features[k]);
0172 }
0173 }
0174
0175
0176 for (size_t b = 0; b < nItems; b++) {
0177 const auto& hits_data = inputs.hitsX[iB * cfg_.batchSize + b];
0178 size_t ncls_in_window = hits_data.size();
0179
0180 for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0181
0182 size_t nhits_in_cluster;
0183 if (k < ncls_in_window)
0184 nhits_in_cluster = hits_data[k].size();
0185 else
0186 nhits_in_cluster = 0;
0187
0188
0189 for (size_t j = 0; j < cfg_.maxNRechits; j++) {
0190
0191 bool ok = j < nhits_in_cluster;
0192
0193 for (size_t z = 0; z < cfg_.nHitsFeatures; z++) {
0194 if (ok)
0195 hitsX_.tensor<float, 4>()(b, k, j, z) = float(hits_data[k][j][z]);
0196 else
0197 hitsX_.tensor<float, 4>()(b, k, j, z) = 0.;
0198 }
0199 }
0200 }
0201 }
0202
0203
0204 for (size_t b = 0; b < nItems; b++) {
0205 const auto& isSeed_data = inputs.isSeed[iB * cfg_.batchSize + b];
0206
0207 for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0208 if (k < isSeed_data.size()) {
0209 isSeedX_.tensor<float, 3>()(b, k, 0) = float(isSeed_data[k]);
0210 } else {
0211 isSeedX_.tensor<float, 3>()(b, k, 0) = 0.;
0212 }
0213 }
0214 }
0215
0216 for (size_t b = 0; b < nItems; b++) {
0217 nClsSize_.vec<float>()(b) = float(inputs.clustersX[iB * cfg_.batchSize + b].size());
0218 }
0219
0220 std::vector<std::pair<std::string, tensorflow::Tensor>> feed_dict = {
0221 {"input_1", clsX_}, {"input_2", windX_}, {"input_3", hitsX_}, {"input_4", isSeedX_}, {"input_5", nClsSize_}};
0222
0223
0224 std::vector<tensorflow::Tensor> outputs_tf;
0225
0226
0227 LogDebug("DeepSCGraphEvaluation") << "Run model";
0228 tensorflow::run(session_, feed_dict, {"cl_class", "wind_class"}, &outputs_tf);
0229
0230 const auto& y_cl = outputs_tf[0].tensor<float, 3>();
0231
0232 for (size_t b = 0; b < nItems; b++) {
0233 uint ncls = inputs.clustersX[iB * cfg_.batchSize + b].size();
0234 std::vector<float> cl_output(ncls);
0235 for (size_t iC = 0; iC < ncls; iC++) {
0236 if (iC < cfg_.maxNClusters) {
0237 float y = y_cl(b, iC, 0);
0238
0239 cl_output[iC] = 1 / (1 + std::exp(-y));
0240 } else {
0241
0242 cl_output[iC] = 0;
0243 }
0244 }
0245 outputs_clustering.push_back(cl_output);
0246 }
0247 }
0248
0249 return outputs_clustering;
0250 }