Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-06-20 22:43:40

0001 #include "RecoEcal/EgammaCoreTools/interface/DeepSCGraphEvaluation.h"
0002 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0003 #include "FWCore/ParameterSet/interface/FileInPath.h"
0004 #include "TMath.h"
0005 #include <iostream>
0006 #include <fstream>
0007 using namespace reco;
0008 
0009 const std::vector<std::string> DeepSCGraphEvaluation::availableClusterInputs = {"cl_energy",
0010                                                                                 "cl_et",
0011                                                                                 "cl_eta",
0012                                                                                 "cl_phi",
0013                                                                                 "cl_ieta",
0014                                                                                 "cl_iphi",
0015                                                                                 "cl_iz",
0016                                                                                 "cl_seed_dEta",
0017                                                                                 "cl_seed_dPhi",
0018                                                                                 "cl_seed_dEnergy",
0019                                                                                 "cl_seed_dEt",
0020                                                                                 "cl_nxtals"};
0021 const std::vector<std::string> DeepSCGraphEvaluation::availableWindowInputs = {
0022     "max_cl_energy", "max_cl_et",        "max_cl_eta",       "max_cl_phi",          "max_cl_ieta",     "max_cl_iphi",
0023     "max_cl_iz",     "max_cl_seed_dEta", "max_cl_seed_dPhi", "max_cl_seed_dEnergy", "max_cl_seed_dEt", "max_cl_nxtals",
0024     "min_cl_energy", "min_cl_et",        "min_cl_eta",       "min_cl_phi",          "min_cl_ieta",     "min_cl_iphi",
0025     "min_cl_iz",     "min_cl_seed_dEta", "min_cl_seed_dPhi", "min_cl_seed_dEnergy", "min_cl_seed_dEt", "min_cl_nxtals",
0026     "avg_cl_energy", "avg_cl_et",        "avg_cl_eta",       "avg_cl_phi",          "avg_cl_ieta",     "avg_cl_iphi",
0027     "avg_cl_iz",     "avg_cl_seed_dEta", "avg_cl_seed_dPhi", "avg_cl_seed_dEnergy", "avg_cl_seed_dEt", "avg_cl_nxtals"};
0028 const std::vector<std::string> DeepSCGraphEvaluation::availableHitsInputs = {"ieta", "iphi", "iz", "en_withfrac"};
0029 
0030 DeepSCGraphEvaluation::DeepSCGraphEvaluation(const DeepSCConfiguration& cfg) : cfg_(cfg) {
0031   tensorflow::setLogging("0");
0032   // Init TF graph and session objects
0033   initTensorFlowGraphAndSession();
0034   // Init scaler configs
0035   inputFeaturesClusters =
0036       readInputFeaturesConfig(cfg_.configFileClusterFeatures, DeepSCGraphEvaluation::availableClusterInputs);
0037   if (inputFeaturesClusters.size() != cfg_.nClusterFeatures) {
0038     throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0039                                                << "parameters in the scaler file.";
0040   }
0041   inputFeaturesWindows =
0042       readInputFeaturesConfig(cfg_.configFileWindowFeatures, DeepSCGraphEvaluation::availableWindowInputs);
0043   if (inputFeaturesWindows.size() != cfg_.nWindowFeatures) {
0044     throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0045                                                << "parameters in the scaler file.";
0046   }
0047   inputFeaturesHits = readInputFeaturesConfig(cfg_.configFileHitsFeatures, DeepSCGraphEvaluation::availableHitsInputs);
0048   if (inputFeaturesHits.size() != cfg_.nHitsFeatures) {
0049     throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
0050                                                << "parameters in the scaler file.";
0051   }
0052 }
0053 
0054 DeepSCGraphEvaluation::~DeepSCGraphEvaluation() {
0055   if (session_ != nullptr)
0056     tensorflow::closeSession(session_);
0057 }
0058 
0059 void DeepSCGraphEvaluation::initTensorFlowGraphAndSession() {
0060   // load the graph definition
0061   LogDebug("DeepSCGraphEvaluation") << "Loading graph";
0062   graphDef_ =
0063       std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(cfg_.modelFile).fullPath()));
0064   LogDebug("DeepSCGraphEvaluation") << "Starting TF sessions";
0065   session_ = tensorflow::createSession(graphDef_.get());
0066   LogDebug("DeepSCGraphEvaluation") << "TF ready";
0067 }
0068 
0069 DeepSCInputs::InputConfigs DeepSCGraphEvaluation::readInputFeaturesConfig(
0070     std::string file, const std::vector<std::string>& availableInputs) const {
0071   DeepSCInputs::InputConfigs features;
0072   LogDebug("DeepSCGraphEvaluation") << "Reading scaler file: " << edm::FileInPath(file).fullPath();
0073   std::ifstream inputfile{edm::FileInPath(file).fullPath()};
0074   if (inputfile.fail()) {
0075     throw cms::Exception("MissingFile") << "Input features config file not found: " << file;
0076   } else {
0077     // Now read mean, scale factors for each variable
0078     float par1, par2;
0079     std::string varName, type_str;
0080     DeepSCInputs::ScalerType type;
0081     while (inputfile >> varName >> type_str >> par1 >> par2) {
0082       if (type_str == "MeanRms")
0083         type = DeepSCInputs::ScalerType::MeanRms;
0084       else if (type_str == "MinMax")
0085         type = DeepSCInputs::ScalerType::MinMax;
0086       else
0087         type = DeepSCInputs::ScalerType::None;  //do nothing
0088       features.push_back(DeepSCInputs::InputConfig{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
0089       // Protection for mismatch between requested variables and the available ones
0090       auto match = std::find(availableInputs.begin(), availableInputs.end(), varName);
0091       if (match == std::end(availableInputs)) {
0092         throw cms::Exception("MissingInput") << "Requested input (" << varName << ") not available between DNN inputs";
0093       }
0094       LogDebug("DeepSCGraphEvalutation") << "Registered input feature: " << varName << ", scaler=" << type_str;
0095     }
0096   }
0097   return features;
0098 }
0099 
0100 std::vector<float> DeepSCGraphEvaluation::getScaledInputs(const DeepSCInputs::FeaturesMap& variables,
0101                                                           const DeepSCInputs::InputConfigs& config) const {
0102   std::vector<float> inputs;
0103   inputs.reserve(config.size());
0104   // Loop on the list of requested variables and scaling values
0105   // Different type of scaling are available: 0=no scaling, 1=standard scaler, 2=minmax
0106   for (auto& [varName, type, par1, par2] : config) {
0107     if (type == DeepSCInputs::ScalerType::MeanRms)
0108       inputs.push_back((variables.at(varName) - par1) / par2);
0109     else if (type == DeepSCInputs::ScalerType::MinMax)
0110       inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
0111     else if (type == DeepSCInputs::ScalerType::None) {
0112       inputs.push_back(variables.at(varName));  // Do nothing on the variable
0113     }
0114     //Protection for mismatch between requested variables and the available ones
0115     // have been added when the scaler config are loaded --> here we know that the variables are available
0116   }
0117   return inputs;
0118 }
0119 
0120 std::vector<std::vector<float>> DeepSCGraphEvaluation::evaluate(const DeepSCInputs::Inputs& inputs) const {
0121   LogDebug("DeepSCGraphEvaluation") << "Starting evaluation";
0122 
0123   // Final output
0124   std::vector<std::vector<float>> outputs_clustering;
0125 
0126   // We need to split the total inputs in N batches of size batchSize (configured in the producer)
0127   // being careful with the last batch which will have less than batchSize elements
0128   size_t nInputs = inputs.clustersX.size();
0129   uint iB = -1;  // batch index
0130   while (nInputs > 0) {
0131     iB++;  // go to next batch
0132     size_t nItems;
0133     if (nInputs >= cfg_.batchSize) {
0134       nItems = cfg_.batchSize;
0135       nInputs -= cfg_.batchSize;
0136     } else {
0137       nItems = nInputs;
0138       nInputs = 0;
0139     }
0140 
0141     // Inputs
0142     tensorflow::Tensor clsX_{tensorflow::DT_FLOAT,
0143                              {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.nClusterFeatures}};
0144     tensorflow::Tensor windX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.nWindowFeatures}};
0145     tensorflow::Tensor hitsX_{tensorflow::DT_FLOAT,
0146                               {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.maxNRechits, cfg_.nHitsFeatures}};
0147     tensorflow::Tensor isSeedX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.maxNClusters, 1}};
0148     tensorflow::Tensor nClsSize_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems)}};
0149 
0150     // Look on batch dim
0151     for (size_t b = 0; b < nItems; b++) {
0152       const auto& cls_data = inputs.clustersX[iB * cfg_.batchSize + b];
0153       // Loop on clusters
0154       for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0155         // Loop on features
0156         for (size_t z = 0; z < cfg_.nClusterFeatures; z++) {
0157           if (k < cls_data.size()) {
0158             clsX_.tensor<float, 3>()(b, k, z) = float(cls_data[k][z]);
0159           } else {
0160             clsX_.tensor<float, 3>()(b, k, z) = 0.;
0161           }
0162         }
0163       }
0164     }
0165 
0166     // Look on batch dim
0167     for (size_t b = 0; b < nItems; b++) {
0168       const auto& wind_features = inputs.windowX[iB * cfg_.batchSize + b];
0169       // Loop on features
0170       for (size_t k = 0; k < cfg_.nWindowFeatures; k++) {
0171         windX_.matrix<float>()(b, k) = float(wind_features[k]);
0172       }
0173     }
0174 
0175     // Look on batch dim
0176     for (size_t b = 0; b < nItems; b++) {
0177       const auto& hits_data = inputs.hitsX[iB * cfg_.batchSize + b];
0178       size_t ncls_in_window = hits_data.size();
0179       // Loop on clusters
0180       for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0181         // Check padding
0182         size_t nhits_in_cluster;
0183         if (k < ncls_in_window)
0184           nhits_in_cluster = hits_data[k].size();
0185         else
0186           nhits_in_cluster = 0;
0187 
0188         // Loop on hits
0189         for (size_t j = 0; j < cfg_.maxNRechits; j++) {
0190           // Check the number of clusters and hits for padding
0191           bool ok = j < nhits_in_cluster;
0192           // Loop on rechits features
0193           for (size_t z = 0; z < cfg_.nHitsFeatures; z++) {
0194             if (ok)
0195               hitsX_.tensor<float, 4>()(b, k, j, z) = float(hits_data[k][j][z]);
0196             else
0197               hitsX_.tensor<float, 4>()(b, k, j, z) = 0.;
0198           }
0199         }
0200       }
0201     }
0202 
0203     // Look on batch dim
0204     for (size_t b = 0; b < nItems; b++) {
0205       const auto& isSeed_data = inputs.isSeed[iB * cfg_.batchSize + b];
0206       // Loop on clusters
0207       for (size_t k = 0; k < cfg_.maxNClusters; k++) {
0208         if (k < isSeed_data.size()) {
0209           isSeedX_.tensor<float, 3>()(b, k, 0) = float(isSeed_data[k]);
0210         } else {
0211           isSeedX_.tensor<float, 3>()(b, k, 0) = 0.;
0212         }
0213       }
0214     }
0215 
0216     for (size_t b = 0; b < nItems; b++) {
0217       nClsSize_.vec<float>()(b) = float(inputs.clustersX[iB * cfg_.batchSize + b].size());
0218     }
0219 
0220     std::vector<std::pair<std::string, tensorflow::Tensor>> feed_dict = {
0221         {"input_1", clsX_}, {"input_2", windX_}, {"input_3", hitsX_}, {"input_4", isSeedX_}, {"input_5", nClsSize_}};
0222 
0223     // prepare tensorflow outputs
0224     std::vector<tensorflow::Tensor> outputs_tf;
0225     // // Define the output and run
0226     // // Run the models
0227     LogDebug("DeepSCGraphEvaluation") << "Run model";
0228     tensorflow::run(session_, feed_dict, {"cl_class", "wind_class"}, &outputs_tf);
0229     // Reading the 1st output: clustering probability
0230     const auto& y_cl = outputs_tf[0].tensor<float, 3>();
0231     // Iterate on the clusters for each window
0232     for (size_t b = 0; b < nItems; b++) {
0233       uint ncls = inputs.clustersX[iB * cfg_.batchSize + b].size();
0234       std::vector<float> cl_output(ncls);
0235       for (size_t iC = 0; iC < ncls; iC++) {
0236         if (iC < cfg_.maxNClusters) {
0237           float y = y_cl(b, iC, 0);
0238           // Applying sigmoid to logit
0239           cl_output[iC] = 1 / (1 + std::exp(-y));
0240         } else {
0241           // The number of clusters is over the padding max dim
0242           cl_output[iC] = 0;
0243         }
0244       }
0245       outputs_clustering.push_back(cl_output);
0246     }
0247   }
0248 
0249   return outputs_clustering;
0250 }