Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:45

0001 #include "RecoEcal/EgammaCoreTools/interface/EcalClustersGraph.h"
0002 #include <cmath>
0003 
0004 using namespace std;
0005 using namespace reco;
0006 using namespace reco::DeepSCInputs;
0007 
0008 typedef std::vector<CalibratedPFCluster> CalibratedPFClusterVector;
0009 
0010 EcalClustersGraph::EcalClustersGraph(CalibratedPFClusterVector clusters,
0011                                      int nSeeds,
0012                                      const CaloTopology* topology,
0013                                      const CaloSubdetectorGeometry* ebGeom,
0014                                      const CaloSubdetectorGeometry* eeGeom,
0015                                      const EcalRecHitCollection* recHitsEB,
0016                                      const EcalRecHitCollection* recHitsEE,
0017                                      const SCProducerCache* cache)
0018     : clusters_(clusters),
0019       nSeeds_(nSeeds),
0020       nCls_(clusters_.size()),
0021       topology_(topology),
0022       ebGeom_(ebGeom),
0023       eeGeom_(eeGeom),
0024       recHitsEB_(recHitsEB),
0025       recHitsEE_(recHitsEE),
0026       scProducerCache_(cache),
0027       graphMap_(clusters.size()) {
0028   // Prepare the batch size of the tensor inputs == number of windows
0029   inputs_.clustersX.resize(nSeeds_);
0030   inputs_.windowX.resize(nSeeds_);
0031   inputs_.hitsX.resize(nSeeds_);
0032   inputs_.isSeed.resize(nSeeds_);
0033 
0034   // Init the graph nodes
0035   for (size_t i = 0; i < nCls_; i++) {
0036     if (i < nSeeds_)
0037       graphMap_.addNode(i, GraphMap::NodeCategory::kSeed);
0038     else
0039       graphMap_.addNode(i, GraphMap::NodeCategory::kNode);
0040   }
0041 
0042   // Select the collection strategy from the config
0043   if (scProducerCache_->config.collectionStrategy == "Cascade") {
0044     strategy_ = GraphMap::CollectionStrategy::Cascade;
0045   } else if (scProducerCache_->config.collectionStrategy == "CollectAndMerge") {
0046     strategy_ = GraphMap::CollectionStrategy::CollectAndMerge;
0047   } else if (scProducerCache_->config.collectionStrategy == "SeedsFirst") {
0048     strategy_ = GraphMap::CollectionStrategy::SeedsFirst;
0049   } else if (scProducerCache_->config.collectionStrategy == "CascadeHighest") {
0050     strategy_ = GraphMap::CollectionStrategy::CascadeHighest;
0051   } else {
0052     edm::LogWarning("EcalClustersGraph") << "GraphMap::CollectionStrategy not recognized. Default to Cascade";
0053     strategy_ = GraphMap::CollectionStrategy::Cascade;
0054   }
0055 
0056   LogTrace("EcalClustersGraph") << "EcalClustersGraph created. nSeeds " << nSeeds_ << ", nClusters " << nCls_ << endl;
0057 }
0058 
0059 std::array<int, 3> EcalClustersGraph::clusterPosition(const CaloCluster* cluster) const {
0060   std::array<int, 3> coordinates;
0061   int ieta = -999;
0062   int iphi = -999;
0063   int iz = -99;
0064 
0065   const math::XYZPoint& caloPos = cluster->position();
0066   if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) {
0067     EBDetId eb_id(ebGeom_->getClosestCell(GlobalPoint(caloPos.x(), caloPos.y(), caloPos.z())));
0068     ieta = eb_id.ieta();
0069     iphi = eb_id.iphi();
0070     iz = 0;
0071   } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) {
0072     EEDetId ee_id(eeGeom_->getClosestCell(GlobalPoint(caloPos.x(), caloPos.y(), caloPos.z())));
0073     ieta = ee_id.ix();
0074     iphi = ee_id.iy();
0075     if (ee_id.zside() < 0)
0076       iz = -1;
0077     if (ee_id.zside() > 0)
0078       iz = 1;
0079   }
0080 
0081   coordinates[0] = ieta;
0082   coordinates[1] = iphi;
0083   coordinates[2] = iz;
0084   return coordinates;
0085 }
0086 
0087 std::array<double, 3> EcalClustersGraph::dynamicWindow(double seedEta) const {
0088   // The dEta-dPhi detector window dimension is chosen to that the algorithm is always larger than
0089   // the Mustache dimension
0090   std::array<double, 3> window;
0091 
0092   double eta = std::abs(seedEta);
0093   double deta_down = 0.;
0094   double deta_up = 0.;
0095   double dphi = 0.;
0096 
0097   //deta_down
0098   constexpr float deta_down_bins[2] = {2.1, 2.5};
0099   if (eta < deta_down_bins[0])
0100     deta_down = -0.075;
0101   else if (eta >= deta_down_bins[0] && eta < deta_down_bins[1])
0102     deta_down = -0.1875 * eta + 0.31875;
0103   else if (eta >= deta_down_bins[1])
0104     deta_down = -0.15;
0105 
0106   //deta_up
0107   constexpr float deta_up_bins[4] = {0.1, 1.3, 1.7, 1.9};
0108   if (eta < deta_up_bins[0])
0109     deta_up = 0.075;
0110   else if (eta >= deta_up_bins[0] && eta < deta_up_bins[1])
0111     deta_up = 0.0758929 - 0.0178571 * eta + 0.0892857 * (eta * eta);
0112   else if (eta >= deta_up_bins[1] && eta < deta_up_bins[2])
0113     deta_up = 0.2;
0114   else if (eta >= deta_up_bins[2] && eta < deta_up_bins[3])
0115     deta_up = 0.625 - 0.25 * eta;
0116   else if (eta >= deta_up_bins[3])
0117     deta_up = 0.15;
0118 
0119   //dphi
0120   constexpr float dphi_bins[2] = {1.9, 2.7};
0121   if (eta < dphi_bins[0])
0122     dphi = 0.6;
0123   else if (eta >= dphi_bins[0] && eta < dphi_bins[1])
0124     dphi = 1.075 - 0.25 * eta;
0125   else if (eta >= dphi_bins[1])
0126     dphi = 0.4;
0127 
0128   window[0] = deta_down;
0129   window[1] = deta_up;
0130   window[2] = dphi;
0131 
0132   return window;
0133 }
0134 
0135 void EcalClustersGraph::initWindows() {
0136   for (uint is = 0; is < nSeeds_; is++) {
0137     const auto& seedLocal = clusterPosition((clusters_[is]).ptr().get());
0138     double seed_eta = clusters_[is].eta();
0139     double seed_phi = clusters_[is].phi();
0140     const auto& width = dynamicWindow(seed_eta);
0141     // Add a self loop on the seed node
0142     graphMap_.addEdge(is, is);
0143 
0144     // The graph associated to each seed includes only other seeds if they have a smaller energy.
0145     // This is imposed to be consistent with the current trained model, which has been training on "non-overalapping windows".
0146     // The effect of connecting all the seeds, and not only the smaller energy ones has been already tested: the reconstruction
0147     // differences are negligible (tested with Cascade collection algo).
0148     // In the next version of the training this requirement will be relaxed to have a model that fully matches the reconstruction
0149     // mechanism in terms of overlapping seeds.
0150     for (uint icl = is + 1; icl < nCls_; icl++) {
0151       if (is == icl)
0152         continue;
0153       const auto& clusterLocal = clusterPosition((clusters_[icl]).ptr().get());
0154       double cl_eta = clusters_[icl].eta();
0155       double cl_phi = clusters_[icl].phi();
0156       double dphi = deltaPhi(seed_phi, cl_phi);
0157       double deta = deltaEta(seed_eta, cl_eta);
0158 
0159       if (seedLocal[2] == clusterLocal[2] && deta >= width[0] && deta <= width[1] && std::abs(dphi) <= width[2]) {
0160         graphMap_.addEdge(is, icl);
0161       }
0162     }
0163   }
0164 }
0165 
0166 std::vector<std::vector<float>> EcalClustersGraph::fillHits(const CaloCluster* cluster) const {
0167   const std::vector<std::pair<DetId, float>>& hitsAndFractions = cluster->hitsAndFractions();
0168   std::vector<std::vector<float>> out(hitsAndFractions.size());
0169   if (hitsAndFractions.empty()) {
0170     edm::LogError("EcalClustersGraph") << "No hits in cluster!!";
0171   }
0172   // Map containing the available features for the rechits
0173   DeepSCInputs::FeaturesMap rechitsFeatures;
0174   for (unsigned int i = 0; i < hitsAndFractions.size(); i++) {
0175     rechitsFeatures.clear();
0176     if (hitsAndFractions[i].first.subdetId() == EcalBarrel) {
0177       double energy = (*recHitsEB_->find(hitsAndFractions[i].first)).energy();
0178       EBDetId eb_id(hitsAndFractions[i].first);
0179       rechitsFeatures["ieta"] = eb_id.ieta();                                //ieta
0180       rechitsFeatures["iphi"] = eb_id.iphi();                                //iphi
0181       rechitsFeatures["iz"] = 0.;                                            //iz
0182       rechitsFeatures["en_withfrac"] = energy * hitsAndFractions[i].second;  //energy * fraction
0183     } else if (hitsAndFractions[i].first.subdetId() == EcalEndcap) {
0184       double energy = (*recHitsEE_->find(hitsAndFractions[i].first)).energy();
0185       EEDetId ee_id(hitsAndFractions[i].first);
0186       rechitsFeatures["ieta"] = ee_id.ix();  //ix
0187       rechitsFeatures["iphi"] = ee_id.iy();  //iy
0188       if (ee_id.zside() < 0)
0189         rechitsFeatures["iz"] = -1.;  //iz
0190       if (ee_id.zside() > 0)
0191         rechitsFeatures["iz"] = +1.;                                         //iz
0192       rechitsFeatures["en_withfrac"] = energy * hitsAndFractions[i].second;  //energy * fraction
0193     } else {
0194       edm::LogError("EcalClustersGraph") << "Rechit is not either EB or EE!!";
0195     }
0196     // Use the method in DeepSCGraphEvaluation to get only the requested variables and possible a rescaling
0197     // (depends on configuration)
0198     out[i] = scProducerCache_->deepSCEvaluator->getScaledInputs(rechitsFeatures,
0199                                                                 scProducerCache_->deepSCEvaluator->inputFeaturesHits);
0200   }
0201   return out;
0202 }
0203 
0204 DeepSCInputs::FeaturesMap EcalClustersGraph::computeVariables(const CaloCluster* seed,
0205                                                               const CaloCluster* cluster) const {
0206   DeepSCInputs::FeaturesMap clFeatures;
0207   const auto& clusterLocal = clusterPosition(cluster);
0208   double cl_energy = cluster->energy();
0209   double cl_eta = cluster->eta();
0210   double cl_phi = cluster->phi();
0211   double seed_energy = seed->energy();
0212   double seed_eta = seed->eta();
0213   double seed_phi = seed->phi();
0214   clFeatures["cl_energy"] = cl_energy;                                                                //cl_energy
0215   clFeatures["cl_et"] = cl_energy / std::cosh(cl_eta);                                                //cl_et
0216   clFeatures["cl_eta"] = cl_eta;                                                                      //cl_eta
0217   clFeatures["cl_phi"] = cl_phi;                                                                      //cl_phi
0218   clFeatures["cl_ieta"] = clusterLocal[0];                                                            //cl_ieta/ix
0219   clFeatures["cl_iphi"] = clusterLocal[1];                                                            //cl_iphi/iy
0220   clFeatures["cl_iz"] = clusterLocal[2];                                                              //cl_iz
0221   clFeatures["cl_seed_dEta"] = deltaEta(seed_eta, cl_eta);                                            //cl_dEta
0222   clFeatures["cl_seed_dPhi"] = deltaPhi(seed_phi, cl_phi);                                            //cl_dPhi
0223   clFeatures["cl_seed_dEnergy"] = seed_energy - cl_energy;                                            //cl_dEnergy
0224   clFeatures["cl_seed_dEt"] = (seed_energy / std::cosh(seed_eta)) - (cl_energy / std::cosh(cl_eta));  //cl_dEt
0225   clFeatures["cl_nxtals"] = cluster->hitsAndFractions().size();                                       // nxtals
0226   return clFeatures;
0227 }
0228 
0229 DeepSCInputs::FeaturesMap EcalClustersGraph::computeWindowVariables(
0230     const std::vector<DeepSCInputs::FeaturesMap>& clusters) const {
0231   size_t nCls = clusters.size();
0232   std::map<std::string, float> min;
0233   std::map<std::string, float> max;
0234   std::map<std::string, float> avg;
0235   for (const auto& clFeatures : clusters) {
0236     for (auto const& [key, val] : clFeatures) {
0237       avg[key] += (val / nCls);
0238       if (val < min[key])
0239         min[key] = val;
0240       if (val > max[key])
0241         max[key] = val;
0242     }
0243   }
0244   DeepSCInputs::FeaturesMap windFeatures;
0245   for (auto const& el : clusters.front()) {
0246     windFeatures["max_" + el.first] = max[el.first];
0247     windFeatures["min_" + el.first] = min[el.first];
0248     windFeatures["avg_" + el.first] = avg[el.first];
0249   }
0250   return windFeatures;
0251 }
0252 
0253 std::pair<double, double> EcalClustersGraph::computeCovariances(const CaloCluster* cluster) {
0254   double numeratorEtaWidth = 0;
0255   double numeratorPhiWidth = 0;
0256   double denominator = cluster->energy();
0257   double clEta = cluster->position().eta();
0258   double clPhi = cluster->position().phi();
0259   std::shared_ptr<const CaloCellGeometry> this_cell;
0260   EcalRecHitCollection::const_iterator rHit;
0261 
0262   const std::vector<std::pair<DetId, float>>& detId = cluster->hitsAndFractions();
0263   // Loop over recHits associated with the given SuperCluster
0264   for (const auto& hit : detId) {
0265     if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) {
0266       rHit = recHitsEB_->find(hit.first);
0267       if (rHit == recHitsEB_->end()) {
0268         continue;
0269       }
0270       this_cell = ebGeom_->getGeometry(rHit->id());
0271     } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) {
0272       rHit = recHitsEE_->find(hit.first);
0273       if (rHit == recHitsEE_->end()) {
0274         continue;
0275       }
0276       this_cell = eeGeom_->getGeometry(rHit->id());
0277     } else {
0278       continue;
0279     }
0280 
0281     GlobalPoint position = this_cell->getPosition();
0282     //take into account energy fractions
0283     double energyHit = rHit->energy() * hit.second;
0284     //form differences
0285     double dPhi = deltaPhi(position.phi(), clPhi);
0286     double dEta = position.eta() - clEta;
0287     if (energyHit > 0) {
0288       numeratorEtaWidth += energyHit * dEta * dEta;
0289       numeratorPhiWidth += energyHit * dPhi * dPhi;
0290     }
0291   }
0292   double etaWidth = sqrt(numeratorEtaWidth / denominator);
0293   double phiWidth = sqrt(numeratorPhiWidth / denominator);
0294 
0295   return std::make_pair(etaWidth, phiWidth);
0296 }
0297 
0298 std::vector<double> EcalClustersGraph::computeShowerShapes(const CaloCluster* cluster, bool full5x5 = false) {
0299   std::vector<double> showerVars_;
0300   showerVars_.resize(8);
0301   widths_ = computeCovariances(cluster);
0302   float e1 = 1.;
0303   float e4 = 0.;
0304   float r9 = 0.;
0305 
0306   if (full5x5) {
0307     if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) {
0308       locCov_ = noZS::EcalClusterTools::localCovariances(*cluster, recHitsEB_, topology_);
0309       e1 = noZS::EcalClusterTools::eMax(*cluster, recHitsEB_);
0310       e4 = noZS::EcalClusterTools::eTop(*cluster, recHitsEB_, topology_) +
0311            noZS::EcalClusterTools::eRight(*cluster, recHitsEB_, topology_) +
0312            noZS::EcalClusterTools::eBottom(*cluster, recHitsEB_, topology_) +
0313            noZS::EcalClusterTools::eLeft(*cluster, recHitsEB_, topology_);
0314       r9 = noZS::EcalClusterTools::e3x3(*cluster, recHitsEB_, topology_) / cluster->energy();  //r9
0315 
0316     } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) {
0317       locCov_ = noZS::EcalClusterTools::localCovariances(*cluster, recHitsEE_, topology_);
0318       e1 = noZS::EcalClusterTools::eMax(*cluster, recHitsEE_);
0319       e4 = noZS::EcalClusterTools::eTop(*cluster, recHitsEE_, topology_) +
0320            noZS::EcalClusterTools::eRight(*cluster, recHitsEE_, topology_) +
0321            noZS::EcalClusterTools::eBottom(*cluster, recHitsEE_, topology_) +
0322            noZS::EcalClusterTools::eLeft(*cluster, recHitsEE_, topology_);
0323       r9 = noZS::EcalClusterTools::e3x3(*cluster, recHitsEE_, topology_) / cluster->energy();  //r9
0324     }
0325   } else {
0326     if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) {
0327       locCov_ = EcalClusterTools::localCovariances(*cluster, recHitsEB_, topology_);
0328       e1 = EcalClusterTools::eMax(*cluster, recHitsEB_);
0329       e4 = EcalClusterTools::eTop(*cluster, recHitsEB_, topology_) +
0330            EcalClusterTools::eRight(*cluster, recHitsEB_, topology_) +
0331            EcalClusterTools::eBottom(*cluster, recHitsEB_, topology_) +
0332            EcalClusterTools::eLeft(*cluster, recHitsEB_, topology_);
0333       r9 = EcalClusterTools::e3x3(*cluster, recHitsEB_, topology_) / cluster->energy();  //r9
0334     } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) {
0335       locCov_ = EcalClusterTools::localCovariances(*cluster, recHitsEE_, topology_);
0336       e1 = EcalClusterTools::eMax(*cluster, recHitsEE_);
0337       e4 = EcalClusterTools::eTop(*cluster, recHitsEE_, topology_) +
0338            EcalClusterTools::eRight(*cluster, recHitsEE_, topology_) +
0339            EcalClusterTools::eBottom(*cluster, recHitsEE_, topology_) +
0340            EcalClusterTools::eLeft(*cluster, recHitsEE_, topology_);
0341       r9 = EcalClusterTools::e3x3(*cluster, recHitsEE_, topology_) / cluster->energy();
0342     }
0343   }
0344   showerVars_[0] = r9;
0345   showerVars_[1] = sqrt(locCov_[0]);                                      //sigmaietaieta
0346   showerVars_[2] = locCov_[1];                                            //sigmaietaiphi
0347   showerVars_[3] = (!edm::isFinite(locCov_[2])) ? 0. : sqrt(locCov_[2]);  //sigmaiphiiphi
0348   showerVars_[4] = (e1 != 0.) ? 1. - e4 / e1 : -999.;                     //swiss_cross
0349   showerVars_[5] = cluster->hitsAndFractions().size();                    //nXtals
0350   showerVars_[6] = widths_.first;                                         //etaWidth
0351   showerVars_[7] = widths_.second;                                        //phiWidth
0352 
0353   return showerVars_;
0354 }
0355 
0356 void EcalClustersGraph::fillVariables() {
0357   LogDebug("EcalClustersGraph") << "Fill tensorflow input vector";
0358   const auto& deepSCEval = scProducerCache_->deepSCEvaluator;
0359   // Reserving the batch dimension
0360   inputs_.clustersX.reserve(nSeeds_);
0361   inputs_.hitsX.reserve(nSeeds_);
0362   inputs_.windowX.reserve(nSeeds_);
0363   inputs_.isSeed.reserve(nSeeds_);
0364 
0365   // Looping on all the seeds (window)
0366   for (uint is = 0; is < nSeeds_; is++) {
0367     const auto seedPointer = (clusters_[is]).ptr().get();
0368     std::vector<DeepSCInputs::FeaturesMap> unscaledClusterFeatures;
0369     const auto& outEdges = graphMap_.getOutEdges(is);
0370     size_t ncls = outEdges.size();
0371     // Reserve the vector size
0372     inputs_.clustersX[is].reserve(ncls);
0373     inputs_.hitsX[is].reserve(ncls);
0374     inputs_.isSeed[is].reserve(ncls);
0375     unscaledClusterFeatures.reserve(ncls);
0376     // Loop on all the clusters
0377     for (const auto ic : outEdges) {
0378       LogTrace("EcalClustersGraph") << "seed: " << is << ", out edge --> " << ic;
0379       const auto clPointer = (clusters_[ic]).ptr().get();
0380       const auto& clusterFeatures = computeVariables(seedPointer, clPointer);
0381       for (const auto& [key, val] : clusterFeatures) {
0382         LogTrace("EcalCluster") << key << "=" << val;
0383       }
0384       unscaledClusterFeatures.push_back(clusterFeatures);
0385       // Select and scale only the requested variables for the tensorflow model input
0386       inputs_.clustersX[is].push_back(deepSCEval->getScaledInputs(clusterFeatures, deepSCEval->inputFeaturesClusters));
0387       // The scaling and feature selection on hits is performed inside the function for each hit
0388       inputs_.hitsX[is].push_back(fillHits(clPointer));
0389       inputs_.isSeed[is].push_back(ic == is);
0390     }
0391     // For window we need the unscaled cluster features and then we select them
0392     inputs_.windowX[is] =
0393         deepSCEval->getScaledInputs(computeWindowVariables(unscaledClusterFeatures), deepSCEval->inputFeaturesWindows);
0394   }
0395   LogTrace("EcalClustersGraph") << "N. Windows: " << inputs_.clustersX.size();
0396 }
0397 
0398 void EcalClustersGraph::evaluateScores() {
0399   // Evaluate model
0400   const auto& scores = scProducerCache_->deepSCEvaluator->evaluate(inputs_);
0401   for (uint i = 0; i < nSeeds_; ++i) {
0402     uint k = 0;
0403     LogTrace("EcalClustersGraph") << "Score) seed: " << i << ":";
0404     for (auto const& j : graphMap_.getOutEdges(i)) {
0405       // Fill the scores from seed --> node (i --> j)
0406       // Not symmetrically, in order to save multiple values for seeds in other
0407       // seeds windows.
0408       graphMap_.setAdjMatrix(i, j, scores[i][k]);
0409       LogTrace("EcalClustersGraph") << "\t" << i << "-->" << j << ": " << scores[i][k];
0410       k++;
0411     }
0412   }
0413 }
0414 
0415 void EcalClustersGraph::setThresholds() {
0416   // Simple global threshold for the moment
0417   threshold_ = 0.5;
0418 }
0419 
0420 void EcalClustersGraph::selectClusters() {
0421   // Collect the final superClusters as subgraphs
0422   graphMap_.collectNodes(strategy_, threshold_);
0423 }
0424 
0425 EcalClustersGraph::EcalGraphOutput EcalClustersGraph::getGraphOutput() {
0426   EcalClustersGraph::EcalGraphOutput finalWindows_;
0427   const auto& finalSuperClusters_ = graphMap_.getGraphOutput();
0428   for (const auto& [is, cls] : finalSuperClusters_) {
0429     CalibratedPFCluster seed = clusters_[is];
0430     CalibratedPFClusterVector clusters_inWindow;
0431     for (const auto& ic : cls) {
0432       clusters_inWindow.push_back(clusters_[ic]);
0433     }
0434     finalWindows_.push_back({seed, clusters_inWindow});
0435   }
0436   return finalWindows_;
0437 }