Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-06-20 22:43:40

0001 #include "RecoEcal/EgammaCoreTools/interface/EcalClustersGraph.h"
0002 #include <cmath>
0003 
0004 using namespace std;
0005 using namespace reco;
0006 using namespace reco::DeepSCInputs;
0007 
0008 typedef std::shared_ptr<CalibratedPFCluster> CalibratedClusterPtr;
0009 typedef std::vector<CalibratedClusterPtr> CalibratedClusterPtrVector;
0010 
0011 EcalClustersGraph::EcalClustersGraph(CalibratedClusterPtrVector clusters,
0012                                      int nSeeds,
0013                                      const CaloTopology* topology,
0014                                      const CaloSubdetectorGeometry* ebGeom,
0015                                      const CaloSubdetectorGeometry* eeGeom,
0016                                      const EcalRecHitCollection* recHitsEB,
0017                                      const EcalRecHitCollection* recHitsEE,
0018                                      const SCProducerCache* cache)
0019     : clusters_(clusters),
0020       nSeeds_(nSeeds),
0021       nCls_(clusters_.size()),
0022       topology_(topology),
0023       ebGeom_(ebGeom),
0024       eeGeom_(eeGeom),
0025       recHitsEB_(recHitsEB),
0026       recHitsEE_(recHitsEE),
0027       scProducerCache_(cache),
0028       graphMap_(clusters.size()) {
0029   // Prepare the batch size of the tensor inputs == number of windows
0030   inputs_.clustersX.resize(nSeeds_);
0031   inputs_.windowX.resize(nSeeds_);
0032   inputs_.hitsX.resize(nSeeds_);
0033   inputs_.isSeed.resize(nSeeds_);
0034 
0035   // Init the graph nodes
0036   for (size_t i = 0; i < nCls_; i++) {
0037     if (i < nSeeds_)
0038       graphMap_.addNode(i, GraphMap::NodeCategory::kSeed);
0039     else
0040       graphMap_.addNode(i, GraphMap::NodeCategory::kNode);
0041   }
0042 
0043   // Select the collection strategy from the config
0044   if (scProducerCache_->config.collectionStrategy == "Cascade") {
0045     strategy_ = GraphMap::CollectionStrategy::Cascade;
0046   } else if (scProducerCache_->config.collectionStrategy == "CollectAndMerge") {
0047     strategy_ = GraphMap::CollectionStrategy::CollectAndMerge;
0048   } else if (scProducerCache_->config.collectionStrategy == "SeedsFirst") {
0049     strategy_ = GraphMap::CollectionStrategy::SeedsFirst;
0050   } else if (scProducerCache_->config.collectionStrategy == "CascadeHighest") {
0051     strategy_ = GraphMap::CollectionStrategy::CascadeHighest;
0052   } else {
0053     edm::LogWarning("EcalClustersGraph") << "GraphMap::CollectionStrategy not recognized. Default to Cascade";
0054     strategy_ = GraphMap::CollectionStrategy::Cascade;
0055   }
0056 
0057   LogTrace("EcalClustersGraph") << "EcalClustersGraph created. nSeeds " << nSeeds_ << ", nClusters " << nCls_ << endl;
0058 }
0059 
0060 std::array<int, 3> EcalClustersGraph::clusterPosition(const CaloCluster* cluster) const {
0061   std::array<int, 3> coordinates;
0062   int ieta = -999;
0063   int iphi = -999;
0064   int iz = -99;
0065 
0066   const math::XYZPoint& caloPos = cluster->position();
0067   if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) {
0068     EBDetId eb_id(ebGeom_->getClosestCell(GlobalPoint(caloPos.x(), caloPos.y(), caloPos.z())));
0069     ieta = eb_id.ieta();
0070     iphi = eb_id.iphi();
0071     iz = 0;
0072   } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) {
0073     EEDetId ee_id(eeGeom_->getClosestCell(GlobalPoint(caloPos.x(), caloPos.y(), caloPos.z())));
0074     ieta = ee_id.ix();
0075     iphi = ee_id.iy();
0076     if (ee_id.zside() < 0)
0077       iz = -1;
0078     if (ee_id.zside() > 0)
0079       iz = 1;
0080   }
0081 
0082   coordinates[0] = ieta;
0083   coordinates[1] = iphi;
0084   coordinates[2] = iz;
0085   return coordinates;
0086 }
0087 
0088 std::array<double, 3> EcalClustersGraph::dynamicWindow(double seedEta) const {
0089   // The dEta-dPhi detector window dimension is chosen to that the algorithm is always larger than
0090   // the Mustache dimension
0091   std::array<double, 3> window;
0092 
0093   double eta = std::abs(seedEta);
0094   double deta_down = 0.;
0095   double deta_up = 0.;
0096   double dphi = 0.;
0097 
0098   //deta_down
0099   constexpr float deta_down_bins[2] = {2.1, 2.5};
0100   if (eta < deta_down_bins[0])
0101     deta_down = -0.075;
0102   else if (eta >= deta_down_bins[0] && eta < deta_down_bins[1])
0103     deta_down = -0.1875 * eta + 0.31875;
0104   else if (eta >= deta_down_bins[1])
0105     deta_down = -0.15;
0106 
0107   //deta_up
0108   constexpr float deta_up_bins[4] = {0.1, 1.3, 1.7, 1.9};
0109   if (eta < deta_up_bins[0])
0110     deta_up = 0.075;
0111   else if (eta >= deta_up_bins[0] && eta < deta_up_bins[1])
0112     deta_up = 0.0758929 - 0.0178571 * eta + 0.0892857 * (eta * eta);
0113   else if (eta >= deta_up_bins[1] && eta < deta_up_bins[2])
0114     deta_up = 0.2;
0115   else if (eta >= deta_up_bins[2] && eta < deta_up_bins[3])
0116     deta_up = 0.625 - 0.25 * eta;
0117   else if (eta >= deta_up_bins[3])
0118     deta_up = 0.15;
0119 
0120   //dphi
0121   constexpr float dphi_bins[2] = {1.9, 2.7};
0122   if (eta < dphi_bins[0])
0123     dphi = 0.6;
0124   else if (eta >= dphi_bins[0] && eta < dphi_bins[1])
0125     dphi = 1.075 - 0.25 * eta;
0126   else if (eta >= dphi_bins[1])
0127     dphi = 0.4;
0128 
0129   window[0] = deta_down;
0130   window[1] = deta_up;
0131   window[2] = dphi;
0132 
0133   return window;
0134 }
0135 
0136 void EcalClustersGraph::initWindows() {
0137   for (uint is = 0; is < nSeeds_; is++) {
0138     const auto& seedLocal = clusterPosition((*clusters_[is]).ptr().get());
0139     double seed_eta = clusters_[is]->eta();
0140     double seed_phi = clusters_[is]->phi();
0141     const auto& width = dynamicWindow(seed_eta);
0142     // Add a self loop on the seed node
0143     graphMap_.addEdge(is, is);
0144 
0145     // The graph associated to each seed includes only other seeds if they have a smaller energy.
0146     // This is imposed to be consistent with the current trained model, which has been training on "non-overalapping windows".
0147     // The effect of connecting all the seeds, and not only the smaller energy ones has been already tested: the reconstruction
0148     // differences are negligible (tested with Cascade collection algo).
0149     // In the next version of the training this requirement will be relaxed to have a model that fully matches the reconstruction
0150     // mechanism in terms of overlapping seeds.
0151     for (uint icl = is + 1; icl < nCls_; icl++) {
0152       if (is == icl)
0153         continue;
0154       const auto& clusterLocal = clusterPosition((*clusters_[icl]).ptr().get());
0155       double cl_eta = clusters_[icl]->eta();
0156       double cl_phi = clusters_[icl]->phi();
0157       double dphi = deltaPhi(seed_phi, cl_phi);
0158       double deta = deltaEta(seed_eta, cl_eta);
0159 
0160       if (seedLocal[2] == clusterLocal[2] && deta >= width[0] && deta <= width[1] && std::abs(dphi) <= width[2]) {
0161         graphMap_.addEdge(is, icl);
0162       }
0163     }
0164   }
0165 }
0166 
0167 std::vector<std::vector<float>> EcalClustersGraph::fillHits(const CaloCluster* cluster) const {
0168   const std::vector<std::pair<DetId, float>>& hitsAndFractions = cluster->hitsAndFractions();
0169   std::vector<std::vector<float>> out(hitsAndFractions.size());
0170   if (hitsAndFractions.empty()) {
0171     edm::LogError("EcalClustersGraph") << "No hits in cluster!!";
0172   }
0173   // Map containing the available features for the rechits
0174   DeepSCInputs::FeaturesMap rechitsFeatures;
0175   for (unsigned int i = 0; i < hitsAndFractions.size(); i++) {
0176     rechitsFeatures.clear();
0177     if (hitsAndFractions[i].first.subdetId() == EcalBarrel) {
0178       double energy = (*recHitsEB_->find(hitsAndFractions[i].first)).energy();
0179       EBDetId eb_id(hitsAndFractions[i].first);
0180       rechitsFeatures["ieta"] = eb_id.ieta();                                //ieta
0181       rechitsFeatures["iphi"] = eb_id.iphi();                                //iphi
0182       rechitsFeatures["iz"] = 0.;                                            //iz
0183       rechitsFeatures["en_withfrac"] = energy * hitsAndFractions[i].second;  //energy * fraction
0184     } else if (hitsAndFractions[i].first.subdetId() == EcalEndcap) {
0185       double energy = (*recHitsEE_->find(hitsAndFractions[i].first)).energy();
0186       EEDetId ee_id(hitsAndFractions[i].first);
0187       rechitsFeatures["ieta"] = ee_id.ix();  //ix
0188       rechitsFeatures["iphi"] = ee_id.iy();  //iy
0189       if (ee_id.zside() < 0)
0190         rechitsFeatures["iz"] = -1.;  //iz
0191       if (ee_id.zside() > 0)
0192         rechitsFeatures["iz"] = +1.;                                         //iz
0193       rechitsFeatures["en_withfrac"] = energy * hitsAndFractions[i].second;  //energy * fraction
0194     } else {
0195       edm::LogError("EcalClustersGraph") << "Rechit is not either EB or EE!!";
0196     }
0197     // Use the method in DeepSCGraphEvaluation to get only the requested variables and possible a rescaling
0198     // (depends on configuration)
0199     out[i] = scProducerCache_->deepSCEvaluator->getScaledInputs(rechitsFeatures,
0200                                                                 scProducerCache_->deepSCEvaluator->inputFeaturesHits);
0201   }
0202   return out;
0203 }
0204 
0205 DeepSCInputs::FeaturesMap EcalClustersGraph::computeVariables(const CaloCluster* seed,
0206                                                               const CaloCluster* cluster) const {
0207   DeepSCInputs::FeaturesMap clFeatures;
0208   const auto& clusterLocal = clusterPosition(cluster);
0209   double cl_energy = cluster->energy();
0210   double cl_eta = cluster->eta();
0211   double cl_phi = cluster->phi();
0212   double seed_energy = seed->energy();
0213   double seed_eta = seed->eta();
0214   double seed_phi = seed->phi();
0215   clFeatures["cl_energy"] = cl_energy;                                                                //cl_energy
0216   clFeatures["cl_et"] = cl_energy / std::cosh(cl_eta);                                                //cl_et
0217   clFeatures["cl_eta"] = cl_eta;                                                                      //cl_eta
0218   clFeatures["cl_phi"] = cl_phi;                                                                      //cl_phi
0219   clFeatures["cl_ieta"] = clusterLocal[0];                                                            //cl_ieta/ix
0220   clFeatures["cl_iphi"] = clusterLocal[1];                                                            //cl_iphi/iy
0221   clFeatures["cl_iz"] = clusterLocal[2];                                                              //cl_iz
0222   clFeatures["cl_seed_dEta"] = deltaEta(seed_eta, cl_eta);                                            //cl_dEta
0223   clFeatures["cl_seed_dPhi"] = deltaPhi(seed_phi, cl_phi);                                            //cl_dPhi
0224   clFeatures["cl_seed_dEnergy"] = seed_energy - cl_energy;                                            //cl_dEnergy
0225   clFeatures["cl_seed_dEt"] = (seed_energy / std::cosh(seed_eta)) - (cl_energy / std::cosh(cl_eta));  //cl_dEt
0226   clFeatures["cl_nxtals"] = cluster->hitsAndFractions().size();                                       // nxtals
0227   return clFeatures;
0228 }
0229 
0230 DeepSCInputs::FeaturesMap EcalClustersGraph::computeWindowVariables(
0231     const std::vector<DeepSCInputs::FeaturesMap>& clusters) const {
0232   size_t nCls = clusters.size();
0233   std::map<std::string, float> min;
0234   std::map<std::string, float> max;
0235   std::map<std::string, float> avg;
0236   for (const auto& clFeatures : clusters) {
0237     for (auto const& [key, val] : clFeatures) {
0238       avg[key] += (val / nCls);
0239       if (val < min[key])
0240         min[key] = val;
0241       if (val > max[key])
0242         max[key] = val;
0243     }
0244   }
0245   DeepSCInputs::FeaturesMap windFeatures;
0246   for (auto const& el : clusters.front()) {
0247     windFeatures["max_" + el.first] = max[el.first];
0248     windFeatures["min_" + el.first] = min[el.first];
0249     windFeatures["avg_" + el.first] = avg[el.first];
0250   }
0251   return windFeatures;
0252 }
0253 
0254 std::pair<double, double> EcalClustersGraph::computeCovariances(const CaloCluster* cluster) {
0255   double numeratorEtaWidth = 0;
0256   double numeratorPhiWidth = 0;
0257   double denominator = cluster->energy();
0258   double clEta = cluster->position().eta();
0259   double clPhi = cluster->position().phi();
0260   std::shared_ptr<const CaloCellGeometry> this_cell;
0261   EcalRecHitCollection::const_iterator rHit;
0262 
0263   const std::vector<std::pair<DetId, float>>& detId = cluster->hitsAndFractions();
0264   // Loop over recHits associated with the given SuperCluster
0265   for (const auto& hit : detId) {
0266     if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) {
0267       rHit = recHitsEB_->find(hit.first);
0268       if (rHit == recHitsEB_->end()) {
0269         continue;
0270       }
0271       this_cell = ebGeom_->getGeometry(rHit->id());
0272     } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) {
0273       rHit = recHitsEE_->find(hit.first);
0274       if (rHit == recHitsEE_->end()) {
0275         continue;
0276       }
0277       this_cell = eeGeom_->getGeometry(rHit->id());
0278     } else {
0279       continue;
0280     }
0281 
0282     GlobalPoint position = this_cell->getPosition();
0283     //take into account energy fractions
0284     double energyHit = rHit->energy() * hit.second;
0285     //form differences
0286     double dPhi = deltaPhi(position.phi(), clPhi);
0287     double dEta = position.eta() - clEta;
0288     if (energyHit > 0) {
0289       numeratorEtaWidth += energyHit * dEta * dEta;
0290       numeratorPhiWidth += energyHit * dPhi * dPhi;
0291     }
0292   }
0293   double etaWidth = sqrt(numeratorEtaWidth / denominator);
0294   double phiWidth = sqrt(numeratorPhiWidth / denominator);
0295 
0296   return std::make_pair(etaWidth, phiWidth);
0297 }
0298 
0299 std::vector<double> EcalClustersGraph::computeShowerShapes(const CaloCluster* cluster, bool full5x5 = false) {
0300   std::vector<double> showerVars_;
0301   showerVars_.resize(8);
0302   widths_ = computeCovariances(cluster);
0303   float e1 = 1.;
0304   float e4 = 0.;
0305   float r9 = 0.;
0306 
0307   if (full5x5) {
0308     if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) {
0309       locCov_ = noZS::EcalClusterTools::localCovariances(*cluster, recHitsEB_, topology_);
0310       e1 = noZS::EcalClusterTools::eMax(*cluster, recHitsEB_);
0311       e4 = noZS::EcalClusterTools::eTop(*cluster, recHitsEB_, topology_) +
0312            noZS::EcalClusterTools::eRight(*cluster, recHitsEB_, topology_) +
0313            noZS::EcalClusterTools::eBottom(*cluster, recHitsEB_, topology_) +
0314            noZS::EcalClusterTools::eLeft(*cluster, recHitsEB_, topology_);
0315       r9 = noZS::EcalClusterTools::e3x3(*cluster, recHitsEB_, topology_) / cluster->energy();  //r9
0316 
0317     } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) {
0318       locCov_ = noZS::EcalClusterTools::localCovariances(*cluster, recHitsEE_, topology_);
0319       e1 = noZS::EcalClusterTools::eMax(*cluster, recHitsEE_);
0320       e4 = noZS::EcalClusterTools::eTop(*cluster, recHitsEE_, topology_) +
0321            noZS::EcalClusterTools::eRight(*cluster, recHitsEE_, topology_) +
0322            noZS::EcalClusterTools::eBottom(*cluster, recHitsEE_, topology_) +
0323            noZS::EcalClusterTools::eLeft(*cluster, recHitsEE_, topology_);
0324       r9 = noZS::EcalClusterTools::e3x3(*cluster, recHitsEE_, topology_) / cluster->energy();  //r9
0325     }
0326   } else {
0327     if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) {
0328       locCov_ = EcalClusterTools::localCovariances(*cluster, recHitsEB_, topology_);
0329       e1 = EcalClusterTools::eMax(*cluster, recHitsEB_);
0330       e4 = EcalClusterTools::eTop(*cluster, recHitsEB_, topology_) +
0331            EcalClusterTools::eRight(*cluster, recHitsEB_, topology_) +
0332            EcalClusterTools::eBottom(*cluster, recHitsEB_, topology_) +
0333            EcalClusterTools::eLeft(*cluster, recHitsEB_, topology_);
0334       r9 = EcalClusterTools::e3x3(*cluster, recHitsEB_, topology_) / cluster->energy();  //r9
0335     } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) {
0336       locCov_ = EcalClusterTools::localCovariances(*cluster, recHitsEE_, topology_);
0337       e1 = EcalClusterTools::eMax(*cluster, recHitsEE_);
0338       e4 = EcalClusterTools::eTop(*cluster, recHitsEE_, topology_) +
0339            EcalClusterTools::eRight(*cluster, recHitsEE_, topology_) +
0340            EcalClusterTools::eBottom(*cluster, recHitsEE_, topology_) +
0341            EcalClusterTools::eLeft(*cluster, recHitsEE_, topology_);
0342       r9 = EcalClusterTools::e3x3(*cluster, recHitsEE_, topology_) / cluster->energy();
0343     }
0344   }
0345   showerVars_[0] = r9;
0346   showerVars_[1] = sqrt(locCov_[0]);                                      //sigmaietaieta
0347   showerVars_[2] = locCov_[1];                                            //sigmaietaiphi
0348   showerVars_[3] = (!edm::isFinite(locCov_[2])) ? 0. : sqrt(locCov_[2]);  //sigmaiphiiphi
0349   showerVars_[4] = (e1 != 0.) ? 1. - e4 / e1 : -999.;                     //swiss_cross
0350   showerVars_[5] = cluster->hitsAndFractions().size();                    //nXtals
0351   showerVars_[6] = widths_.first;                                         //etaWidth
0352   showerVars_[7] = widths_.second;                                        //phiWidth
0353 
0354   return showerVars_;
0355 }
0356 
0357 void EcalClustersGraph::fillVariables() {
0358   LogDebug("EcalClustersGraph") << "Fill tensorflow input vector";
0359   const auto& deepSCEval = scProducerCache_->deepSCEvaluator;
0360   // Reserving the batch dimension
0361   inputs_.clustersX.reserve(nSeeds_);
0362   inputs_.hitsX.reserve(nSeeds_);
0363   inputs_.windowX.reserve(nSeeds_);
0364   inputs_.isSeed.reserve(nSeeds_);
0365 
0366   // Looping on all the seeds (window)
0367   for (uint is = 0; is < nSeeds_; is++) {
0368     const auto seedPointer = (*clusters_[is]).ptr().get();
0369     std::vector<DeepSCInputs::FeaturesMap> unscaledClusterFeatures;
0370     const auto& outEdges = graphMap_.getOutEdges(is);
0371     size_t ncls = outEdges.size();
0372     // Reserve the vector size
0373     inputs_.clustersX[is].reserve(ncls);
0374     inputs_.hitsX[is].reserve(ncls);
0375     inputs_.isSeed[is].reserve(ncls);
0376     unscaledClusterFeatures.reserve(ncls);
0377     // Loop on all the clusters
0378     for (const auto ic : outEdges) {
0379       LogTrace("EcalClustersGraph") << "seed: " << is << ", out edge --> " << ic;
0380       const auto clPointer = (*clusters_[ic]).ptr().get();
0381       const auto& clusterFeatures = computeVariables(seedPointer, clPointer);
0382       for (const auto& [key, val] : clusterFeatures) {
0383         LogTrace("EcalCluster") << key << "=" << val;
0384       }
0385       unscaledClusterFeatures.push_back(clusterFeatures);
0386       // Select and scale only the requested variables for the tensorflow model input
0387       inputs_.clustersX[is].push_back(deepSCEval->getScaledInputs(clusterFeatures, deepSCEval->inputFeaturesClusters));
0388       // The scaling and feature selection on hits is performed inside the function for each hit
0389       inputs_.hitsX[is].push_back(fillHits(clPointer));
0390       inputs_.isSeed[is].push_back(ic == is);
0391     }
0392     // For window we need the unscaled cluster features and then we select them
0393     inputs_.windowX[is] =
0394         deepSCEval->getScaledInputs(computeWindowVariables(unscaledClusterFeatures), deepSCEval->inputFeaturesWindows);
0395   }
0396   LogTrace("EcalClustersGraph") << "N. Windows: " << inputs_.clustersX.size();
0397 }
0398 
0399 void EcalClustersGraph::evaluateScores() {
0400   // Evaluate model
0401   const auto& scores = scProducerCache_->deepSCEvaluator->evaluate(inputs_);
0402   for (uint i = 0; i < nSeeds_; ++i) {
0403     uint k = 0;
0404     LogTrace("EcalClustersGraph") << "Score) seed: " << i << ":";
0405     for (auto const& j : graphMap_.getOutEdges(i)) {
0406       // Fill the scores from seed --> node (i --> j)
0407       // Not symmetrically, in order to save multiple values for seeds in other
0408       // seeds windows.
0409       graphMap_.setAdjMatrix(i, j, scores[i][k]);
0410       LogTrace("EcalClustersGraph") << "\t" << i << "-->" << j << ": " << scores[i][k];
0411       k++;
0412     }
0413   }
0414 }
0415 
0416 void EcalClustersGraph::setThresholds() {
0417   // Simple global threshold for the moment
0418   threshold_ = 0.5;
0419 }
0420 
0421 void EcalClustersGraph::selectClusters() {
0422   // Collect the final superClusters as subgraphs
0423   graphMap_.collectNodes(strategy_, threshold_);
0424 }
0425 
0426 EcalClustersGraph::EcalGraphOutput EcalClustersGraph::getGraphOutput() {
0427   EcalClustersGraph::EcalGraphOutput finalWindows_;
0428   const auto& finalSuperClusters_ = graphMap_.getGraphOutput();
0429   for (const auto& [is, cls] : finalSuperClusters_) {
0430     CalibratedClusterPtr seed = clusters_[is];
0431     CalibratedClusterPtrVector clusters_inWindow;
0432     for (const auto& ic : cls) {
0433       clusters_inWindow.push_back(clusters_[ic]);
0434     }
0435     finalWindows_.push_back({seed, clusters_inWindow});
0436   }
0437   return finalWindows_;
0438 }