Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-06-13 03:24:02

0001 // Author: Marco Rovere - marco.rovere@cern.ch
0002 // Date: 10/2021
0003 #include <algorithm>
0004 #include <set>
0005 #include <vector>
0006 
0007 #include "tbb/task_arena.h"
0008 #include "tbb/tbb.h"
0009 
0010 #include "DataFormats/Math/interface/deltaR.h"
0011 #include "DataFormats/Math/interface/LorentzVector.h"
0012 #include "DataFormats/Candidate/interface/Candidate.h"
0013 #include "FWCore/Framework/interface/Event.h"
0014 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0015 #include "FWCore/Utilities/interface/Exception.h"
0016 #include "PatternRecognitionbyFastJet.h"
0017 
0018 #include "TrackstersPCA.h"
0019 #include "Geometry/CaloGeometry/interface/CaloGeometry.h"
0020 #include "Geometry/Records/interface/CaloGeometryRecord.h"
0021 #include "FWCore/Framework/interface/EventSetup.h"
0022 
0023 #include "fastjet/ClusterSequence.hh"
0024 
0025 using namespace ticl;
0026 using namespace fastjet;
0027 
0028 template <typename TILES>
0029 PatternRecognitionbyFastJet<TILES>::PatternRecognitionbyFastJet(const edm::ParameterSet &conf,
0030                                                                 edm::ConsumesCollector iC)
0031     : PatternRecognitionAlgoBaseT<TILES>(conf, iC),
0032       caloGeomToken_(iC.esConsumes<CaloGeometry, CaloGeometryRecord>()),
0033       antikt_radius_(conf.getParameter<double>("antikt_radius")),
0034       minNumLayerCluster_(conf.getParameter<int>("minNumLayerCluster")),
0035       eidInputName_(conf.getParameter<std::string>("eid_input_name")),
0036       eidOutputNameEnergy_(conf.getParameter<std::string>("eid_output_name_energy")),
0037       eidOutputNameId_(conf.getParameter<std::string>("eid_output_name_id")),
0038       eidMinClusterEnergy_(conf.getParameter<double>("eid_min_cluster_energy")),
0039       eidNLayers_(conf.getParameter<int>("eid_n_layers")),
0040       eidNClusters_(conf.getParameter<int>("eid_n_clusters")),
0041       computeLocalTime_(conf.getParameter<bool>("computeLocalTime")){};
0042 
0043 template <typename TILES>
0044 void PatternRecognitionbyFastJet<TILES>::buildJetAndTracksters(std::vector<PseudoJet> &fjInputs,
0045                                                                std::vector<ticl::Trackster> &result) {
0046   if (PatternRecognitionAlgoBaseT<TILES>::algo_verbosity_ > VerbosityLevel::Basic) {
0047     edm::LogVerbatim("PatternRecogntionbyFastJet")
0048         << "Creating FastJet with " << fjInputs.size() << " LayerClusters in input";
0049   }
0050   fastjet::ClusterSequence sequence(fjInputs, JetDefinition(antikt_algorithm, antikt_radius_));
0051   auto jets = fastjet::sorted_by_pt(sequence.inclusive_jets(0));
0052   if (PatternRecognitionAlgoBaseT<TILES>::algo_verbosity_ > VerbosityLevel::Basic) {
0053     edm::LogVerbatim("PatternRecogntionbyFastJet") << "FastJet produced " << jets.size() << " jets/trackster";
0054   }
0055 
0056   auto trackster_idx = result.size();
0057   auto jetsSize = std::count_if(jets.begin(), jets.end(), [this](fastjet::PseudoJet jet) {
0058     return jet.constituents().size() > static_cast<unsigned int>(minNumLayerCluster_);
0059   });
0060   result.resize(trackster_idx + jetsSize);
0061 
0062   for (const auto &pj : jets) {
0063     if (pj.constituents().size() > static_cast<unsigned int>(minNumLayerCluster_)) {
0064       for (const auto &component : pj.constituents()) {
0065         result[trackster_idx].vertices().push_back(component.user_index());
0066         result[trackster_idx].vertex_multiplicity().push_back(1);
0067         if (PatternRecognitionAlgoBaseT<TILES>::algo_verbosity_ > VerbosityLevel::Basic) {
0068           edm::LogVerbatim("PatternRecogntionbyFastJet")
0069               << "Jet has " << pj.constituents().size() << " components that are stored in trackster " << trackster_idx;
0070         }
0071       }
0072       trackster_idx++;
0073     } else {
0074       if (PatternRecognitionAlgoBaseT<TILES>::algo_verbosity_ > VerbosityLevel::Advanced) {
0075         edm::LogVerbatim("PatternRecogntionbyFastJet")
0076             << "Jet with " << pj.constituents().size() << " constituents discarded since too small wrt "
0077             << minNumLayerCluster_;
0078       }
0079     }
0080   }
0081   fjInputs.clear();
0082 }
0083 
0084 template <typename TILES>
0085 void PatternRecognitionbyFastJet<TILES>::makeTracksters(
0086     const typename PatternRecognitionAlgoBaseT<TILES>::Inputs &input,
0087     std::vector<Trackster> &result,
0088     std::unordered_map<int, std::vector<int>> &seedToTracksterAssociation) {
0089   // Protect from events with no seeding regions
0090   if (input.regions.empty())
0091     return;
0092 
0093   edm::EventSetup const &es = input.es;
0094   const CaloGeometry &geom = es.getData(caloGeomToken_);
0095   rhtools_.setGeometry(geom);
0096 
0097   constexpr auto isHFnose = std::is_same<TILES, TICLLayerTilesHFNose>::value;
0098   constexpr int nEtaBin = TILES::constants_type_t::nEtaBins;
0099   constexpr int nPhiBin = TILES::constants_type_t::nPhiBins;
0100 
0101   // We need to partition the two sides of the HGCAL detector
0102   auto lastLayerPerSide = static_cast<unsigned int>(rhtools_.lastLayer(isHFnose)) - 1;
0103   unsigned int maxLayer = 2 * lastLayerPerSide - 1;
0104   std::vector<fastjet::PseudoJet> fjInputs;
0105   fjInputs.clear();
0106   for (unsigned int currentLayer = 0; currentLayer <= maxLayer; ++currentLayer) {
0107     if (currentLayer == lastLayerPerSide) {
0108       buildJetAndTracksters(fjInputs, result);
0109     }
0110     const auto &tileOnLayer = input.tiles[currentLayer];
0111     for (int ieta = 0; ieta <= nEtaBin; ++ieta) {
0112       auto offset = ieta * nPhiBin;
0113       if (PatternRecognitionAlgoBaseT<TILES>::algo_verbosity_ > VerbosityLevel::Advanced) {
0114         edm::LogVerbatim("PatternRecogntionbyFastJet") << "offset: " << offset;
0115       }
0116       for (int iphi = 0; iphi <= nPhiBin; ++iphi) {
0117         if (PatternRecognitionAlgoBaseT<TILES>::algo_verbosity_ > VerbosityLevel::Advanced) {
0118           edm::LogVerbatim("PatternRecogntionbyFastJet") << "iphi: " << iphi;
0119           edm::LogVerbatim("PatternRecogntionbyFastJet") << "Entries in tileBin: " << tileOnLayer[offset + iphi].size();
0120         }
0121         for (auto clusterIdx : tileOnLayer[offset + iphi]) {
0122           // Skip masked layer clusters
0123           if (input.mask[clusterIdx] == 0.) {
0124             if (PatternRecognitionAlgoBaseT<TILES>::algo_verbosity_ > VerbosityLevel::Advanced) {
0125               edm::LogVerbatim("PatternRecogntionbyFastJet") << "Skipping masked layerIdx " << clusterIdx;
0126             }
0127             continue;
0128           }
0129           // Should we correct for the position of the PV?
0130           auto const &cl = input.layerClusters[clusterIdx];
0131           math::XYZVector direction(cl.x(), cl.y(), cl.z());
0132           direction = direction.Unit();
0133           direction *= cl.energy();
0134           auto fpj = fastjet::PseudoJet(direction.X(), direction.Y(), direction.Z(), cl.energy());
0135           fpj.set_user_index(clusterIdx);
0136           fjInputs.push_back(fpj);
0137         }  // End of loop on the clusters on currentLayer
0138       }    // End of loop over phi-bin region
0139     }      // End of loop over eta-bin region
0140   }        // End of loop over layers
0141 
0142   // Collect the jet from the other side wrt to the one taken care of inside the main loop above.
0143   buildJetAndTracksters(fjInputs, result);
0144 
0145   ticl::assignPCAtoTracksters(result,
0146                               input.layerClusters,
0147                               input.layerClustersTime,
0148                               rhtools_.getPositionLayer(rhtools_.lastLayerEE(isHFnose), isHFnose).z(),
0149                               computeLocalTime_);
0150 
0151   // run energy regression and ID
0152   energyRegressionAndID(input.layerClusters, input.tfSession, result);
0153   if (PatternRecognitionAlgoBaseT<TILES>::algo_verbosity_ > VerbosityLevel::Basic) {
0154     for (auto const &t : result) {
0155       edm::LogVerbatim("PatternRecogntionbyFastJet") << "Barycenter: " << t.barycenter();
0156       edm::LogVerbatim("PatternRecogntionbyFastJet") << "LCs: " << t.vertices().size();
0157       edm::LogVerbatim("PatternRecogntionbyFastJet") << "Energy: " << t.raw_energy();
0158       edm::LogVerbatim("PatternRecogntionbyFastJet") << "Regressed: " << t.regressed_energy();
0159     }
0160   }
0161 }
0162 
0163 template <typename TILES>
0164 void PatternRecognitionbyFastJet<TILES>::energyRegressionAndID(const std::vector<reco::CaloCluster> &layerClusters,
0165                                                                const tensorflow::Session *eidSession,
0166                                                                std::vector<Trackster> &tracksters) {
0167   // Energy regression and particle identification strategy:
0168   //
0169   // 1. Set default values for regressed energy and particle id for each trackster.
0170   // 2. Store indices of tracksters whose total sum of cluster energies is above the
0171   //    eidMinClusterEnergy_ (GeV) threshold. Inference is not applied for soft tracksters.
0172   // 3. When no trackster passes the selection, return.
0173   // 4. Create input and output tensors. The batch dimension is determined by the number of
0174   //    selected tracksters.
0175   // 5. Fill input tensors with layer cluster features. Per layer, clusters are ordered descending
0176   //    by energy. Given that tensor data is contiguous in memory, we can use pointer arithmetic to
0177   //    fill values, even with batching.
0178   // 6. Zero-fill features for empty clusters in each layer.
0179   // 7. Batched inference.
0180   // 8. Assign the regressed energy and id probabilities to each trackster.
0181   //
0182   // Indices used throughout this method:
0183   // i -> batch element / trackster
0184   // j -> layer
0185   // k -> cluster
0186   // l -> feature
0187 
0188   // set default values per trackster, determine if the cluster energy threshold is passed,
0189   // and store indices of hard tracksters
0190   std::vector<int> tracksterIndices;
0191   for (int i = 0; i < static_cast<int>(tracksters.size()); i++) {
0192     // calculate the cluster energy sum (2)
0193     // note: after the loop, sumClusterEnergy might be just above the threshold which is enough to
0194     // decide whether to run inference for the trackster or not
0195     float sumClusterEnergy = 0.;
0196     for (const unsigned int &vertex : tracksters[i].vertices()) {
0197       sumClusterEnergy += static_cast<float>(layerClusters[vertex].energy());
0198       // there might be many clusters, so try to stop early
0199       if (sumClusterEnergy >= eidMinClusterEnergy_) {
0200         // set default values (1)
0201         tracksters[i].setRegressedEnergy(0.f);
0202         tracksters[i].zeroProbabilities();
0203         tracksterIndices.push_back(i);
0204         break;
0205       }
0206     }
0207   }
0208 
0209   // do nothing when no trackster passes the selection (3)
0210   int batchSize = static_cast<int>(tracksterIndices.size());
0211   if (batchSize == 0) {
0212     return;
0213   }
0214 
0215   // create input and output tensors (4)
0216   tensorflow::TensorShape shape({batchSize, eidNLayers_, eidNClusters_, eidNFeatures_});
0217   tensorflow::Tensor input(tensorflow::DT_FLOAT, shape);
0218   tensorflow::NamedTensorList inputList = {{eidInputName_, input}};
0219 
0220   std::vector<tensorflow::Tensor> outputs;
0221   std::vector<std::string> outputNames;
0222   if (!eidOutputNameEnergy_.empty()) {
0223     outputNames.push_back(eidOutputNameEnergy_);
0224   }
0225   if (!eidOutputNameId_.empty()) {
0226     outputNames.push_back(eidOutputNameId_);
0227   }
0228 
0229   // fill input tensor (5)
0230   for (int i = 0; i < batchSize; i++) {
0231     const Trackster &trackster = tracksters[tracksterIndices[i]];
0232 
0233     // per layer, we only consider the first eidNClusters_ clusters in terms of energy, so in order
0234     // to avoid creating large / nested structures to do the sorting for an unknown number of total
0235     // clusters, create a sorted list of layer cluster indices to keep track of the filled clusters
0236     std::vector<int> clusterIndices(trackster.vertices().size());
0237     for (int k = 0; k < (int)trackster.vertices().size(); k++) {
0238       clusterIndices[k] = k;
0239     }
0240     sort(clusterIndices.begin(), clusterIndices.end(), [&layerClusters, &trackster](const int &a, const int &b) {
0241       return layerClusters[trackster.vertices(a)].energy() > layerClusters[trackster.vertices(b)].energy();
0242     });
0243 
0244     // keep track of the number of seen clusters per layer
0245     std::vector<int> seenClusters(eidNLayers_);
0246 
0247     // loop through clusters by descending energy
0248     for (const int &k : clusterIndices) {
0249       // get features per layer and cluster and store the values directly in the input tensor
0250       const reco::CaloCluster &cluster = layerClusters[trackster.vertices(k)];
0251       int j = rhtools_.getLayerWithOffset(cluster.hitsAndFractions()[0].first) - 1;
0252       if (j < eidNLayers_ && seenClusters[j] < eidNClusters_) {
0253         // get the pointer to the first feature value for the current batch, layer and cluster
0254         float *features = &input.tensor<float, 4>()(i, j, seenClusters[j], 0);
0255 
0256         // fill features
0257         *(features++) = float(cluster.energy() / float(trackster.vertex_multiplicity(k)));
0258         *(features++) = float(std::abs(cluster.eta()));
0259         *(features) = float(cluster.phi());
0260 
0261         // increment seen clusters
0262         seenClusters[j]++;
0263       }
0264     }
0265 
0266     // zero-fill features of empty clusters in each layer (6)
0267     for (int j = 0; j < eidNLayers_; j++) {
0268       for (int k = seenClusters[j]; k < eidNClusters_; k++) {
0269         float *features = &input.tensor<float, 4>()(i, j, k, 0);
0270         for (int l = 0; l < eidNFeatures_; l++) {
0271           *(features++) = 0.f;
0272         }
0273       }
0274     }
0275   }
0276 
0277   // run the inference (7)
0278   tensorflow::run(eidSession, inputList, outputNames, &outputs);
0279 
0280   // store regressed energy per trackster (8)
0281   if (!eidOutputNameEnergy_.empty()) {
0282     // get the pointer to the energy tensor, dimension is batch x 1
0283     float *energy = outputs[0].flat<float>().data();
0284 
0285     for (const int &i : tracksterIndices) {
0286       tracksters[i].setRegressedEnergy(*(energy++));
0287     }
0288   }
0289 
0290   // store id probabilities per trackster (8)
0291   if (!eidOutputNameId_.empty()) {
0292     // get the pointer to the id probability tensor, dimension is batch x id_probabilities.size()
0293     int probsIdx = eidOutputNameEnergy_.empty() ? 0 : 1;
0294     float *probs = outputs[probsIdx].flat<float>().data();
0295 
0296     for (const int &i : tracksterIndices) {
0297       tracksters[i].setProbabilities(probs);
0298       probs += tracksters[i].id_probabilities().size();
0299     }
0300   }
0301 }
0302 
0303 template <typename TILES>
0304 void PatternRecognitionbyFastJet<TILES>::fillPSetDescription(edm::ParameterSetDescription &iDesc) {
0305   iDesc.add<int>("algo_verbosity", 0);
0306   iDesc.add<double>("antikt_radius", 0.09)->setComment("Radius to be used while running the Anti-kt clustering");
0307   iDesc.add<int>("minNumLayerCluster", 5)->setComment("Not Inclusive");
0308   iDesc.add<std::string>("eid_input_name", "input");
0309   iDesc.add<std::string>("eid_output_name_energy", "output/regressed_energy");
0310   iDesc.add<std::string>("eid_output_name_id", "output/id_probabilities");
0311   iDesc.add<double>("eid_min_cluster_energy", 1.);
0312   iDesc.add<int>("eid_n_layers", 50);
0313   iDesc.add<int>("eid_n_clusters", 10);
0314   iDesc.add<bool>("computeLocalTime", false);
0315 }
0316 
0317 template class ticl::PatternRecognitionbyFastJet<TICLLayerTiles>;