File indexing completed on 2024-08-21 04:46:45
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023 #include <string>
0024 #include <memory>
0025 #include <algorithm>
0026 #include <vector>
0027
0028 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0029 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0030 #include "FWCore/ParameterSet/interface/allowedValues.h"
0031 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0032 #include "FWCore/Utilities/interface/FileInPath.h"
0033 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
0034
0035 #include "DataFormats/HGCalReco/interface/TICLLayerTile.h"
0036 #include "DataFormats/HGCalReco/interface/Trackster.h"
0037 #include "RecoHGCal/TICL/plugins/TracksterLinkingbySuperClusteringDNN.h"
0038
0039 using namespace ticl;
0040
0041 TracksterLinkingbySuperClusteringDNN::TracksterLinkingbySuperClusteringDNN(const edm::ParameterSet& ps,
0042 edm::ConsumesCollector iC,
0043 cms::Ort::ONNXRuntime const* onnxRuntime)
0044 : TracksterLinkingAlgoBase(ps, iC, onnxRuntime),
0045 dnnInputs_(makeSuperclusteringDNNInputFromString(ps.getParameter<std::string>("dnnInputsVersion"))),
0046 inferenceBatchSize_(ps.getParameter<unsigned int>("inferenceBatchSize")),
0047 nnWorkingPoint_(ps.getParameter<double>("nnWorkingPoint")),
0048 deltaEtaWindow_(ps.getParameter<double>("deltaEtaWindow")),
0049 deltaPhiWindow_(ps.getParameter<double>("deltaPhiWindow")),
0050 seedPtThreshold_(ps.getParameter<double>("seedPtThreshold")),
0051 candidateEnergyThreshold_(ps.getParameter<double>("candidateEnergyThreshold")),
0052 explVarRatioCut_energyBoundary_(ps.getParameter<double>("candidateEnergyThreshold")),
0053 explVarRatioMinimum_lowEnergy_(ps.getParameter<double>("explVarRatioMinimum_lowEnergy")),
0054 explVarRatioMinimum_highEnergy_(ps.getParameter<double>("explVarRatioMinimum_highEnergy")),
0055 filterByTracksterPID_(ps.getParameter<bool>("filterByTracksterPID")),
0056 tracksterPIDCategoriesToFilter_(ps.getParameter<std::vector<int>>("tracksterPIDCategoriesToFilter")),
0057 PIDThreshold_(ps.getParameter<double>("PIDThreshold")) {
0058 assert(onnxRuntime_ &&
0059 "TracksterLinkingbySuperClusteringDNN : ONNXRuntime was not provided, the model should have been set in "
0060 "onnxModelPath in the plugin config");
0061 }
0062
0063 void TracksterLinkingbySuperClusteringDNN::initialize(const HGCalDDDConstants* hgcons,
0064 const hgcal::RecHitTools rhtools,
0065 const edm::ESHandle<MagneticField> bfieldH,
0066 const edm::ESHandle<Propagator> propH) {}
0067
0068
0069
0070
0071
0072 bool TracksterLinkingbySuperClusteringDNN::checkExplainedVarianceRatioCut(ticl::Trackster const& ts) const {
0073 float explVar_denominator =
0074 std::accumulate(std::begin(ts.eigenvalues()), std::end(ts.eigenvalues()), 0.f, std::plus<float>());
0075 if (explVar_denominator != 0.) {
0076 float explVarRatio = ts.eigenvalues()[0] / explVar_denominator;
0077 if (ts.raw_energy() > explVarRatioCut_energyBoundary_)
0078 return explVarRatio >= explVarRatioMinimum_highEnergy_;
0079 else
0080 return explVarRatio >= explVarRatioMinimum_lowEnergy_;
0081 } else
0082 return false;
0083 }
0084
0085 bool TracksterLinkingbySuperClusteringDNN::trackstersPassesPIDCut(const Trackster& tst) const {
0086 if (filterByTracksterPID_) {
0087 float probTotal = 0.0f;
0088 for (int cat : tracksterPIDCategoriesToFilter_) {
0089 probTotal += tst.id_probabilities(cat);
0090 }
0091 return probTotal >= PIDThreshold_;
0092 } else
0093 return true;
0094 }
0095
0096
0097
0098
0099
0100
0101
0102 void TracksterLinkingbySuperClusteringDNN::linkTracksters(
0103 const Inputs& input,
0104 std::vector<Trackster>& resultTracksters,
0105 std::vector<std::vector<unsigned int>>& outputSuperclusters,
0106 std::vector<std::vector<unsigned int>>& linkedTracksterIdToInputTracksterId) {
0107
0108 auto const& inputTracksters = input.tracksters;
0109 const unsigned int tracksterCount = inputTracksters.size();
0110
0111
0112
0113
0114
0115 std::vector<unsigned int> trackstersIndicesPt(inputTracksters.size());
0116 std::iota(trackstersIndicesPt.begin(), trackstersIndicesPt.end(), 0);
0117 std::stable_sort(
0118 trackstersIndicesPt.begin(), trackstersIndicesPt.end(), [&inputTracksters](unsigned int i1, unsigned int i2) {
0119 return inputTracksters[i1].raw_pt() > inputTracksters[i2].raw_pt();
0120 });
0121
0122
0123
0124
0125
0126 const unsigned int miniBatchSize =
0127 static_cast<unsigned int>(inferenceBatchSize_) / dnnInputs_->featureCount() * dnnInputs_->featureCount();
0128
0129 std::vector<std::vector<float>>
0130 inputTensorBatches;
0131
0132 unsigned int candidateIndexInCurrentBatch = miniBatchSize;
0133
0134
0135 std::vector<std::vector<std::pair<unsigned int, unsigned int>>> tracksterIndicesUsedInDNN;
0136
0137
0138 std::array<TICLLayerTile, 2> tracksterTilesBothEndcaps_pt;
0139 for (unsigned int i_pt = 0; i_pt < trackstersIndicesPt.size(); ++i_pt) {
0140 Trackster const& ts = inputTracksters[trackstersIndicesPt[i_pt]];
0141 tracksterTilesBothEndcaps_pt[ts.barycenter().eta() > 0.].fill(ts.barycenter().eta(), ts.barycenter().phi(), i_pt);
0142 }
0143
0144
0145 for (unsigned int ts_cand_idx_pt = 1; ts_cand_idx_pt < tracksterCount; ts_cand_idx_pt++) {
0146 Trackster const& ts_cand = inputTracksters[trackstersIndicesPt[ts_cand_idx_pt]];
0147
0148 if (ts_cand.raw_energy() < candidateEnergyThreshold_ ||
0149 !checkExplainedVarianceRatioCut(ts_cand))
0150 continue;
0151
0152 auto& tracksterTiles = tracksterTilesBothEndcaps_pt[ts_cand.barycenter().eta() > 0];
0153 std::array<int, 4> search_box = tracksterTiles.searchBoxEtaPhi(ts_cand.barycenter().Eta() - deltaEtaWindow_,
0154 ts_cand.barycenter().Eta() + deltaEtaWindow_,
0155 ts_cand.barycenter().Phi() - deltaPhiWindow_,
0156 ts_cand.barycenter().Phi() + deltaPhiWindow_);
0157
0158 for (int eta_i = search_box[0]; eta_i <= search_box[1]; ++eta_i) {
0159 for (int phi_i = search_box[2]; phi_i <= search_box[3]; ++phi_i) {
0160 for (unsigned int ts_seed_idx_pt :
0161 tracksterTiles[tracksterTiles.globalBin(eta_i, (phi_i % TileConstants::nPhiBins))]) {
0162 if (ts_seed_idx_pt >= ts_cand_idx_pt)
0163 continue;
0164
0165 Trackster const& ts_seed = inputTracksters[trackstersIndicesPt[ts_seed_idx_pt]];
0166
0167 if (ts_seed.raw_pt() < seedPtThreshold_)
0168 break;
0169
0170 if (!checkExplainedVarianceRatioCut(ts_seed) || !trackstersPassesPIDCut(ts_seed))
0171 continue;
0172
0173
0174 if (std::abs(ts_seed.barycenter().Eta() - ts_cand.barycenter().Eta()) < deltaEtaWindow_ &&
0175 std::abs(deltaPhi(ts_seed.barycenter().Phi(), ts_cand.barycenter().Phi())) < deltaPhiWindow_) {
0176 if (candidateIndexInCurrentBatch >= miniBatchSize) {
0177
0178 assert(candidateIndexInCurrentBatch == miniBatchSize);
0179
0180
0181
0182
0183
0184
0185 inputTensorBatches.emplace_back(
0186 std::min(miniBatchSize,
0187 (tracksterCount * (tracksterCount - 1) - ts_cand_idx_pt * (ts_cand_idx_pt - 1)) / 2) *
0188 dnnInputs_->featureCount());
0189
0190 candidateIndexInCurrentBatch = 0;
0191 tracksterIndicesUsedInDNN.emplace_back();
0192 }
0193
0194 std::vector<float> features = dnnInputs_->computeVector(ts_seed, ts_cand);
0195 assert(features.size() == dnnInputs_->featureCount());
0196 assert((candidateIndexInCurrentBatch + 1) * dnnInputs_->featureCount() <= inputTensorBatches.back().size());
0197
0198 std::copy(features.begin(),
0199 features.end(),
0200 inputTensorBatches.back().begin() + candidateIndexInCurrentBatch * dnnInputs_->featureCount());
0201 candidateIndexInCurrentBatch++;
0202 tracksterIndicesUsedInDNN.back().emplace_back(trackstersIndicesPt[ts_seed_idx_pt],
0203 trackstersIndicesPt[ts_cand_idx_pt]);
0204 }
0205 }
0206 }
0207 }
0208 }
0209
0210 if (inputTensorBatches.empty()) {
0211 LogDebug("HGCalTICLSuperclustering")
0212 << "No superclustering candidate pairs passed preselection before DNN. There are " << tracksterCount
0213 << " tracksters in this event.";
0214 }
0215
0216 #ifdef EDM_ML_DEBUG
0217 if (!inputTensorBatches.empty()) {
0218 std::ostringstream s;
0219
0220 for (unsigned int i = 0;
0221 i < std::min(dnnInputs_->featureCount() * 20, static_cast<unsigned int>(inputTensorBatches[0].size()));
0222 i++) {
0223 s << inputTensorBatches[0][i] << " ";
0224 if (i != 0 && i % dnnInputs_->featureCount() == 0)
0225 s << "],\t[";
0226 }
0227 LogDebug("HGCalTICLSuperclustering") << inputTensorBatches.size()
0228 << " batches were created. First batch starts as follows : [" << s.str()
0229 << "]";
0230 }
0231 #endif
0232
0233
0234 std::vector<std::vector<float>>
0235 batchOutputs;
0236 for (std::vector<float>& singleBatch : inputTensorBatches) {
0237
0238 std::vector<std::vector<float>> inputs_for_onnx{{std::move(singleBatch)}};
0239 std::vector<float> outputs = onnxRuntime_->run(
0240 {"input"}, inputs_for_onnx, {}, {}, inputs_for_onnx[0].size() / dnnInputs_->featureCount())[0];
0241 batchOutputs.push_back(std::move(outputs));
0242 }
0243
0244
0245
0246 std::vector<bool> tracksterMask(tracksterCount, false);
0247
0248
0249
0250 unsigned int previousCandTrackster_idx = std::numeric_limits<unsigned int>::max();
0251 unsigned int bestSeedForCurrentCandidate_idx = std::numeric_limits<unsigned int>::max();
0252 float bestSeedForCurrentCandidate_dnnScore = nnWorkingPoint_;
0253
0254
0255
0256 auto onCandidateTransition = [&](unsigned ts_cand_idx) {
0257 if (bestSeedForCurrentCandidate_idx <
0258 std::numeric_limits<unsigned int>::max()) {
0259 tracksterMask[ts_cand_idx] = true;
0260
0261
0262 std::vector<std::vector<unsigned int>>::iterator seed_supercluster_it =
0263 std::find_if(outputSuperclusters.begin(),
0264 outputSuperclusters.end(),
0265 [bestSeedForCurrentCandidate_idx](std::vector<unsigned int> const& sc) {
0266 return sc[0] == bestSeedForCurrentCandidate_idx;
0267 });
0268
0269 if (seed_supercluster_it == outputSuperclusters.end()) {
0270 outputSuperclusters.emplace_back(std::initializer_list<unsigned int>{bestSeedForCurrentCandidate_idx});
0271 resultTracksters.emplace_back(inputTracksters[bestSeedForCurrentCandidate_idx]);
0272 linkedTracksterIdToInputTracksterId.emplace_back(
0273 std::initializer_list<unsigned int>{bestSeedForCurrentCandidate_idx});
0274 seed_supercluster_it = outputSuperclusters.end() - 1;
0275 tracksterMask[bestSeedForCurrentCandidate_idx] =
0276 true;
0277 }
0278
0279 unsigned int indexIntoOutputTracksters = seed_supercluster_it - outputSuperclusters.begin();
0280 seed_supercluster_it->push_back(ts_cand_idx);
0281 resultTracksters[indexIntoOutputTracksters].mergeTracksters(inputTracksters[ts_cand_idx]);
0282 linkedTracksterIdToInputTracksterId[indexIntoOutputTracksters].push_back(ts_cand_idx);
0283
0284 assert(outputSuperclusters.size() == resultTracksters.size() &&
0285 outputSuperclusters.size() == linkedTracksterIdToInputTracksterId.size());
0286 assert(seed_supercluster_it->size() == linkedTracksterIdToInputTracksterId[indexIntoOutputTracksters].size());
0287
0288 bestSeedForCurrentCandidate_idx = std::numeric_limits<unsigned int>::max();
0289 bestSeedForCurrentCandidate_dnnScore = nnWorkingPoint_;
0290 }
0291 };
0292
0293
0294 for (unsigned int batchIndex = 0; batchIndex < batchOutputs.size(); batchIndex++) {
0295 std::vector<float> const& currentBatchOutputs = batchOutputs[batchIndex];
0296
0297 for (unsigned int indexInBatch = 0; indexInBatch < tracksterIndicesUsedInDNN[batchIndex].size(); indexInBatch++) {
0298 assert(indexInBatch < static_cast<unsigned int>(batchOutputs[batchIndex].size()));
0299
0300 const unsigned int ts_seed_idx = tracksterIndicesUsedInDNN[batchIndex][indexInBatch].first;
0301 const unsigned int ts_cand_idx = tracksterIndicesUsedInDNN[batchIndex][indexInBatch].second;
0302 const float currentDnnScore = currentBatchOutputs[indexInBatch];
0303
0304 if (previousCandTrackster_idx != std::numeric_limits<unsigned int>::max() &&
0305 ts_cand_idx != previousCandTrackster_idx) {
0306
0307 onCandidateTransition(previousCandTrackster_idx);
0308 }
0309
0310 if (currentDnnScore > bestSeedForCurrentCandidate_dnnScore && !tracksterMask[ts_seed_idx]) {
0311
0312 bestSeedForCurrentCandidate_idx = ts_seed_idx;
0313 bestSeedForCurrentCandidate_dnnScore = currentDnnScore;
0314 }
0315 previousCandTrackster_idx = ts_cand_idx;
0316 }
0317 }
0318 onCandidateTransition(previousCandTrackster_idx);
0319
0320
0321 for (unsigned int ts_id = 0; ts_id < tracksterCount; ts_id++) {
0322 if (!tracksterMask[ts_id] && inputTracksters[ts_id].raw_pt() >= seedPtThreshold_) {
0323 outputSuperclusters.emplace_back(std::initializer_list<unsigned int>{ts_id});
0324 resultTracksters.emplace_back(inputTracksters[ts_id]);
0325 linkedTracksterIdToInputTracksterId.emplace_back(std::initializer_list<unsigned int>{ts_id});
0326 }
0327 }
0328
0329 #ifdef EDM_ML_DEBUG
0330 for (std::vector<unsigned int> const& sc : outputSuperclusters) {
0331 std::ostringstream s;
0332 for (unsigned int trackster_id : sc)
0333 s << trackster_id << " ";
0334 LogDebug("HGCalTICLSuperclustering") << "Created supercluster of size " << sc.size()
0335 << " holding tracksters (first one is seed) " << s.str();
0336 }
0337 #endif
0338 }
0339
0340 void TracksterLinkingbySuperClusteringDNN::fillPSetDescription(edm::ParameterSetDescription& desc) {
0341 TracksterLinkingAlgoBase::fillPSetDescription(desc);
0342 desc.add<edm::FileInPath>("onnxModelPath")->setComment("Path to DNN (as ONNX model)");
0343 desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v2", true),
0344 edm::allowedValues<std::string>("v1", "v2"))
0345 ->setComment(
0346 "DNN inputs version tag. Defines which set of features is fed to the DNN. Must match with the actual DNN.");
0347 desc.add<unsigned int>("inferenceBatchSize", 1e5)
0348 ->setComment(
0349 "Size of inference batches fed to DNN. Increasing it should produce faster inference but higher memory "
0350 "usage. "
0351 "Has no physics impact.");
0352 desc.add<double>("nnWorkingPoint")
0353 ->setComment("Working point of DNN (in [0, 1]). DNN score above WP will attempt to supercluster.");
0354 desc.add<double>("deltaEtaWindow", 0.1)
0355 ->setComment(
0356 "Size of delta eta window to consider for superclustering. Seed-candidate pairs outside this window "
0357 "are not considered for DNN inference.");
0358 desc.add<double>("deltaPhiWindow", 0.5)
0359 ->setComment(
0360 "Size of delta phi window to consider for superclustering. Seed-candidate pairs outside this window "
0361 "are not considered for DNN inference.");
0362 desc.add<double>("seedPtThreshold", 4.)
0363 ->setComment("Minimum transverse energy of trackster to be considered as seed of a supercluster");
0364 desc.add<double>("candidateEnergyThreshold", 2.)
0365 ->setComment("Minimum energy of trackster to be considered as candidate for superclustering");
0366 desc.add<double>("explVarRatioCut_energyBoundary", 50.)
0367 ->setComment("Boundary energy between low and high energy explVarRatio cut threshold");
0368 desc.add<double>("explVarRatioMinimum_lowEnergy", 0.92)
0369 ->setComment(
0370 "Cut on explained variance ratio of tracksters to be considered as candidate, "
0371 "for trackster raw_energy < explVarRatioCut_energyBoundary");
0372 desc.add<double>("explVarRatioMinimum_highEnergy", 0.95)
0373 ->setComment(
0374 "Cut on explained variance ratio of tracksters to be considered as candidate, "
0375 "for trackster raw_energy > explVarRatioCut_energyBoundary");
0376 desc.add<bool>("filterByTracksterPID", true)->setComment("Filter tracksters before superclustering by PID score");
0377 desc.add<std::vector<int>>(
0378 "tracksterPIDCategoriesToFilter",
0379 {static_cast<int>(Trackster::ParticleType::photon), static_cast<int>(Trackster::ParticleType::electron)})
0380 ->setComment("List of PID particle types (ticl::Trackster::ParticleType enum) to consider for PID filtering");
0381 desc.add<double>("PIDThreshold", 0.8)->setComment("PID score threshold");
0382 }