File indexing completed on 2024-08-21 04:46:44
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #include <cmath>
0011 #include <vector>
0012 #include <memory>
0013 #include <algorithm>
0014
0015 #include <TTree.h>
0016
0017 #include "FWCore/Framework/interface/Frameworkfwd.h"
0018 #include "FWCore/Framework/interface/one/EDAnalyzer.h"
0019 #include "FWCore/Framework/interface/Event.h"
0020 #include "FWCore/Framework/interface/MakerMacros.h"
0021 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0022 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0023 #include "FWCore/ParameterSet/interface/allowedValues.h"
0024 #include "FWCore/Utilities/interface/InputTag.h"
0025 #include "FWCore/ServiceRegistry/interface/Service.h"
0026
0027 #include "CommonTools/UtilAlgos/interface/TFileService.h"
0028
0029 #include "DataFormats/Math/interface/deltaPhi.h"
0030 #include "DataFormats/HGCalReco/interface/Trackster.h"
0031 #include "SimDataFormats/CaloAnalysis/interface/CaloParticle.h"
0032 #include "SimDataFormats/Associations/interface/TracksterToSimTracksterAssociator.h"
0033
0034 #include "RecoHGCal/TICL/plugins/TracksterLinkingbySuperClusteringDNN.h"
0035 #include "RecoHGCal/TICL/interface/SuperclusteringDNNInputs.h"
0036
0037 using namespace ticl;
0038
0039 class SuperclusteringSampleDumper : public edm::one::EDAnalyzer<edm::one::SharedResources> {
0040 public:
0041 explicit SuperclusteringSampleDumper(const edm::ParameterSet&);
0042
0043 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0044
0045 private:
0046 void beginJob() override;
0047 void analyze(const edm::Event&, const edm::EventSetup&) override;
0048 bool checkExplainedVarianceRatioCut(ticl::Trackster const& ts) const;
0049
0050 const edm::EDGetTokenT<std::vector<Trackster>> tracksters_clue3d_token_;
0051 const edm::EDGetTokenT<ticl::RecoToSimCollectionSimTracksters> tsRecoToSimCP_token_;
0052 float deltaEtaWindow_;
0053 float deltaPhiWindow_;
0054 float seedPtThreshold_;
0055 float candidateEnergyThreshold_;
0056 float explVarRatioCut_energyBoundary_;
0057 float explVarRatioMinimum_lowEnergy_;
0058 float explVarRatioMinimum_highEnergy_;
0059
0060 TTree* output_tree_;
0061 edm::EventID eventId_;
0062 std::unique_ptr<AbstractSuperclusteringDNNInput> dnnInput_;
0063 std::vector<std::vector<float>>
0064 features_;
0065 std::vector<unsigned int> seedTracksterIdx_;
0066 std::vector<unsigned int> candidateTracksterIdx_;
0067
0068 std::vector<float>
0069 seedTracksterBestAssociationScore_;
0070 std::vector<long>
0071 seedTracksterBestAssociation_simTsIdx_;
0072 std::vector<float> seedTracksterBestAssociation_caloParticleEnergy_;
0073
0074 std::vector<float>
0075 candidateTracksterBestAssociationScore_;
0076 std::vector<long>
0077 candidateTracksterBestAssociation_simTsIdx_;
0078
0079 std::vector<float>
0080 candidateTracksterAssociationWithSeed_score_;
0081 };
0082
0083 SuperclusteringSampleDumper::SuperclusteringSampleDumper(const edm::ParameterSet& ps)
0084 : tracksters_clue3d_token_(consumes<std::vector<Trackster>>(ps.getParameter<edm::InputTag>("tracksters"))),
0085 tsRecoToSimCP_token_(
0086 consumes<ticl::RecoToSimCollectionSimTracksters>(ps.getParameter<edm::InputTag>("recoToSimAssociatorCP"))),
0087 deltaEtaWindow_(ps.getParameter<double>("deltaEtaWindow")),
0088 deltaPhiWindow_(ps.getParameter<double>("deltaPhiWindow")),
0089 seedPtThreshold_(ps.getParameter<double>("seedPtThreshold")),
0090 candidateEnergyThreshold_(ps.getParameter<double>("candidateEnergyThreshold")),
0091 explVarRatioCut_energyBoundary_(ps.getParameter<double>("candidateEnergyThreshold")),
0092 explVarRatioMinimum_lowEnergy_(ps.getParameter<double>("explVarRatioMinimum_lowEnergy")),
0093 explVarRatioMinimum_highEnergy_(ps.getParameter<double>("explVarRatioMinimum_highEnergy")),
0094 eventId_(),
0095 dnnInput_(makeSuperclusteringDNNInputFromString(ps.getParameter<std::string>("dnnInputsVersion"))),
0096 features_(dnnInput_->featureCount()) {
0097 usesResource("TFileService");
0098 }
0099
0100 void SuperclusteringSampleDumper::beginJob() {
0101 edm::Service<TFileService> fs;
0102 output_tree_ = fs->make<TTree>("superclusteringTraining", "Superclustering training samples");
0103 output_tree_->Branch("Event", &eventId_);
0104 output_tree_->Branch("seedTracksterIdx", &seedTracksterIdx_);
0105 output_tree_->Branch("candidateTracksterIdx", &candidateTracksterIdx_);
0106 output_tree_->Branch("seedTracksterBestAssociationScore", &seedTracksterBestAssociationScore_);
0107 output_tree_->Branch("seedTracksterBestAssociation_simTsIdx", &seedTracksterBestAssociation_simTsIdx_);
0108 output_tree_->Branch("seedTracksterBestAssociation_caloParticleEnergy",
0109 &seedTracksterBestAssociation_caloParticleEnergy_);
0110 output_tree_->Branch("candidateTracksterBestAssociationScore", &candidateTracksterBestAssociationScore_);
0111 output_tree_->Branch("candidateTracksterBestAssociation_simTsIdx", &candidateTracksterBestAssociation_simTsIdx_);
0112 output_tree_->Branch("candidateTracksterAssociationWithSeed_score", &candidateTracksterAssociationWithSeed_score_);
0113 std::vector<std::string> featureNames = dnnInput_->featureNames();
0114 assert(featureNames.size() == dnnInput_->featureCount());
0115 for (unsigned int i = 0; i < dnnInput_->featureCount(); i++) {
0116 output_tree_->Branch(("feature_" + featureNames[i]).c_str(), &features_[i]);
0117 }
0118 }
0119
0120
0121
0122
0123
0124 bool SuperclusteringSampleDumper::checkExplainedVarianceRatioCut(ticl::Trackster const& ts) const {
0125 float explVar_denominator =
0126 std::accumulate(std::begin(ts.eigenvalues()), std::end(ts.eigenvalues()), 0.f, std::plus<float>());
0127 if (explVar_denominator != 0.) {
0128 float explVarRatio = ts.eigenvalues()[0] / explVar_denominator;
0129 if (ts.raw_energy() > explVarRatioCut_energyBoundary_)
0130 return explVarRatio >= explVarRatioMinimum_highEnergy_;
0131 else
0132 return explVarRatio >= explVarRatioMinimum_lowEnergy_;
0133 } else
0134 return false;
0135 }
0136
0137 void SuperclusteringSampleDumper::analyze(const edm::Event& evt, const edm::EventSetup& iSetup) {
0138 eventId_ = evt.id();
0139
0140 edm::Handle<std::vector<Trackster>> inputTracksters;
0141 evt.getByToken(tracksters_clue3d_token_, inputTracksters);
0142
0143 edm::Handle<ticl::RecoToSimCollectionSimTracksters> assoc_CP_recoToSim;
0144 evt.getByToken(tsRecoToSimCP_token_, assoc_CP_recoToSim);
0145
0146
0147
0148
0149
0150 std::vector<unsigned int> trackstersIndicesPt(inputTracksters->size());
0151 std::iota(trackstersIndicesPt.begin(), trackstersIndicesPt.end(), 0);
0152 std::stable_sort(
0153 trackstersIndicesPt.begin(), trackstersIndicesPt.end(), [&inputTracksters](unsigned int i1, unsigned int i2) {
0154 return (*inputTracksters)[i1].raw_pt() > (*inputTracksters)[i2].raw_pt();
0155 });
0156
0157
0158
0159
0160 for (unsigned int ts_seed_idx_pt = 0; ts_seed_idx_pt < inputTracksters->size(); ts_seed_idx_pt++) {
0161 const unsigned int ts_seed_idx_input =
0162 trackstersIndicesPt[ts_seed_idx_pt];
0163 Trackster const& ts_seed = (*inputTracksters)[ts_seed_idx_input];
0164
0165 if (ts_seed.raw_pt() < seedPtThreshold_)
0166 break;
0167
0168 if (!checkExplainedVarianceRatioCut(ts_seed))
0169 continue;
0170
0171
0172 auto seed_assocs = assoc_CP_recoToSim->find({inputTracksters, ts_seed_idx_input});
0173 if (seed_assocs == assoc_CP_recoToSim->end())
0174 continue;
0175 ticl::RecoToSimCollectionSimTracksters::data_type const& seed_assocWithBestScore =
0176 *std::min_element(seed_assocs->val.begin(),
0177 seed_assocs->val.end(),
0178 [](ticl::RecoToSimCollectionSimTracksters::data_type const& assoc_1,
0179 ticl::RecoToSimCollectionSimTracksters::data_type const& assoc_2) {
0180
0181
0182 return assoc_1.second.second < assoc_2.second.second;
0183 });
0184
0185
0186
0187 for (unsigned int ts_cand_idx_pt = ts_seed_idx_pt + 1; ts_cand_idx_pt < inputTracksters->size(); ts_cand_idx_pt++) {
0188 Trackster const& ts_cand = (*inputTracksters)[trackstersIndicesPt[ts_cand_idx_pt]];
0189
0190
0191 if (!(std::abs(ts_seed.barycenter().Eta() - ts_cand.barycenter().Eta()) < deltaEtaWindow_ &&
0192 std::abs(deltaPhi(ts_seed.barycenter().Phi(), ts_cand.barycenter().Phi())) < deltaPhiWindow_ &&
0193 ts_cand.raw_energy() >= candidateEnergyThreshold_ && checkExplainedVarianceRatioCut(ts_cand)))
0194 continue;
0195
0196 std::vector<float> features = dnnInput_->computeVector(ts_seed, ts_cand);
0197 assert(features.size() == features_.size());
0198 for (unsigned int feature_idx = 0; feature_idx < features_.size(); feature_idx++) {
0199 features_[feature_idx].push_back(features[feature_idx]);
0200 }
0201 seedTracksterIdx_.push_back(trackstersIndicesPt[ts_seed_idx_pt]);
0202 candidateTracksterIdx_.push_back(trackstersIndicesPt[ts_cand_idx_pt]);
0203
0204 float candidateTracksterBestAssociationScore = 1.;
0205 long candidateTracksterBestAssociation_simTsIdx = -1;
0206 float candidateTracksterAssociationWithSeed_score =
0207 1.;
0208
0209
0210 auto cand_assocCP = assoc_CP_recoToSim->find(
0211 edm::Ref<ticl::TracksterCollection>(inputTracksters, trackstersIndicesPt[ts_cand_idx_pt]));
0212 if (cand_assocCP != assoc_CP_recoToSim->end()) {
0213
0214 ticl::RecoToSimCollectionSimTracksters::data_type const& cand_assocWithBestScore =
0215 *std::min_element(cand_assocCP->val.begin(),
0216 cand_assocCP->val.end(),
0217 [](ticl::RecoToSimCollectionSimTracksters::data_type const& assoc_1,
0218 ticl::RecoToSimCollectionSimTracksters::data_type const& assoc_2) {
0219
0220 return assoc_1.second.second < assoc_2.second.second;
0221 });
0222 candidateTracksterBestAssociationScore = cand_assocWithBestScore.second.second;
0223 candidateTracksterBestAssociation_simTsIdx = cand_assocWithBestScore.first.key();
0224
0225
0226 auto cand_assocWithSeedCP =
0227 std::find_if(cand_assocCP->val.begin(),
0228 cand_assocCP->val.end(),
0229 [&seed_assocWithBestScore](ticl::RecoToSimCollectionSimTracksters::data_type const& assoc) {
0230
0231 return assoc.first == seed_assocWithBestScore.first;
0232 });
0233 if (cand_assocWithSeedCP != cand_assocCP->val.end()) {
0234 candidateTracksterAssociationWithSeed_score = cand_assocWithSeedCP->second.second;
0235 }
0236 }
0237
0238 seedTracksterBestAssociationScore_.push_back(seed_assocWithBestScore.second.second);
0239 seedTracksterBestAssociation_simTsIdx_.push_back(seed_assocWithBestScore.first.key());
0240 seedTracksterBestAssociation_caloParticleEnergy_.push_back(seed_assocWithBestScore.first->regressed_energy());
0241
0242 candidateTracksterBestAssociationScore_.push_back(candidateTracksterBestAssociationScore);
0243 candidateTracksterBestAssociation_simTsIdx_.push_back(candidateTracksterBestAssociation_simTsIdx);
0244
0245 candidateTracksterAssociationWithSeed_score_.push_back(candidateTracksterAssociationWithSeed_score);
0246 }
0247 }
0248
0249 output_tree_->Fill();
0250 for (auto& feats : features_)
0251 feats.clear();
0252 seedTracksterIdx_.clear();
0253 candidateTracksterIdx_.clear();
0254 seedTracksterBestAssociationScore_.clear();
0255 seedTracksterBestAssociation_simTsIdx_.clear();
0256 seedTracksterBestAssociation_caloParticleEnergy_.clear();
0257 candidateTracksterBestAssociationScore_.clear();
0258 candidateTracksterBestAssociation_simTsIdx_.clear();
0259 candidateTracksterAssociationWithSeed_score_.clear();
0260 }
0261
0262 void SuperclusteringSampleDumper::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0263 edm::ParameterSetDescription desc;
0264 desc.add<edm::InputTag>("tracksters", edm::InputTag("ticlTrackstersCLUE3DHigh"))
0265 ->setComment("Input trackster collection, same as what is used for superclustering inference.");
0266 desc.add<edm::InputTag>("recoToSimAssociatorCP",
0267 edm::InputTag("tracksterSimTracksterAssociationLinkingbyCLUE3D", "recoToSim"));
0268 desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v2", true),
0269 edm::allowedValues<std::string>("v1", "v2"))
0270 ->setComment(
0271 "DNN inputs version tag. Defines which set of features is fed to the DNN. Must match with the actual DNN.");
0272
0273 desc.add<double>("deltaEtaWindow", 0.2)
0274 ->setComment(
0275 "Size of delta eta window to consider for superclustering. Seed-candidate pairs outside this window "
0276 "are not considered for DNN inference.");
0277 desc.add<double>("deltaPhiWindow", 0.7)
0278 ->setComment(
0279 "Size of delta phi window to consider for superclustering. Seed-candidate pairs outside this window "
0280 "are not considered for DNN inference.");
0281 desc.add<double>("seedPtThreshold", 3.)
0282 ->setComment("Minimum transverse momentum of trackster to be considered as seed of a supercluster");
0283 desc.add<double>("candidateEnergyThreshold", 1.5)
0284 ->setComment("Minimum energy of trackster to be considered as candidate for superclustering");
0285 desc.add<double>("explVarRatioCut_energyBoundary", 50.)
0286 ->setComment("Boundary energy between low and high energy explVarRatio cut threshold");
0287 desc.add<double>("explVarRatioMinimum_lowEnergy", 0.85)
0288 ->setComment(
0289 "Cut on explained variance ratio of tracksters to be considered as candidate, "
0290 "for trackster raw_energy < explVarRatioCut_energyBoundary");
0291 desc.add<double>("explVarRatioMinimum_highEnergy", 0.9)
0292 ->setComment(
0293 "Cut on explained variance ratio of tracksters to be considered as candidate, "
0294 "for trackster raw_energy > explVarRatioCut_energyBoundary");
0295 descriptions.add("superclusteringSampleDumper", desc);
0296 }
0297
0298 DEFINE_FWK_MODULE(SuperclusteringSampleDumper);