Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:25:07

0001 // -*- C++ -*-
0002 //
0003 // Package:    RecoEgamma/ElectronIdentification
0004 // Class:      ElectronMVANtuplizer
0005 //
0006 /**\class ElectronMVANtuplizer ElectronMVANtuplizer.cc RecoEgamma/ElectronIdentification/plugins/ElectronMVANtuplizer.cc
0007 
0008  Description: Ntuplizer for training and testing electron MVA IDs.
0009 
0010  Implementation:
0011      [Notes on implementation]
0012 */
0013 //
0014 // Original Author:  Jonas REMBSER
0015 //         Created:  Thu, 22 Mar 2018 14:54:24 GMT
0016 //
0017 //
0018 
0019 // user include files
0020 #include "FWCore/Framework/interface/Frameworkfwd.h"
0021 #include "FWCore/Framework/interface/one/EDAnalyzer.h"
0022 #include "FWCore/Framework/interface/Event.h"
0023 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0024 #include "FWCore/Utilities/interface/InputTag.h"
0025 #include "FWCore/ServiceRegistry/interface/Service.h"
0026 #include "FWCore/Framework/interface/MakerMacros.h"
0027 #include "CommonTools/UtilAlgos/interface/TFileService.h"
0028 #include "DataFormats/EgammaCandidates/interface/GsfElectron.h"
0029 #include "DataFormats/PatCandidates/interface/Electron.h"
0030 #include "RecoEgamma/EgammaTools/interface/MVAVariableManager.h"
0031 #include "RecoEgamma/EgammaTools/interface/MVAVariableHelper.h"
0032 #include "FWCore/Utilities/interface/EDGetToken.h"
0033 #include "RecoEcal/EgammaCoreTools/interface/EcalClusterLazyTools.h"
0034 #include "SimDataFormats/PileupSummaryInfo/interface/PileupSummaryInfo.h"
0035 #include "DataFormats/VertexReco/interface/Vertex.h"
0036 #include "DataFormats/VertexReco/interface/VertexFwd.h"
0037 
0038 #include <TTree.h>
0039 #include <TFile.h>
0040 #include <Math/VectorUtil.h>
0041 
0042 //
0043 // class declaration
0044 //
0045 
0046 class ElectronMVANtuplizer : public edm::one::EDAnalyzer<edm::one::SharedResources> {
0047 public:
0048   explicit ElectronMVANtuplizer(const edm::ParameterSet&);
0049 
0050   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0051 
0052 private:
0053   void analyze(const edm::Event&, const edm::EventSetup&) override;
0054 
0055   // method called once each job just before starting event loop
0056   void beginJob() override{};
0057   // method called once each job just after ending the event loop
0058   void endJob() override{};
0059 
0060   int matchToTruth(reco::GsfElectron const& electron, edm::View<reco::GenParticle> const& genParticles) const;
0061 
0062   // ----------member data ---------------------------
0063 
0064   //global variables
0065   int nEvent_;
0066   int nRun_;
0067   int nLumi_;
0068   int genNpu_;
0069   int vtxN_;
0070 
0071   // electron variables
0072   float eleQ_;
0073   int ele3Q_;
0074   int matchedToGenEle_;
0075 
0076   std::vector<float> energyMatrix_;
0077 
0078   // gap variables
0079   bool eleIsEB_;
0080   bool eleIsEE_;
0081   bool eleIsEBEtaGap_;
0082   bool eleIsEBPhiGap_;
0083   bool eleIsEBEEGap_;
0084   bool eleIsEEDeeGap_;
0085   bool eleIsEERingGap_;
0086 
0087   // config
0088   const bool isMC_;
0089   const double deltaR_;
0090   const double ptThreshold_;
0091 
0092   // ID decisions objects
0093   const std::vector<std::string> eleMapTags_;
0094   std::vector<edm::EDGetTokenT<edm::ValueMap<bool>>> eleMapTokens_;
0095   const std::vector<std::string> eleMapBranchNames_;
0096   const size_t nEleMaps_;
0097 
0098   // MVA values and categories (optional)
0099   const std::vector<std::string> valMapTags_;
0100   std::vector<edm::EDGetTokenT<edm::ValueMap<float>>> valMapTokens_;
0101   const std::vector<std::string> valMapBranchNames_;
0102   const size_t nValMaps_;
0103 
0104   const std::vector<std::string> mvaCatTags_;
0105   std::vector<edm::EDGetTokenT<edm::ValueMap<int>>> mvaCatTokens_;
0106   const std::vector<std::string> mvaCatBranchNames_;
0107   const size_t nCats_;
0108 
0109   // Tokens
0110   const edm::EDGetTokenT<edm::View<reco::GsfElectron>> src_;
0111   const edm::EDGetTokenT<std::vector<reco::Vertex>> vertices_;
0112   const edm::EDGetTokenT<std::vector<PileupSummaryInfo>> pileup_;
0113   const edm::EDGetTokenT<edm::View<reco::GenParticle>> genParticles_;
0114   const edm::EDGetTokenT<EcalRecHitCollection> ebRecHits_;
0115   const edm::EDGetTokenT<EcalRecHitCollection> eeRecHits_;
0116 
0117   const EcalClusterLazyTools::ESGetTokens ecalClusterToolsESGetTokens_;
0118 
0119   // to hold ID decisions and categories
0120   std::vector<int> mvaPasses_;
0121   std::vector<float> mvaValues_;
0122   std::vector<int> mvaCats_;
0123 
0124   // To get the auxiliary MVA variables
0125   const MVAVariableHelper variableHelper_;
0126 
0127   // other
0128   TTree* tree_;
0129 
0130   MVAVariableManager<reco::GsfElectron> mvaVarMngr_;
0131   const int nVars_;
0132   std::vector<float> vars_;
0133 
0134   const bool doEnergyMatrix_;
0135   const int energyMatrixSize_;
0136 };
0137 
0138 //
0139 // constants, enums and typedefs
0140 //
0141 
0142 enum ElectronMatchType {
0143   UNMATCHED,
0144   TRUE_PROMPT_ELECTRON,
0145   TRUE_ELECTRON_FROM_TAU,
0146   TRUE_NON_PROMPT_ELECTRON,
0147 };  // The last does not include tau parents
0148 
0149 //
0150 // constructors and destructor
0151 //
0152 ElectronMVANtuplizer::ElectronMVANtuplizer(const edm::ParameterSet& iConfig)
0153     : isMC_(iConfig.getParameter<bool>("isMC")),
0154       deltaR_(iConfig.getParameter<double>("deltaR")),
0155       ptThreshold_(iConfig.getParameter<double>("ptThreshold")),
0156       eleMapTags_(iConfig.getParameter<std::vector<std::string>>("eleMVAs")),
0157       eleMapBranchNames_(iConfig.getParameter<std::vector<std::string>>("eleMVALabels")),
0158       nEleMaps_(eleMapBranchNames_.size()),
0159       valMapTags_(iConfig.getParameter<std::vector<std::string>>("eleMVAValMaps")),
0160       valMapBranchNames_(iConfig.getParameter<std::vector<std::string>>("eleMVAValMapLabels")),
0161       nValMaps_(valMapBranchNames_.size()),
0162       mvaCatTags_(iConfig.getParameter<std::vector<std::string>>("eleMVACats")),
0163       mvaCatBranchNames_(iConfig.getParameter<std::vector<std::string>>("eleMVACatLabels")),
0164       nCats_(mvaCatBranchNames_.size()),
0165       src_(consumes<edm::View<reco::GsfElectron>>(iConfig.getParameter<edm::InputTag>("src"))),
0166       vertices_(consumes<std::vector<reco::Vertex>>(iConfig.getParameter<edm::InputTag>("vertices"))),
0167       pileup_(consumes<std::vector<PileupSummaryInfo>>(iConfig.getParameter<edm::InputTag>("pileup"))),
0168       genParticles_(consumes<edm::View<reco::GenParticle>>(iConfig.getParameter<edm::InputTag>("genParticles"))),
0169       ebRecHits_(consumes<EcalRecHitCollection>(iConfig.getParameter<edm::InputTag>("ebReducedRecHitCollection"))),
0170       eeRecHits_(consumes<EcalRecHitCollection>(iConfig.getParameter<edm::InputTag>("eeReducedRecHitCollection"))),
0171       ecalClusterToolsESGetTokens_{consumesCollector()},
0172       mvaPasses_(nEleMaps_),
0173       mvaValues_(nValMaps_),
0174       mvaCats_(nCats_),
0175       variableHelper_(consumesCollector()),
0176       mvaVarMngr_(iConfig.getParameter<std::string>("variableDefinition"), MVAVariableHelper::indexMap()),
0177       nVars_(mvaVarMngr_.getNVars()),
0178       vars_(nVars_),
0179       doEnergyMatrix_(iConfig.getParameter<bool>("doEnergyMatrix")),
0180       energyMatrixSize_(iConfig.getParameter<int>("energyMatrixSize")) {
0181   // eleMaps
0182   for (auto const& tag : eleMapTags_) {
0183     eleMapTokens_.push_back(consumes<edm::ValueMap<bool>>(edm::InputTag(tag)));
0184   }
0185   // valMaps
0186   for (auto const& tag : valMapTags_) {
0187     valMapTokens_.push_back(consumes<edm::ValueMap<float>>(edm::InputTag(tag)));
0188   }
0189   // categories
0190   for (auto const& tag : mvaCatTags_) {
0191     mvaCatTokens_.push_back(consumes<edm::ValueMap<int>>(edm::InputTag(tag)));
0192   }
0193 
0194   // Book tree
0195   usesResource(TFileService::kSharedResource);
0196   edm::Service<TFileService> fs;
0197   tree_ = fs->make<TTree>("tree", "tree");
0198 
0199   tree_->Branch("nEvent", &nEvent_);
0200   tree_->Branch("nRun", &nRun_);
0201   tree_->Branch("nLumi", &nLumi_);
0202   if (isMC_)
0203     tree_->Branch("genNpu", &genNpu_);
0204   tree_->Branch("vtxN", &vtxN_);
0205 
0206   tree_->Branch("ele_q", &eleQ_);
0207   tree_->Branch("ele_3q", &ele3Q_);
0208 
0209   if (doEnergyMatrix_)
0210     tree_->Branch("energyMatrix", &energyMatrix_);
0211 
0212   if (isMC_)
0213     tree_->Branch("matchedToGenEle", &matchedToGenEle_);
0214 
0215   for (int i = 0; i < nVars_; ++i)
0216     tree_->Branch(mvaVarMngr_.getName(i).c_str(), &vars_[i]);
0217 
0218   tree_->Branch("ele_isEB", &eleIsEB_);
0219   tree_->Branch("ele_isEE", &eleIsEE_);
0220   tree_->Branch("ele_isEBEtaGap", &eleIsEBEtaGap_);
0221   tree_->Branch("ele_isEBPhiGap", &eleIsEBPhiGap_);
0222   tree_->Branch("ele_isEBEEGap", &eleIsEBEEGap_);
0223   tree_->Branch("ele_isEEDeeGap", &eleIsEEDeeGap_);
0224   tree_->Branch("ele_isEERingGap", &eleIsEERingGap_);
0225 
0226   // IDs
0227   for (size_t k = 0; k < nValMaps_; ++k) {
0228     tree_->Branch(valMapBranchNames_[k].c_str(), &mvaValues_[k]);
0229   }
0230 
0231   for (size_t k = 0; k < nEleMaps_; ++k) {
0232     tree_->Branch(eleMapBranchNames_[k].c_str(), &mvaPasses_[k]);
0233   }
0234 
0235   for (size_t k = 0; k < nCats_; ++k) {
0236     tree_->Branch(mvaCatBranchNames_[k].c_str(), &mvaCats_[k]);
0237   }
0238 }
0239 
0240 // ------------ method called for each event  ------------
0241 void ElectronMVANtuplizer::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) {
0242   // Fill global event info
0243   nEvent_ = iEvent.id().event();
0244   nRun_ = iEvent.id().run();
0245   nLumi_ = iEvent.luminosityBlock();
0246 
0247   // Get Handles
0248   auto src = iEvent.getHandle(src_);
0249   auto vertices = iEvent.getHandle(vertices_);
0250 
0251   // initialize cluster tools
0252   std::unique_ptr<noZS::EcalClusterLazyTools> lazyTools;
0253   if (doEnergyMatrix_) {
0254     // Configure Lazy Tools, which will compute 5x5 quantities
0255     lazyTools = std::make_unique<noZS::EcalClusterLazyTools>(
0256         iEvent, ecalClusterToolsESGetTokens_.get(iSetup), ebRecHits_, eeRecHits_);
0257   }
0258 
0259   // Get MC only Handles, which are allowed to be non-valid
0260   auto genParticles = iEvent.getHandle(genParticles_);
0261   auto pileup = iEvent.getHandle(pileup_);
0262 
0263   vtxN_ = vertices->size();
0264 
0265   // Fill with true number of pileup
0266   if (isMC_) {
0267     for (const auto& pu : *pileup) {
0268       int bx = pu.getBunchCrossing();
0269       if (bx == 0) {
0270         genNpu_ = pu.getPU_NumInteractions();
0271         break;
0272       }
0273     }
0274   }
0275 
0276   // Get MVA decisions
0277   edm::Handle<edm::ValueMap<bool>> decisions[nEleMaps_];
0278   for (size_t k = 0; k < nEleMaps_; ++k) {
0279     iEvent.getByToken(eleMapTokens_[k], decisions[k]);
0280   }
0281 
0282   // Get MVA values
0283   edm::Handle<edm::ValueMap<float>> values[nValMaps_];
0284   for (size_t k = 0; k < nValMaps_; ++k) {
0285     iEvent.getByToken(valMapTokens_[k], values[k]);
0286   }
0287 
0288   // Get MVA categories
0289   edm::Handle<edm::ValueMap<int>> mvaCats[nCats_];
0290   for (size_t k = 0; k < nCats_; ++k) {
0291     iEvent.getByToken(mvaCatTokens_[k], mvaCats[k]);
0292   }
0293 
0294   std::vector<float> extraVariables = variableHelper_.getAuxVariables(iEvent);
0295 
0296   for (auto const& ele : src->ptrs()) {
0297     if (ele->pt() < ptThreshold_)
0298       continue;
0299 
0300     // Fill the energy matrix around the seed
0301     if (doEnergyMatrix_) {
0302       const auto& seed = *(ele->superCluster()->seed());
0303       energyMatrix_ = lazyTools->energyMatrix(seed, energyMatrixSize_);
0304     }
0305 
0306     // Fill various tree variable
0307     eleQ_ = ele->charge();
0308     ele3Q_ = ele->chargeInfo().isGsfCtfScPixConsistent;
0309 
0310     for (int iVar = 0; iVar < nVars_; ++iVar) {
0311       vars_[iVar] = mvaVarMngr_.getValue(iVar, *ele, extraVariables);
0312     }
0313 
0314     if (isMC_) {
0315       matchedToGenEle_ = matchToTruth(*ele, *genParticles);
0316     }
0317 
0318     // gap variables
0319     eleIsEB_ = ele->isEB();
0320     eleIsEE_ = ele->isEE();
0321     eleIsEBEEGap_ = ele->isEBEEGap();
0322     eleIsEBEtaGap_ = ele->isEBEtaGap();
0323     eleIsEBPhiGap_ = ele->isEBPhiGap();
0324     eleIsEEDeeGap_ = ele->isEEDeeGap();
0325     eleIsEERingGap_ = ele->isEERingGap();
0326 
0327     //
0328     // Look up and save the ID decisions
0329     //
0330     for (size_t k = 0; k < nEleMaps_; ++k)
0331       mvaPasses_[k] = static_cast<int>((*decisions[k])[ele]);
0332     for (size_t k = 0; k < nValMaps_; ++k)
0333       mvaValues_[k] = (*values[k])[ele];
0334     for (size_t k = 0; k < nCats_; ++k)
0335       mvaCats_[k] = (*mvaCats[k])[ele];
0336 
0337     tree_->Fill();
0338   }
0339 }
0340 
0341 int ElectronMVANtuplizer::matchToTruth(reco::GsfElectron const& electron,
0342                                        edm::View<reco::GenParticle> const& genParticles) const {
0343   //
0344   // Explicit loop and geometric matching method (advised by Josh Bendavid)
0345   //
0346 
0347   // Find the closest status 1 gen electron to the reco electron
0348   double dR = 999;
0349   reco::GenParticle const* closestElectron = nullptr;
0350   for (auto const& particle : genParticles) {
0351     // Drop everything that is not electron or not status 1
0352     if (std::abs(particle.pdgId()) != 11 || particle.status() != 1)
0353       continue;
0354     //
0355     double dRtmp = ROOT::Math::VectorUtil::DeltaR(electron.p4(), particle.p4());
0356     if (dRtmp < dR) {
0357       dR = dRtmp;
0358       closestElectron = &particle;
0359     }
0360   }
0361   // See if the closest electron is close enough. If not, no match found.
0362   if (closestElectron == nullptr || dR >= deltaR_)
0363     return UNMATCHED;
0364 
0365   if (closestElectron->fromHardProcessFinalState())
0366     return TRUE_PROMPT_ELECTRON;
0367 
0368   if (closestElectron->isDirectHardProcessTauDecayProductFinalState())
0369     return TRUE_ELECTRON_FROM_TAU;
0370 
0371   // What remains is true non-prompt electrons
0372   return TRUE_NON_PROMPT_ELECTRON;
0373 }
0374 
0375 // ------------ method fills 'descriptions' with the allowed parameters for the module  ------------
0376 void ElectronMVANtuplizer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0377   edm::ParameterSetDescription desc;
0378   desc.add<edm::InputTag>("src", edm::InputTag("slimmedElectrons"));
0379   desc.add<edm::InputTag>("vertices", edm::InputTag("offlineSlimmedPrimaryVertices"));
0380   desc.add<edm::InputTag>("pileup", edm::InputTag("slimmedAddPileupInfo"));
0381   desc.add<edm::InputTag>("genParticles", edm::InputTag("prunedGenParticles"));
0382   desc.add<edm::InputTag>("ebReducedRecHitCollection", edm::InputTag("reducedEgamma", "reducedEBRecHits"));
0383   desc.add<edm::InputTag>("eeReducedRecHitCollection", edm::InputTag("reducedEgamma", "reducedEERecHits"));
0384   desc.add<std::string>("variableDefinition");
0385   desc.add<bool>("doEnergyMatrix", false);
0386   desc.add<int>("energyMatrixSize", 2)->setComment("extension of crystals in each direction away from the seed");
0387   desc.add<bool>("isMC", true);
0388   desc.add<double>("deltaR", 0.1);
0389   desc.add<double>("ptThreshold", 5.0);
0390   desc.add<std::vector<std::string>>("eleMVAs", {});
0391   desc.add<std::vector<std::string>>("eleMVALabels", {});
0392   desc.add<std::vector<std::string>>("eleMVAValMaps", {});
0393   desc.add<std::vector<std::string>>("eleMVAValMapLabels", {});
0394   desc.add<std::vector<std::string>>("eleMVACats", {});
0395   desc.add<std::vector<std::string>>("eleMVACatLabels", {});
0396   descriptions.addDefault(desc);
0397 }
0398 
0399 //define this as a plug-in
0400 DEFINE_FWK_MODULE(ElectronMVANtuplizer);