EgammaTools/plugins/EGRegressionModifierV1.cc

0001 #include "CondFormats/DataRecord/interface/GBRWrapperRcd.h"
0002 #include "CondFormats/GBRForest/interface/GBRForest.h"
0003 #include "DataFormats/EcalDetId/interface/EBDetId.h"
0004 #include "DataFormats/EcalDetId/interface/EEDetId.h"
0005 #include "RecoEgamma/EgammaTools/plugins/EGRegressionModifierHelpers.h"
0006 #include "CommonTools/CandAlgos/interface/ModifyObjectValueBase.h"
0007 #include "DataFormats/Common/interface/ValueMap.h"
0008 #include "DataFormats/EgammaCandidates/interface/GsfElectron.h"
0009 #include "DataFormats/EgammaCandidates/interface/Photon.h"
0010 #include "FWCore/Utilities/interface/EDGetToken.h"
0011 #include "FWCore/Utilities/interface/InputTag.h"
0012 #include "RecoEgamma/EgammaTools/interface/EcalClusterLocal.h"
0013 #include "Geometry/Records/interface/CaloGeometryRecord.h"
0014 #include "Geometry/CaloGeometry/interface/CaloGeometry.h"
0015
0016 #include <vdt/vdtMath.h>
0017
0018 class EGRegressionModifierV1 : public ModifyObjectValueBase {
0019 public:
0020   EGRegressionModifierV1(const edm::ParameterSet& conf, edm::ConsumesCollector& cc);
0021
0022   void setEvent(const edm::Event&) final;
0023   void setEventContent(const edm::EventSetup&) final;
0024
0025   void modifyObject(reco::GsfElectron&) const final;
0026   void modifyObject(reco::Photon&) const final;
0027
0028   // just calls reco versions
0029   void modifyObject(pat::Electron& ele) const final { modifyObject(static_cast<reco::GsfElectron&>(ele)); }
0030   void modifyObject(pat::Photon& pho) const final { modifyObject(static_cast<reco::Photon&>(pho)); }
0031
0032 private:
0033   EGRegressionModifierCondTokens eleCond50nsTokens_;
0034   EGRegressionModifierCondTokens phoCond50nsTokens_;
0035   EGRegressionModifierCondTokens eleCond25nsTokens_;
0036   EGRegressionModifierCondTokens phoCond25nsTokens_;
0037
0038   edm::ESGetToken<GBRForest, GBRWrapperRcd> condNamesWeight50nsToken_;
0039   edm::ESGetToken<GBRForest, GBRWrapperRcd> condNamesWeight25nsToken_;
0040
0041   const bool autoDetectBunchSpacing_;
0042   int bunchspacing_;
0043   edm::EDGetTokenT<unsigned int> bunchSpacingToken_;
0044   float rhoValue_;
0045   edm::EDGetTokenT<double> rhoToken_;
0046   int nVtx_;
0047   edm::EDGetTokenT<reco::VertexCollection> vtxToken_;
0048   edm::ESGetToken<CaloGeometry, CaloGeometryRecord> caloGeomToken_;
0049   CaloGeometry const* caloGeom_;
0050   const bool applyExtraHighEnergyProtection_;
0051
0052   std::vector<const GBRForestD*> phoForestsMean_;
0053   std::vector<const GBRForestD*> phoForestsSigma_;
0054   std::vector<const GBRForestD*> eleForestsMean_;
0055   std::vector<const GBRForestD*> eleForestsSigma_;
0056   const GBRForest* epForest_;
0057 };
0058
0059 DEFINE_EDM_PLUGIN(ModifyObjectValueFactory, EGRegressionModifierV1, "EGRegressionModifierV1");
0060
0061 EGRegressionModifierV1::EGRegressionModifierV1(const edm::ParameterSet& conf, edm::ConsumesCollector& cc)
0062     : ModifyObjectValueBase(conf),
0063       eleCond50nsTokens_{conf.getParameterSet("electron_config"), "regressionKey_50ns", "uncertaintyKey_50ns", cc},
0064       phoCond50nsTokens_{conf.getParameterSet("photon_config"), "regressionKey_50ns", "uncertaintyKey_50ns", cc},
0065       eleCond25nsTokens_{conf.getParameterSet("electron_config"), "regressionKey_25ns", "uncertaintyKey_25ns", cc},
0066       phoCond25nsTokens_{conf.getParameterSet("photon_config"), "regressionKey_25ns", "uncertaintyKey_25ns", cc},
0067       autoDetectBunchSpacing_(conf.getParameter<bool>("autoDetectBunchSpacing")),
0068       bunchspacing_(autoDetectBunchSpacing_ ? 450 : conf.getParameter<int>("manualBunchSpacing")),
0069       rhoToken_(cc.consumes(conf.getParameter<edm::InputTag>("rhoCollection"))),
0070       vtxToken_(cc.consumes(conf.getParameter<edm::InputTag>("vertexCollection"))),
0071       caloGeomToken_{cc.esConsumes()},
0072       applyExtraHighEnergyProtection_(conf.getParameter<bool>("applyExtraHighEnergyProtection")) {
0073   if (autoDetectBunchSpacing_)
0074     bunchSpacingToken_ = cc.consumes(conf.getParameter<edm::InputTag>("bunchSpacingTag"));
0075
0076   auto const& electrons = conf.getParameterSet("electron_config");
0077   condNamesWeight50nsToken_ = cc.esConsumes(electrons.getParameter<edm::ESInputTag>("combinationKey_50ns"));
0078   condNamesWeight25nsToken_ = cc.esConsumes(electrons.getParameter<edm::ESInputTag>("combinationKey_25ns"));
0079 }
0080
0081 void EGRegressionModifierV1::setEvent(const edm::Event& evt) {
0082   if (autoDetectBunchSpacing_) {
0083     bunchspacing_ = evt.get(bunchSpacingToken_);
0084   }
0085   rhoValue_ = evt.get(rhoToken_);
0086   nVtx_ = evt.get(vtxToken_).size();
0087 }
0088
0089 void EGRegressionModifierV1::setEventContent(const edm::EventSetup& evs) {
0090   caloGeom_ = &evs.getData(caloGeomToken_);
0091
0092   phoForestsMean_ = retrieveGBRForests(evs, (bunchspacing_ == 25) ? phoCond25nsTokens_.mean : phoCond50nsTokens_.mean);
0093   phoForestsSigma_ =
0094       retrieveGBRForests(evs, (bunchspacing_ == 25) ? phoCond25nsTokens_.sigma : phoCond50nsTokens_.sigma);
0095
0096   eleForestsMean_ = retrieveGBRForests(evs, (bunchspacing_ == 25) ? eleCond25nsTokens_.mean : eleCond50nsTokens_.mean);
0097   eleForestsSigma_ =
0098       retrieveGBRForests(evs, (bunchspacing_ == 25) ? eleCond25nsTokens_.sigma : eleCond50nsTokens_.sigma);
0099
0100   epForest_ = &evs.getData((bunchspacing_ == 25) ? condNamesWeight25nsToken_ : condNamesWeight50nsToken_);
0101 }
0102
0103 void EGRegressionModifierV1::modifyObject(reco::GsfElectron& ele) const {
0104   // regression calculation needs no additional valuemaps
0105
0106   const reco::SuperClusterRef& superClus = ele.superCluster();
0107   const edm::Ptr<reco::CaloCluster>& theseed = superClus->seed();
0108   const int numberOfClusters = superClus->clusters().size();
0109   const bool missing_clusters = !superClus->clusters()[numberOfClusters - 1].isAvailable();
0110
0111   if (missing_clusters)
0112     return;  // do not apply corrections in case of missing info (slimmed MiniAOD electrons)
0113
0114   std::array<float, 33> eval;
0115   const double rawEnergy = superClus->rawEnergy();
0116   const auto& ess = ele.showerShape();
0117
0118   // SET INPUTS
0119   eval[0] = nVtx_;
0120   eval[1] = rawEnergy;
0121   eval[2] = superClus->eta();
0122   eval[3] = superClus->phi();
0123   eval[4] = superClus->etaWidth();
0124   eval[5] = superClus->phiWidth();
0125   eval[6] = ess.r9;
0126   eval[7] = theseed->energy() / rawEnergy;
0127   eval[8] = ess.eMax / rawEnergy;
0128   eval[9] = ess.e2nd / rawEnergy;
0129   eval[10] = (ess.eLeft + ess.eRight != 0.f ? (ess.eLeft - ess.eRight) / (ess.eLeft + ess.eRight) : 0.f);
0130   eval[11] = (ess.eTop + ess.eBottom != 0.f ? (ess.eTop - ess.eBottom) / (ess.eTop + ess.eBottom) : 0.f);
0131   eval[12] = ess.sigmaIetaIeta;
0132   eval[13] = ess.sigmaIetaIphi;
0133   eval[14] = ess.sigmaIphiIphi;
0134   eval[15] = std::max(0, numberOfClusters - 1);
0135
0136   // calculate sub-cluster variables
0137   std::vector<float> clusterRawEnergy;
0138   clusterRawEnergy.resize(std::max(3, numberOfClusters), 0);
0139   std::vector<float> clusterDEtaToSeed;
0140   clusterDEtaToSeed.resize(std::max(3, numberOfClusters), 0);
0141   std::vector<float> clusterDPhiToSeed;
0142   clusterDPhiToSeed.resize(std::max(3, numberOfClusters), 0);
0143   float clusterMaxDR = 999.;
0144   float clusterMaxDRDPhi = 999.;
0145   float clusterMaxDRDEta = 999.;
0146   float clusterMaxDRRawEnergy = 0.;
0147
0148   size_t iclus = 0;
0149   float maxDR = 0;
0150   // loop over all clusters that aren't the seed
0151   for (auto const& pclus : superClus->clusters()) {
0152     if (theseed == pclus)
0153       continue;
0154     clusterRawEnergy[iclus] = pclus->energy();
0155     clusterDPhiToSeed[iclus] = reco::deltaPhi(pclus->phi(), theseed->phi());
0156     clusterDEtaToSeed[iclus] = pclus->eta() - theseed->eta();
0157
0158     // find cluster with max dR
0159     const auto the_dr = reco::deltaR(*pclus, *theseed);
0160     if (the_dr > maxDR) {
0161       maxDR = the_dr;
0162       clusterMaxDR = maxDR;
0163       clusterMaxDRDPhi = clusterDPhiToSeed[iclus];
0164       clusterMaxDRDEta = clusterDEtaToSeed[iclus];
0165       clusterMaxDRRawEnergy = clusterRawEnergy[iclus];
0166     }
0167     ++iclus;
0168   }
0169
0170   eval[16] = clusterMaxDR;
0171   eval[17] = clusterMaxDRDPhi;
0172   eval[18] = clusterMaxDRDEta;
0173   eval[19] = clusterMaxDRRawEnergy / rawEnergy;
0174   eval[20] = clusterRawEnergy[0] / rawEnergy;
0175   eval[21] = clusterRawEnergy[1] / rawEnergy;
0176   eval[22] = clusterRawEnergy[2] / rawEnergy;
0177   eval[23] = clusterDPhiToSeed[0];
0178   eval[24] = clusterDPhiToSeed[1];
0179   eval[25] = clusterDPhiToSeed[2];
0180   eval[26] = clusterDEtaToSeed[0];
0181   eval[27] = clusterDEtaToSeed[1];
0182   eval[28] = clusterDEtaToSeed[2];
0183
0184   // calculate coordinate variables
0185   const bool isEB = ele.isEB();
0186   float dummy;
0187   int iPhi;
0188   int iEta;
0189   float cryPhi;
0190   float cryEta;
0191   if (ele.isEB())
0192     egammaTools::localEcalClusterCoordsEB(*theseed, *caloGeom_, cryEta, cryPhi, iEta, iPhi, dummy, dummy);
0193   else
0194     egammaTools::localEcalClusterCoordsEE(*theseed, *caloGeom_, cryEta, cryPhi, iEta, iPhi, dummy, dummy);
0195
0196   if (isEB) {
0197     eval[29] = cryEta;
0198     eval[30] = cryPhi;
0199     eval[31] = iEta;
0200     eval[32] = iPhi;
0201   } else {
0202     eval[29] = superClus->preshowerEnergy() / superClus->rawEnergy();
0203   }
0204
0205   //magic numbers for MINUIT-like transformation of BDT output onto limited range
0206   //(These should be stored inside the conditions object in the future as well)
0207   constexpr double meanlimlow = 0.2;
0208   constexpr double meanlimhigh = 2.0;
0209   constexpr double meanoffset = meanlimlow + 0.5 * (meanlimhigh - meanlimlow);
0210   constexpr double meanscale = 0.5 * (meanlimhigh - meanlimlow);
0211
0212   constexpr double sigmalimlow = 0.0002;
0213   constexpr double sigmalimhigh = 0.5;
0214   constexpr double sigmaoffset = sigmalimlow + 0.5 * (sigmalimhigh - sigmalimlow);
0215   constexpr double sigmascale = 0.5 * (sigmalimhigh - sigmalimlow);
0216
0217   const int coridx = isEB ? 0 : 1;
0218
0219   //these are the actual BDT responses
0220   double rawmean = eleForestsMean_[coridx]->GetResponse(eval.data());
0221   double rawsigma = eleForestsSigma_[coridx]->GetResponse(eval.data());
0222
0223   //apply transformation to limited output range (matching the training)
0224   double mean = meanoffset + meanscale * vdt::fast_sin(rawmean);
0225   double sigma = sigmaoffset + sigmascale * vdt::fast_sin(rawsigma);
0226
0227   //regression target is ln(Etrue/Eraw)
0228   //so corrected energy is ecor=exp(mean)*e, uncertainty is exp(mean)*eraw*sigma=ecor*sigma
0229   double ecor = mean * (eval[1]);
0230   if (!isEB)
0231     ecor = mean * (eval[1] + superClus->preshowerEnergy());
0232   const double sigmacor = sigma * ecor;
0233
0234   ele.setCorrectedEcalEnergy(ecor);
0235   ele.setCorrectedEcalEnergyError(sigmacor);
0236
0237   // E-p combination
0238   std::array<float, 11> eval_ep;
0239
0240   const float ep = ele.trackMomentumAtVtx().R();
0241   const float tot_energy = superClus->rawEnergy() + superClus->preshowerEnergy();
0242   const float momentumError = ele.trackMomentumError();
0243   const float trkMomentumRelError = ele.trackMomentumError() / ep;
0244   const float eOverP = tot_energy * mean / ep;
0245   eval_ep[0] = tot_energy * mean;
0246   eval_ep[1] = sigma / mean;
0247   eval_ep[2] = ep;
0248   eval_ep[3] = trkMomentumRelError;
0249   eval_ep[4] = sigma / mean / trkMomentumRelError;
0250   eval_ep[5] = tot_energy * mean / ep;
0251   eval_ep[6] = tot_energy * mean / ep * sqrt(sigma / mean * sigma / mean + trkMomentumRelError * trkMomentumRelError);
0252   eval_ep[7] = ele.ecalDriven();
0253   eval_ep[8] = ele.trackerDrivenSeed();
0254   eval_ep[9] = int(ele.classification());  //eleClass;
0255   eval_ep[10] = isEB;
0256
0257   // CODE FOR FUTURE SEMI_PARAMETRIC
0258   //double rawweight = ep_forestsMean_[coridx]->GetResponse(eval_ep.data());
0259   ////rawsigma = ep_forestsSigma_[coridx]->GetResponse(eval.data());
0260   //double weight = meanoffset + meanscale*vdt::fast_sin(rawweight);
0261   ////sigma = sigmaoffset + sigmascale*vdt::fast_sin(rawsigma);
0262
0263   // CODE FOR STANDARD BDT
0264   double weight = 0.;
0265   if (eOverP > 0.025 && std::abs(ep - ecor) < 15. * std::sqrt(momentumError * momentumError + sigmacor * sigmacor) &&
0266       (!applyExtraHighEnergyProtection_ || ((momentumError < 10. * ep) || (ecor < 200.)))) {
0267     // protection against crazy track measurement
0268     weight = std::clamp(epForest_->GetResponse(eval_ep.data()), 0., 1.);
0269   }
0270
0271   double combinedMomentum = weight * ele.trackMomentumAtVtx().R() + (1. - weight) * ecor;
0272   double combinedMomentumError = sqrt(weight * weight * ele.trackMomentumError() * ele.trackMomentumError() +
0273                                       (1. - weight) * (1. - weight) * sigmacor * sigmacor);
0274
0275   math::XYZTLorentzVector oldMomentum = ele.p4();
0276   math::XYZTLorentzVector newMomentum(oldMomentum.x() * combinedMomentum / oldMomentum.t(),
0277                                       oldMomentum.y() * combinedMomentum / oldMomentum.t(),
0278                                       oldMomentum.z() * combinedMomentum / oldMomentum.t(),
0279                                       combinedMomentum);
0280
0281   ele.correctMomentum(newMomentum, ele.trackMomentumError(), combinedMomentumError);
0282 }
0283
0284 void EGRegressionModifierV1::modifyObject(reco::Photon& pho) const {
0285   // regression calculation needs no additional valuemaps
0286
0287   std::array<float, 35> eval;
0288   const reco::SuperClusterRef& superClus = pho.superCluster();
0289   const edm::Ptr<reco::CaloCluster>& theseed = superClus->seed();
0290
0291   const int numberOfClusters = superClus->clusters().size();
0292   const bool missing_clusters = !superClus->clusters()[numberOfClusters - 1].isAvailable();
0293
0294   if (missing_clusters)
0295     return;  // do not apply corrections in case of missing info (slimmed MiniAOD electrons)
0296
0297   const double rawEnergy = superClus->rawEnergy();
0298   const auto& ess = pho.showerShapeVariables();
0299
0300   // SET INPUTS
0301   eval[0] = rawEnergy;
0302   eval[1] = pho.r9();
0303   eval[2] = superClus->etaWidth();
0304   eval[3] = superClus->phiWidth();
0305   eval[4] = std::max(0, numberOfClusters - 1);
0306   eval[5] = pho.hadronicOverEm();
0307   eval[6] = rhoValue_;
0308   eval[7] = nVtx_;
0309   eval[8] = theseed->eta() - superClus->position().Eta();
0310   eval[9] = reco::deltaPhi(theseed->phi(), superClus->position().Phi());
0311   eval[10] = theseed->energy() / rawEnergy;
0312   eval[11] = ess.e3x3 / ess.e5x5;
0313   eval[12] = ess.sigmaIetaIeta;
0314   eval[13] = ess.sigmaIphiIphi;
0315   eval[14] = ess.sigmaIetaIphi / (ess.sigmaIphiIphi * ess.sigmaIetaIeta);
0316   eval[15] = ess.maxEnergyXtal / ess.e5x5;
0317   eval[16] = ess.e2nd / ess.e5x5;
0318   eval[17] = ess.eTop / ess.e5x5;
0319   eval[18] = ess.eBottom / ess.e5x5;
0320   eval[19] = ess.eLeft / ess.e5x5;
0321   eval[20] = ess.eRight / ess.e5x5;
0322   eval[21] = ess.e2x5Max / ess.e5x5;
0323   eval[22] = ess.e2x5Left / ess.e5x5;
0324   eval[23] = ess.e2x5Right / ess.e5x5;
0325   eval[24] = ess.e2x5Top / ess.e5x5;
0326   eval[25] = ess.e2x5Bottom / ess.e5x5;
0327
0328   const bool isEB = pho.isEB();
0329   if (isEB) {
0330     EBDetId ebseedid(theseed->seed());
0331     eval[26] = pho.e5x5() / theseed->energy();
0332     int ieta = ebseedid.ieta();
0333     int iphi = ebseedid.iphi();
0334     eval[27] = ieta;
0335     eval[28] = iphi;
0336     int signieta = ieta > 0 ? +1 : -1;  /// this is 1*abs(ieta)/ieta in original training
0337     eval[29] = (ieta - signieta) % 5;
0338     eval[30] = (iphi - 1) % 2;
0339     eval[31] = (abs(ieta) <= 25) * ((ieta - signieta)) + (abs(ieta) > 25) * ((ieta - 26 * signieta) % 20);
0340     eval[32] = (iphi - 1) % 20;
0341     eval[33] = ieta;  /// duplicated variables but this was trained like that
0342     eval[34] = iphi;  /// duplicated variables but this was trained like that
0343   } else {
0344     EEDetId eeseedid(theseed->seed());
0345     eval[26] = superClus->preshowerEnergy() / rawEnergy;
0346     eval[27] = superClus->preshowerEnergyPlane1() / rawEnergy;
0347     eval[28] = superClus->preshowerEnergyPlane2() / rawEnergy;
0348     eval[29] = eeseedid.ix();
0349     eval[30] = eeseedid.iy();
0350   }
0351
0352   //magic numbers for MINUIT-like transformation of BDT output onto limited range
0353   //(These should be stored inside the conditions object in the future as well)
0354   const double meanlimlow = 0.2;
0355   const double meanlimhigh = 2.0;
0356   const double meanoffset = meanlimlow + 0.5 * (meanlimhigh - meanlimlow);
0357   const double meanscale = 0.5 * (meanlimhigh - meanlimlow);
0358
0359   const double sigmalimlow = 0.0002;
0360   const double sigmalimhigh = 0.5;
0361   const double sigmaoffset = sigmalimlow + 0.5 * (sigmalimhigh - sigmalimlow);
0362   const double sigmascale = 0.5 * (sigmalimhigh - sigmalimlow);
0363
0364   const int coridx = isEB ? 0 : 1;
0365
0366   //these are the actual BDT responses
0367   const double rawmean = phoForestsMean_[coridx]->GetResponse(eval.data());
0368   const double rawsigma = phoForestsSigma_[coridx]->GetResponse(eval.data());
0369   //apply transformation to limited output range (matching the training)
0370   const double mean = meanoffset + meanscale * vdt::fast_sin(rawmean);
0371   const double sigma = sigmaoffset + sigmascale * vdt::fast_sin(rawsigma);
0372
0373   //regression target is ln(Etrue/Eraw)
0374   //so corrected energy is ecor=exp(mean)*e, uncertainty is exp(mean)*eraw*sigma=ecor*sigma
0375   const double ecor = isEB ? mean * eval[0] : mean * (eval[0] + superClus->preshowerEnergy());
0376
0377   const double sigmacor = sigma * ecor;
0378   pho.setCorrectedEnergy(reco::Photon::P4type::regression2, ecor, sigmacor, true);
0379 }