EgammaTools/plugins/EGRegressionModifierV2.cc

0001 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0002 #include "RecoEcal/EgammaCoreTools/interface/EcalTools.h"
0003 #include "RecoEgamma/EgammaTools/plugins/EGRegressionModifierHelpers.h"
0004 #include "CommonTools/CandAlgos/interface/ModifyObjectValueBase.h"
0005 #include "DataFormats/Common/interface/ValueMap.h"
0006 #include "DataFormats/EgammaCandidates/interface/GsfElectron.h"
0007 #include "DataFormats/EgammaCandidates/interface/Photon.h"
0008 #include "FWCore/Utilities/interface/EDGetToken.h"
0009 #include "FWCore/Utilities/interface/InputTag.h"
0010 #include "RecoEgamma/EgammaTools/interface/EcalClusterLocal.h"
0011 #include "Geometry/Records/interface/CaloGeometryRecord.h"
0012 #include "Geometry/CaloGeometry/interface/CaloGeometry.h"
0013
0014 #include <vdt/vdtMath.h>
0015
0016 class EGRegressionModifierV2 : public ModifyObjectValueBase {
0017 public:
0018   EGRegressionModifierV2(const edm::ParameterSet& conf, edm::ConsumesCollector& cc);
0019
0020   void setEvent(const edm::Event&) final;
0021   void setEventContent(const edm::EventSetup&) final;
0022
0023   void modifyObject(reco::GsfElectron&) const final;
0024   void modifyObject(reco::Photon&) const final;
0025
0026   // just calls reco versions
0027   void modifyObject(pat::Electron& ele) const final { modifyObject(static_cast<reco::GsfElectron&>(ele)); }
0028   void modifyObject(pat::Photon& pho) const final { modifyObject(static_cast<reco::Photon&>(pho)); }
0029
0030 private:
0031   EGRegressionModifierCondTokens eleCondTokens_;
0032   EGRegressionModifierCondTokens phoCondTokens_;
0033
0034   float rhoValue_;
0035   const edm::EDGetTokenT<double> rhoToken_;
0036   const edm::ESGetToken<CaloGeometry, CaloGeometryRecord> caloGeometryToken_;
0037   CaloGeometry const* caloGeometry_ = nullptr;
0038
0039   std::vector<const GBRForestD*> phoForestsMean_;
0040   std::vector<const GBRForestD*> phoForestsSigma_;
0041   std::vector<const GBRForestD*> eleForestsMean_;
0042   std::vector<const GBRForestD*> eleForestsSigma_;
0043
0044   const double lowEnergyEcalOnlyThr_;    // 300
0045   const double lowEnergyEcalTrackThr_;   // 50
0046   const double highEnergyEcalTrackThr_;  // 200
0047   const double eOverPEcalTrkThr_;        // 0.025
0048   const double epDiffSigEcalTrackThr_;   // 15
0049   const double epSigEcalTrackThr_;       // 10
0050   const bool forceHighEnergyEcalTrainingIfSaturated_;
0051 };
0052
0053 DEFINE_EDM_PLUGIN(ModifyObjectValueFactory, EGRegressionModifierV2, "EGRegressionModifierV2");
0054
0055 EGRegressionModifierV2::EGRegressionModifierV2(const edm::ParameterSet& conf, edm::ConsumesCollector& cc)
0056     : ModifyObjectValueBase(conf),
0057       eleCondTokens_{conf.getParameterSet("electron_config"), "regressionKey", "uncertaintyKey", cc},
0058       phoCondTokens_{conf.getParameterSet("photon_config"), "regressionKey", "uncertaintyKey", cc},
0059
0060       rhoToken_(cc.consumes(conf.getParameter<edm::InputTag>("rhoCollection"))),
0061       caloGeometryToken_(cc.esConsumes()),
0062       lowEnergyEcalOnlyThr_(conf.getParameter<double>("lowEnergy_ECALonlyThr")),
0063       lowEnergyEcalTrackThr_(conf.getParameter<double>("lowEnergy_ECALTRKThr")),
0064       highEnergyEcalTrackThr_(conf.getParameter<double>("highEnergy_ECALTRKThr")),
0065       eOverPEcalTrkThr_(conf.getParameter<double>("eOverP_ECALTRKThr")),
0066       epDiffSigEcalTrackThr_(conf.getParameter<double>("epDiffSig_ECALTRKThr")),
0067       epSigEcalTrackThr_(conf.getParameter<double>("epSig_ECALTRKThr")),
0068       forceHighEnergyEcalTrainingIfSaturated_(conf.getParameter<bool>("forceHighEnergyEcalTrainingIfSaturated")) {
0069   unsigned int encor = eleCondTokens_.mean.size();
0070   eleForestsMean_.reserve(2 * encor);
0071   eleForestsSigma_.reserve(2 * encor);
0072
0073   unsigned int ncor = phoCondTokens_.mean.size();
0074   phoForestsMean_.reserve(ncor);
0075   phoForestsSigma_.reserve(ncor);
0076 }
0077
0078 void EGRegressionModifierV2::setEvent(const edm::Event& evt) { rhoValue_ = evt.get(rhoToken_); }
0079
0080 void EGRegressionModifierV2::setEventContent(const edm::EventSetup& evs) {
0081   phoForestsMean_ = retrieveGBRForests(evs, phoCondTokens_.mean);
0082   phoForestsSigma_ = retrieveGBRForests(evs, phoCondTokens_.sigma);
0083
0084   eleForestsMean_ = retrieveGBRForests(evs, eleCondTokens_.mean);
0085   eleForestsSigma_ = retrieveGBRForests(evs, eleCondTokens_.sigma);
0086
0087   caloGeometry_ = &evs.getData(caloGeometryToken_);
0088 }
0089
0090 void EGRegressionModifierV2::modifyObject(reco::GsfElectron& ele) const {
0091   // regression calculation needs no additional valuemaps
0092
0093   const reco::SuperClusterRef& superClus = ele.superCluster();
0094   const edm::Ptr<reco::CaloCluster>& seed = superClus->seed();
0095
0096   // skip HGCAL for now
0097   if (EcalTools::isHGCalDet(seed->seed().det()))
0098     return;
0099
0100   const int numberOfClusters = superClus->clusters().size();
0101   const bool missing_clusters = !superClus->clusters()[numberOfClusters - 1].isAvailable();
0102   if (missing_clusters)
0103     return;  // do not apply corrections in case of missing info (slimmed MiniAOD electrons)
0104
0105   //check if fbrem is filled as its needed for E/p combination so abort if its set to the default value
0106   //this will be the case for <5 (or current cuts) for miniAOD electrons
0107   if (ele.fbrem() == reco::GsfElectron::ClassificationVariables().trackFbrem)
0108     return;
0109
0110   const bool isEB = ele.isEB();
0111
0112   std::array<float, 32> eval;
0113   const double rawEnergy = superClus->rawEnergy();
0114   const double raw_es_energy = superClus->preshowerEnergy();
0115   const auto& full5x5_ess = ele.full5x5_showerShape();
0116
0117   float e5x5Inverse = full5x5_ess.e5x5 != 0. ? vdt::fast_inv(full5x5_ess.e5x5) : 0.;
0118
0119   eval[0] = rawEnergy;
0120   eval[1] = superClus->etaWidth();
0121   eval[2] = superClus->phiWidth();
0122   eval[3] = superClus->seed()->energy() / rawEnergy;
0123   eval[4] = full5x5_ess.e5x5 / rawEnergy;
0124   eval[5] = ele.hcalOverEcalBc();
0125   eval[6] = rhoValue_;
0126   eval[7] = seed->eta() - superClus->position().Eta();
0127   eval[8] = reco::deltaPhi(seed->phi(), superClus->position().Phi());
0128   eval[9] = full5x5_ess.r9;
0129   eval[10] = full5x5_ess.sigmaIetaIeta;
0130   eval[11] = full5x5_ess.sigmaIetaIphi;
0131   eval[12] = full5x5_ess.sigmaIphiIphi;
0132   eval[13] = full5x5_ess.eMax * e5x5Inverse;
0133   eval[14] = full5x5_ess.e2nd * e5x5Inverse;
0134   eval[15] = full5x5_ess.eTop * e5x5Inverse;
0135   eval[16] = full5x5_ess.eBottom * e5x5Inverse;
0136   eval[17] = full5x5_ess.eLeft * e5x5Inverse;
0137   eval[18] = full5x5_ess.eRight * e5x5Inverse;
0138   eval[19] = full5x5_ess.e2x5Max * e5x5Inverse;
0139   eval[20] = full5x5_ess.e2x5Left * e5x5Inverse;
0140   eval[21] = full5x5_ess.e2x5Right * e5x5Inverse;
0141   eval[22] = full5x5_ess.e2x5Top * e5x5Inverse;
0142   eval[23] = full5x5_ess.e2x5Bottom * e5x5Inverse;
0143   eval[24] = ele.nSaturatedXtals();
0144   eval[25] = std::max(0, numberOfClusters);
0145
0146   // calculate coordinate variables
0147   if (isEB) {
0148     float dummy;
0149     int ieta;
0150     int iphi;
0151     egammaTools::localEcalClusterCoordsEB(*seed, *caloGeometry_, dummy, dummy, ieta, iphi, dummy, dummy);
0152     eval[26] = ieta;
0153     eval[27] = iphi;
0154     int signieta = ieta > 0 ? +1 : -1;
0155     eval[28] = (ieta - signieta) % 5;
0156     eval[29] = (iphi - 1) % 2;
0157     eval[30] = (abs(ieta) <= 25) * ((ieta - signieta)) + (abs(ieta) > 25) * ((ieta - 26 * signieta) % 20);
0158     eval[31] = (iphi - 1) % 20;
0159
0160   } else {
0161     float dummy;
0162     int ix;
0163     int iy;
0164     egammaTools::localEcalClusterCoordsEE(*seed, *caloGeometry_, dummy, dummy, ix, iy, dummy, dummy);
0165     eval[26] = ix;
0166     eval[27] = iy;
0167     eval[28] = raw_es_energy / rawEnergy;
0168   }
0169
0170   //magic numbers for MINUIT-like transformation of BDT output onto limited range
0171   //(These should be stored inside the conditions object in the future as well)
0172   constexpr double meanlimlow = -1.0;
0173   constexpr double meanlimhigh = 3.0;
0174   constexpr double meanoffset = meanlimlow + 0.5 * (meanlimhigh - meanlimlow);
0175   constexpr double meanscale = 0.5 * (meanlimhigh - meanlimlow);
0176
0177   constexpr double sigmalimlow = 0.0002;
0178   constexpr double sigmalimhigh = 0.5;
0179   constexpr double sigmaoffset = sigmalimlow + 0.5 * (sigmalimhigh - sigmalimlow);
0180   constexpr double sigmascale = 0.5 * (sigmalimhigh - sigmalimlow);
0181
0182   size_t coridx = 0;
0183   float rawPt = rawEnergy * superClus->position().rho() / superClus->position().r();
0184   bool isSaturated = ele.nSaturatedXtals() != 0;
0185
0186   if (rawPt >= lowEnergyEcalOnlyThr_ || (isSaturated && forceHighEnergyEcalTrainingIfSaturated_)) {
0187     if (isEB)
0188       coridx = 1;
0189     else
0190       coridx = 3;
0191   } else {
0192     if (isEB)
0193       coridx = 0;
0194     else
0195       coridx = 2;
0196   }
0197
0198   //these are the actual BDT responses
0199   double rawmean = eleForestsMean_[coridx]->GetResponse(eval.data());
0200   double rawsigma = eleForestsSigma_[coridx]->GetResponse(eval.data());
0201
0202   //apply transformation to limited output range (matching the training)
0203   double mean = meanoffset + meanscale * vdt::fast_sin(rawmean);
0204   double sigma = sigmaoffset + sigmascale * vdt::fast_sin(rawsigma);
0205
0206   // Correct the energy. A negative energy means that the correction went
0207   // outside the boundaries of the training. In this case uses raw.
0208   // The resolution estimation, on the other hand should be ok.
0209   if (mean < 0.)
0210     mean = 1.0;
0211
0212   const double ecor = mean * (rawEnergy + raw_es_energy);
0213   const double sigmacor = sigma * ecor;
0214
0215   ele.setCorrectedEcalEnergy(ecor);
0216   ele.setCorrectedEcalEnergyError(sigmacor);
0217
0218   double combinedEnergy = ecor;
0219   double combinedEnergyError = sigmacor;
0220
0221   auto el_track = ele.gsfTrack();
0222   const float trkMomentum = el_track->pMode();
0223   const float trkEta = el_track->etaMode();
0224   const float trkPhi = el_track->phiMode();
0225   const float trkMomentumError = std::abs(el_track->qoverpModeError()) * trkMomentum * trkMomentum;
0226
0227   const float eOverP = (rawEnergy + raw_es_energy) * mean / trkMomentum;
0228   const float fbrem = ele.fbrem();
0229
0230   // E-p combination
0231   if (ecor < highEnergyEcalTrackThr_ && eOverP > eOverPEcalTrkThr_ &&
0232       std::abs(ecor - trkMomentum) <
0233           epDiffSigEcalTrackThr_ * std::sqrt(trkMomentumError * trkMomentumError + sigmacor * sigmacor) &&
0234       trkMomentumError < epSigEcalTrackThr_ * trkMomentum) {
0235     rawPt = ecor / cosh(trkEta);
0236     if (isEB && rawPt < lowEnergyEcalTrackThr_)
0237       coridx = 4;
0238     else if (isEB && rawPt >= lowEnergyEcalTrackThr_)
0239       coridx = 5;
0240     else if (!isEB && rawPt < lowEnergyEcalTrackThr_)
0241       coridx = 6;
0242     else if (!isEB && rawPt >= lowEnergyEcalTrackThr_)
0243       coridx = 7;
0244
0245     eval[0] = ecor;
0246     eval[1] = sigma / mean;
0247     eval[2] = trkMomentumError / trkMomentum;
0248     eval[3] = eOverP;
0249     eval[4] = ele.ecalDrivenSeed();
0250     eval[5] = full5x5_ess.r9;
0251     eval[6] = fbrem;
0252     eval[7] = trkEta;
0253     eval[8] = trkPhi;
0254
0255     float ecalEnergyVar = (rawEnergy + raw_es_energy) * sigma;
0256     float rawcombNormalization = (trkMomentumError * trkMomentumError + ecalEnergyVar * ecalEnergyVar);
0257     float rawcomb = (ecor * trkMomentumError * trkMomentumError + trkMomentum * ecalEnergyVar * ecalEnergyVar) /
0258                     rawcombNormalization;
0259
0260     //these are the actual BDT responses
0261     double rawmean_trk = eleForestsMean_[coridx]->GetResponse(eval.data());
0262     double rawsigma_trk = eleForestsSigma_[coridx]->GetResponse(eval.data());
0263
0264     //apply transformation to limited output range (matching the training)
0265     double mean_trk = meanoffset + meanscale * vdt::fast_sin(rawmean_trk);
0266     double sigma_trk = sigmaoffset + sigmascale * vdt::fast_sin(rawsigma_trk);
0267
0268     // Final correction
0269     // A negative energy means that the correction went
0270     // outside the boundaries of the training. In this case uses raw.
0271     // The resolution estimation, on the other hand should be ok.
0272     if (mean_trk < 0.)
0273       mean_trk = 1.0;
0274
0275     combinedEnergy = mean_trk * rawcomb;
0276     combinedEnergyError = sigma_trk * rawcomb;
0277   }
0278
0279   math::XYZTLorentzVector oldFourMomentum = ele.p4();
0280   math::XYZTLorentzVector newFourMomentum(oldFourMomentum.x() * combinedEnergy / oldFourMomentum.t(),
0281                                           oldFourMomentum.y() * combinedEnergy / oldFourMomentum.t(),
0282                                           oldFourMomentum.z() * combinedEnergy / oldFourMomentum.t(),
0283                                           combinedEnergy);
0284
0285   ele.correctMomentum(newFourMomentum, ele.trackMomentumError(), combinedEnergyError);
0286 }
0287
0288 void EGRegressionModifierV2::modifyObject(reco::Photon& pho) const {
0289   // regression calculation needs no additional valuemaps
0290
0291   const reco::SuperClusterRef& superClus = pho.superCluster();
0292   const edm::Ptr<reco::CaloCluster>& seed = superClus->seed();
0293
0294   // skip HGCAL for now
0295   if (EcalTools::isHGCalDet(seed->seed().det()))
0296     return;
0297
0298   const int numberOfClusters = superClus->clusters().size();
0299   const bool missing_clusters = !superClus->clusters()[numberOfClusters - 1].isAvailable();
0300   if (missing_clusters)
0301     return;  // do not apply corrections in case of missing info (slimmed MiniAOD electrons)
0302
0303   const bool isEB = pho.isEB();
0304
0305   std::array<float, 32> eval;
0306   const double rawEnergy = superClus->rawEnergy();
0307   const double raw_es_energy = superClus->preshowerEnergy();
0308   const auto& full5x5_pss = pho.full5x5_showerShapeVariables();
0309
0310   float e5x5Inverse = full5x5_pss.e5x5 != 0. ? vdt::fast_inv(full5x5_pss.e5x5) : 0.;
0311
0312   eval[0] = rawEnergy;
0313   eval[1] = superClus->etaWidth();
0314   eval[2] = superClus->phiWidth();
0315   eval[3] = superClus->seed()->energy() / rawEnergy;
0316   eval[4] = full5x5_pss.e5x5 / rawEnergy;
0317   eval[5] = pho.hadronicOverEm();
0318   eval[6] = rhoValue_;
0319   eval[7] = seed->eta() - superClus->position().Eta();
0320   eval[8] = reco::deltaPhi(seed->phi(), superClus->position().Phi());
0321   eval[9] = pho.full5x5_r9();
0322   eval[10] = full5x5_pss.sigmaIetaIeta;
0323   eval[11] = full5x5_pss.sigmaIetaIphi;
0324   eval[12] = full5x5_pss.sigmaIphiIphi;
0325   eval[13] = full5x5_pss.maxEnergyXtal * e5x5Inverse;
0326   eval[14] = full5x5_pss.e2nd * e5x5Inverse;
0327   eval[15] = full5x5_pss.eTop * e5x5Inverse;
0328   eval[16] = full5x5_pss.eBottom * e5x5Inverse;
0329   eval[17] = full5x5_pss.eLeft * e5x5Inverse;
0330   eval[18] = full5x5_pss.eRight * e5x5Inverse;
0331   eval[19] = full5x5_pss.e2x5Max * e5x5Inverse;
0332   eval[20] = full5x5_pss.e2x5Left * e5x5Inverse;
0333   eval[21] = full5x5_pss.e2x5Right * e5x5Inverse;
0334   eval[22] = full5x5_pss.e2x5Top * e5x5Inverse;
0335   eval[23] = full5x5_pss.e2x5Bottom * e5x5Inverse;
0336   eval[24] = pho.nSaturatedXtals();
0337   eval[25] = std::max(0, numberOfClusters);
0338
0339   // calculate coordinate variables
0340
0341   if (isEB) {
0342     float dummy;
0343     int ieta;
0344     int iphi;
0345     egammaTools::localEcalClusterCoordsEB(*seed, *caloGeometry_, dummy, dummy, ieta, iphi, dummy, dummy);
0346     eval[26] = ieta;
0347     eval[27] = iphi;
0348     int signieta = ieta > 0 ? +1 : -1;
0349     eval[28] = (ieta - signieta) % 5;
0350     eval[29] = (iphi - 1) % 2;
0351     eval[30] = (abs(ieta) <= 25) * ((ieta - signieta)) + (abs(ieta) > 25) * ((ieta - 26 * signieta) % 20);
0352     eval[31] = (iphi - 1) % 20;
0353
0354   } else {
0355     float dummy;
0356     int ix;
0357     int iy;
0358     egammaTools::localEcalClusterCoordsEE(*seed, *caloGeometry_, dummy, dummy, ix, iy, dummy, dummy);
0359     eval[26] = ix;
0360     eval[27] = iy;
0361     eval[28] = raw_es_energy / rawEnergy;
0362   }
0363
0364   //magic numbers for MINUIT-like transformation of BDT output onto limited range
0365   //(These should be stored inside the conditions object in the future as well)
0366   constexpr double meanlimlow = -1.0;
0367   constexpr double meanlimhigh = 3.0;
0368   constexpr double meanoffset = meanlimlow + 0.5 * (meanlimhigh - meanlimlow);
0369   constexpr double meanscale = 0.5 * (meanlimhigh - meanlimlow);
0370
0371   constexpr double sigmalimlow = 0.0002;
0372   constexpr double sigmalimhigh = 0.5;
0373   constexpr double sigmaoffset = sigmalimlow + 0.5 * (sigmalimhigh - sigmalimlow);
0374   constexpr double sigmascale = 0.5 * (sigmalimhigh - sigmalimlow);
0375
0376   size_t coridx = 0;
0377   float rawPt = rawEnergy * superClus->position().rho() / superClus->position().r();
0378   bool isSaturated = pho.nSaturatedXtals();
0379
0380   if (rawPt >= lowEnergyEcalOnlyThr_ || (isSaturated && forceHighEnergyEcalTrainingIfSaturated_)) {
0381     if (isEB)
0382       coridx = 1;
0383     else
0384       coridx = 3;
0385   } else {
0386     if (isEB)
0387       coridx = 0;
0388     else
0389       coridx = 2;
0390   }
0391
0392   //these are the actual BDT responses
0393   double rawmean = phoForestsMean_[coridx]->GetResponse(eval.data());
0394   double rawsigma = phoForestsSigma_[coridx]->GetResponse(eval.data());
0395
0396   //apply transformation to limited output range (matching the training)
0397   double mean = meanoffset + meanscale * vdt::fast_sin(rawmean);
0398   double sigma = sigmaoffset + sigmascale * vdt::fast_sin(rawsigma);
0399
0400   // Correct the energy. A negative energy means that the correction went
0401   // outside the boundaries of the training. In this case uses raw.
0402   // The resolution estimation, on the other hand should be ok.
0403   if (mean < 0.)
0404     mean = 1.0;
0405
0406   const double ecor = mean * (rawEnergy + raw_es_energy);
0407   const double sigmacor = sigma * ecor;
0408
0409   pho.setCorrectedEnergy(reco::Photon::P4type::regression2, ecor, sigmacor, true);
0410 }