Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:22

0001 #ifndef PhysicsTools_Utilities_interface_Lumi3DReWeighting_cc
0002 #define PhysicsTools_Utilities_interface_Lumi3DReWeighting_cc
0003 
0004 /**
0005   \class    Lumi3DReWeighting Lumi3DReWeighting.h "PhysicsTools/Utilities/interface/Lumi3DReWeighting.h"
0006   \brief    Class to provide lumi weighting for analyzers to weight "flat-to-N" MC samples to data
0007 
0008   This class will trivially take two histograms:
0009   1. The generated "flat-to-N" distributions from a given processing (or any other generated input)
0010   2. A histogram generated from the "estimatePileup" macro here:
0011 
0012   https://twiki.cern.ch/twiki/bin/view/CMS/LumiCalc#How_to_use_script_estimatePileup
0013 
0014   and produce weights to convert the input distribution (1) to the latter (2).
0015 
0016   \author Salvatore Rappoccio, modified by Mike Hildreth
0017   
0018 */
0019 #include "TFile.h"
0020 #include "TH1.h"
0021 #include "TH3.h"
0022 #include "TRandom1.h"
0023 #include "TRandom2.h"
0024 #include "TRandom3.h"
0025 #include "TStopwatch.h"
0026 #include <algorithm>
0027 #include <iostream>
0028 #include <memory>
0029 #include <string>
0030 
0031 #include "SimDataFormats/PileupSummaryInfo/interface/PileupSummaryInfo.h"
0032 #include "PhysicsTools/Utilities/interface/Lumi3DReWeighting.h"
0033 #include "DataFormats/Common/interface/Handle.h"
0034 #include "DataFormats/Provenance/interface/ProcessHistory.h"
0035 #include "DataFormats/Provenance/interface/Provenance.h"
0036 #include "FWCore/Common/interface/EventBase.h"
0037 
0038 using namespace edm;
0039 
0040 Lumi3DReWeighting::Lumi3DReWeighting(std::string generatedFile,
0041                                      std::string dataFile,
0042                                      std::string GenHistName = "pileup",
0043                                      std::string DataHistName = "pileup",
0044                                      std::string WeightOutputFile = "")
0045     : generatedFileName_(generatedFile),
0046       dataFileName_(dataFile),
0047       GenHistName_(GenHistName),
0048       DataHistName_(DataHistName),
0049       weightFileName_(WeightOutputFile) {
0050   generatedFile_ = std::make_shared<TFile>(generatedFileName_.c_str());  //MC distribution
0051   dataFile_ = std::make_shared<TFile>(dataFileName_.c_str());            //Data distribution
0052 
0053   std::shared_ptr<TH1> Data_temp =
0054       std::shared_ptr<TH1>((static_cast<TH1 *>(dataFile_->Get(DataHistName_.c_str())->Clone())));
0055 
0056   std::shared_ptr<TH1> MC_temp =
0057       std::shared_ptr<TH1>((static_cast<TH1 *>(generatedFile_->Get(GenHistName_.c_str())->Clone())));
0058 
0059   MC_distr_ = std::shared_ptr<TH1>((static_cast<TH1 *>(generatedFile_->Get(GenHistName_.c_str())->Clone())));
0060   Data_distr_ = std::shared_ptr<TH1>((static_cast<TH1 *>(dataFile_->Get(DataHistName_.c_str())->Clone())));
0061 
0062   // MC * data/MC = data, so the weights are data/MC:
0063 
0064   // normalize both histograms first
0065 
0066   Data_distr_->Scale(1.0 / Data_distr_->Integral());
0067   MC_distr_->Scale(1.0 / MC_distr_->Integral());
0068 }
0069 
0070 Lumi3DReWeighting::Lumi3DReWeighting(const std::vector<float> &MC_distr,
0071                                      const std::vector<float> &Lumi_distr,
0072                                      std::string WeightOutputFile = "") {
0073   weightFileName_ = WeightOutputFile;
0074 
0075   // no histograms for input: use vectors
0076 
0077   // now, make histograms out of them:
0078 
0079   Int_t NMCBins = MC_distr.size();
0080 
0081   MC_distr_ = std::shared_ptr<TH1>(new TH1F("MC_distr", "MC dist", NMCBins, 0., float(NMCBins)));
0082 
0083   Int_t NDBins = Lumi_distr.size();
0084 
0085   Data_distr_ = std::shared_ptr<TH1>(new TH1F("Data_distr", "Data dist", NDBins, 0., float(NDBins)));
0086 
0087   for (int ibin = 1; ibin < NMCBins + 1; ++ibin) {
0088     MC_distr_->SetBinContent(ibin, MC_distr[ibin - 1]);
0089   }
0090 
0091   for (int ibin = 1; ibin < NDBins + 1; ++ibin) {
0092     Data_distr_->SetBinContent(ibin, Lumi_distr[ibin - 1]);
0093   }
0094 
0095   // check integrals, make sure things are normalized
0096 
0097   float deltaH = Data_distr_->Integral();
0098   if (fabs(1.0 - deltaH) > 0.001) {  //*OOPS*...
0099     Data_distr_->Scale(1.0 / Data_distr_->Integral());
0100   }
0101   float deltaMC = MC_distr_->Integral();
0102   if (fabs(1.0 - deltaMC) > 0.001) {
0103     MC_distr_->Scale(1.0 / MC_distr_->Integral());
0104   }
0105 }
0106 
0107 double Lumi3DReWeighting::weight3D(int pv1, int pv2, int pv3) {
0108   using std::min;
0109 
0110   int npm1 = min(pv1, 49);
0111   int np0 = min(pv2, 49);
0112   int npp1 = min(pv3, 49);
0113 
0114   return Weight3D_[npm1][np0][npp1];
0115 }
0116 
0117 // This version does all of the work for you, just taking an event pointer as input
0118 
0119 double Lumi3DReWeighting::weight3D(const edm::EventBase &e) {
0120   using std::min;
0121 
0122   // get pileup summary information
0123 
0124   Handle<std::vector<PileupSummaryInfo> > PupInfo;
0125   e.getByLabel(edm::InputTag("addPileupInfo"), PupInfo);
0126 
0127   std::vector<PileupSummaryInfo>::const_iterator PVI;
0128 
0129   int npm1 = -1;
0130   int np0 = -1;
0131   int npp1 = -1;
0132 
0133   for (PVI = PupInfo->begin(); PVI != PupInfo->end(); ++PVI) {
0134     int BX = PVI->getBunchCrossing();
0135 
0136     if (BX == -1) {
0137       npm1 = PVI->getPU_NumInteractions();
0138     }
0139     if (BX == 0) {
0140       np0 = PVI->getPU_NumInteractions();
0141     }
0142     if (BX == 1) {
0143       npp1 = PVI->getPU_NumInteractions();
0144     }
0145   }
0146 
0147   npm1 = min(npm1, 49);
0148   np0 = min(np0, 49);
0149   npp1 = min(npp1, 49);
0150 
0151   //  std::cout << " vertices " << npm1 << " " << np0 << " " << npp1 << std::endl;
0152 
0153   return Weight3D_[npm1][np0][npp1];
0154 }
0155 
0156 void Lumi3DReWeighting::weight3D_set(std::string generatedFile,
0157                                      std::string dataFile,
0158                                      std::string GenHistName,
0159                                      std::string DataHistName,
0160                                      std::string WeightOutputFile) {
0161   generatedFileName_ = generatedFile;
0162   dataFileName_ = dataFile;
0163   GenHistName_ = GenHistName;
0164   DataHistName_ = DataHistName;
0165   weightFileName_ = WeightOutputFile;
0166 
0167   std::cout << " seting values: " << generatedFileName_ << " " << dataFileName_ << " " << GenHistName_ << " "
0168             << DataHistName_ << std::endl;
0169 
0170   generatedFile_ = std::make_shared<TFile>(generatedFileName_.c_str());  //MC distribution
0171   dataFile_ = std::make_shared<TFile>(dataFileName_.c_str());            //Data distribution
0172 
0173   std::shared_ptr<TH1> Data_temp =
0174       std::shared_ptr<TH1>((static_cast<TH1 *>(dataFile_->Get(DataHistName_.c_str())->Clone())));
0175 
0176   std::shared_ptr<TH1> MC_temp =
0177       std::shared_ptr<TH1>((static_cast<TH1 *>(generatedFile_->Get(GenHistName_.c_str())->Clone())));
0178 
0179   MC_distr_ = std::shared_ptr<TH1>((static_cast<TH1 *>(generatedFile_->Get(GenHistName_.c_str())->Clone())));
0180   Data_distr_ = std::shared_ptr<TH1>((static_cast<TH1 *>(dataFile_->Get(DataHistName_.c_str())->Clone())));
0181 
0182   // MC * data/MC = data, so the weights are data/MC:
0183 
0184   // normalize both histograms first
0185 
0186   Data_distr_->Scale(1.0 / Data_distr_->Integral());
0187   MC_distr_->Scale(1.0 / MC_distr_->Integral());
0188 }
0189 
0190 void Lumi3DReWeighting::weight3D_init(float ScaleFactor) {
0191   // Scale factor is used to shift target distribution (i.e. luminosity scale)  1. = no shift
0192 
0193   //create histogram to write output weights, save pain of generating them again...
0194 
0195   TH3D *WHist = new TH3D("WHist", "3D weights", 50, 0., 50., 50, 0., 50., 50, 0., 50.);
0196   TH3D *DHist = new TH3D("DHist", "3D weights", 50, 0., 50., 50, 0., 50., 50, 0., 50.);
0197   TH3D *MHist = new TH3D("MHist", "3D weights", 50, 0., 50., 50, 0., 50., 50, 0., 50.);
0198 
0199   using std::min;
0200 
0201   if (MC_distr_->GetEntries() == 0) {
0202     std::cout << " MC and Data distributions are not initialized! You must call the Lumi3DReWeighting constructor. "
0203               << std::endl;
0204   }
0205 
0206   // arrays for storing number of interactions
0207 
0208   double MC_ints[50][50][50];
0209   double Data_ints[50][50][50];
0210 
0211   for (int i = 0; i < 50; i++) {
0212     for (int j = 0; j < 50; j++) {
0213       for (int k = 0; k < 50; k++) {
0214         MC_ints[i][j][k] = 0.;
0215         Data_ints[i][j][k] = 0.;
0216       }
0217     }
0218   }
0219 
0220   double factorial[50];
0221   double PowerSer[50];
0222   double base = 1.;
0223 
0224   factorial[0] = 1.;
0225   PowerSer[0] = 1.;
0226 
0227   for (int i = 1; i < 50; ++i) {
0228     base = base * float(i);
0229     factorial[i] = base;
0230   }
0231 
0232   double x;
0233   double xweight;
0234   double probi, probj, probk;
0235   double Expval, mean;
0236   int xi;
0237 
0238   // Get entries for Data, MC, fill arrays:
0239 
0240   int NMCbin = MC_distr_->GetNbinsX();
0241 
0242   for (int jbin = 1; jbin < NMCbin + 1; jbin++) {
0243     x = MC_distr_->GetBinCenter(jbin);
0244     xweight = MC_distr_->GetBinContent(jbin);  //use as weight for matrix
0245 
0246     //for Summer 11, we have this int feature:
0247     xi = int(x);
0248 
0249     // Generate Poisson distribution for each value of the mean
0250 
0251     mean = double(xi);
0252 
0253     if (mean < 0.) {
0254       throw cms::Exception("BadInputValue") << " Your histogram generates MC luminosity values less than zero!"
0255                                             << " Please Check.  Terminating." << std::endl;
0256     }
0257 
0258     if (mean == 0.) {
0259       Expval = 1.;
0260     } else {
0261       Expval = exp(-1. * mean);
0262     }
0263 
0264     base = 1.;
0265 
0266     for (int i = 1; i < 50; ++i) {
0267       base = base * mean;
0268       PowerSer[i] = base;  // PowerSer is mean^i
0269     }
0270 
0271     // compute poisson probability for each Nvtx in weight matrix
0272 
0273     for (int i = 0; i < 50; i++) {
0274       probi = PowerSer[i] / factorial[i] * Expval;
0275       for (int j = 0; j < 50; j++) {
0276         probj = PowerSer[j] / factorial[j] * Expval;
0277         for (int k = 0; k < 50; k++) {
0278           probk = PowerSer[k] / factorial[k] * Expval;
0279           // joint probability is product of event weights multiplied by weight of input distribution bin
0280           MC_ints[i][j][k] = MC_ints[i][j][k] + probi * probj * probk * xweight;
0281         }
0282       }
0283     }
0284   }
0285 
0286   int NDatabin = Data_distr_->GetNbinsX();
0287 
0288   for (int jbin = 1; jbin < NDatabin + 1; jbin++) {
0289     mean = (Data_distr_->GetBinCenter(jbin)) * ScaleFactor;
0290     xweight = Data_distr_->GetBinContent(jbin);
0291 
0292     // Generate poisson distribution for each value of the mean
0293 
0294     if (mean < 0.) {
0295       throw cms::Exception("BadInputValue") << " Your histogram generates Data luminosity values less than zero!"
0296                                             << " Please Check.  Terminating." << std::endl;
0297     }
0298 
0299     if (mean == 0.) {
0300       Expval = 1.;
0301     } else {
0302       Expval = exp(-1. * mean);
0303     }
0304 
0305     base = 1.;
0306 
0307     for (int i = 1; i < 50; ++i) {
0308       base = base * mean;
0309       PowerSer[i] = base;
0310     }
0311 
0312     // compute poisson probability for each Nvtx in weight matrix
0313 
0314     for (int i = 0; i < 50; i++) {
0315       probi = PowerSer[i] / factorial[i] * Expval;
0316       for (int j = 0; j < 50; j++) {
0317         probj = PowerSer[j] / factorial[j] * Expval;
0318         for (int k = 0; k < 50; k++) {
0319           probk = PowerSer[k] / factorial[k] * Expval;
0320           // joint probability is product of event weights multiplied by weight of input distribution bin
0321           Data_ints[i][j][k] = Data_ints[i][j][k] + probi * probj * probk * xweight;
0322         }
0323       }
0324     }
0325   }
0326 
0327   for (int i = 0; i < 50; i++) {
0328     //if(i<5) std::cout << "i = " << i << std::endl;
0329     for (int j = 0; j < 50; j++) {
0330       for (int k = 0; k < 50; k++) {
0331         if ((MC_ints[i][j][k]) > 0.) {
0332           Weight3D_[i][j][k] = Data_ints[i][j][k] / MC_ints[i][j][k];
0333         } else {
0334           Weight3D_[i][j][k] = 0.;
0335         }
0336         WHist->SetBinContent(i + 1, j + 1, k + 1, Weight3D_[i][j][k]);
0337         DHist->SetBinContent(i + 1, j + 1, k + 1, Data_ints[i][j][k]);
0338         MHist->SetBinContent(i + 1, j + 1, k + 1, MC_ints[i][j][k]);
0339         //  if(i<5 && j<5 && k<5) std::cout << Weight3D_[i][j][k] << " " ;
0340       }
0341       //      if(i<5 && j<5) std::cout << std::endl;
0342     }
0343   }
0344 
0345   if (!weightFileName_.empty()) {
0346     std::cout << " 3D Weight Matrix initialized! " << std::endl;
0347     std::cout << " Writing weights to file " << weightFileName_ << " for re-use...  " << std::endl;
0348 
0349     TFile *outfile = new TFile(weightFileName_.c_str(), "RECREATE");
0350     WHist->Write();
0351     MHist->Write();
0352     DHist->Write();
0353     outfile->Write();
0354     outfile->Close();
0355     outfile->Delete();
0356   }
0357 
0358   return;
0359 }
0360 
0361 void Lumi3DReWeighting::weight3D_init(std::string WeightFileName) {
0362   TFile *infile = new TFile(WeightFileName.c_str());
0363   TH1F *WHist = (TH1F *)infile->Get("WHist");
0364 
0365   // Check if the histogram exists
0366   if (!WHist) {
0367     throw cms::Exception("HistogramNotFound") << " Could not find the histogram WHist in the file "
0368                                               << "in the file " << WeightFileName << "." << std::endl;
0369   }
0370 
0371   for (int i = 0; i < 50; i++) {
0372     //    if(i<5) std::cout << "i = " << i << std::endl;
0373     for (int j = 0; j < 50; j++) {
0374       for (int k = 0; k < 50; k++) {
0375         Weight3D_[i][j][k] = WHist->GetBinContent(i + 1, j + 1, k + 1);
0376         //  if(i<5 && j<5 && k<5) std::cout << Weight3D_[i][j][k] << " ";
0377       }
0378       //      if(i<5 && j<5) std::cout << std::endl;
0379     }
0380   }
0381 
0382   std::cout << " 3D Weight Matrix initialized! " << std::endl;
0383 
0384   return;
0385 }
0386 
0387 void Lumi3DReWeighting::weight3D_init(std::string MCWeightFileName, std::string DataWeightFileName) {
0388   TFile *infileMC = new TFile(MCWeightFileName.c_str());
0389   TH3D *MHist = (TH3D *)infileMC->Get("MHist");
0390 
0391   // Check if the histogram exists
0392   if (!MHist) {
0393     throw cms::Exception("HistogramNotFound") << " Could not find the histogram MHist in the file "
0394                                               << "in the file " << MCWeightFileName << "." << std::endl;
0395   }
0396 
0397   TFile *infileD = new TFile(DataWeightFileName.c_str());
0398   TH3D *DHist = (TH3D *)infileD->Get("DHist");
0399 
0400   // Check if the histogram exists
0401   if (!DHist) {
0402     throw cms::Exception("HistogramNotFound") << " Could not find the histogram DHist in the file "
0403                                               << "in the file " << DataWeightFileName << "." << std::endl;
0404   }
0405 
0406   for (int i = 0; i < 50; i++) {
0407     for (int j = 0; j < 50; j++) {
0408       for (int k = 0; k < 50; k++) {
0409         Weight3D_[i][j][k] = DHist->GetBinContent(i + 1, j + 1, k + 1) / MHist->GetBinContent(i + 1, j + 1, k + 1);
0410       }
0411     }
0412   }
0413 
0414   std::cout << " 3D Weight Matrix initialized! " << std::endl;
0415 
0416   return;
0417 }
0418 
0419 #endif