Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:22

0001 ///////////////////////////////////////////////////////////////////////
0002 // Author: David Bjergaard
0003 //
0004 // Library for Generic Sideband Subtraction Methods.
0005 //
0006 // This library is designed to be a tool which serves two purposes.
0007 // Its primary purpose is to provide a generic framework for doing
0008 // sideband subtraction.  It will also plug into current tag and probe
0009 // modules to prevent code duplication and redundancy.
0010 //
0011 ///////////////////////////////////////////////////////////////////////
0012 
0013 #include "PhysicsTools/Utilities/interface/SideBandSubtraction.h"
0014 // System includes
0015 #include <iostream>
0016 #include <ios>
0017 #include <fstream>
0018 #include <cstdlib>
0019 #include <sstream>
0020 
0021 // ROOT includes
0022 #include <TCanvas.h>
0023 #include <TFile.h>
0024 #include <TF1.h>
0025 #include <TH1F.h>
0026 #include <TString.h>
0027 #include <TKey.h>
0028 #include <TClass.h>
0029 
0030 // RooFit includes
0031 #include <RooFitResult.h>
0032 #include <RooRealVar.h>
0033 #include <RooAbsPdf.h>
0034 #include <RooDataSet.h>
0035 #include <RooPlot.h>
0036 
0037 using namespace RooFit;
0038 using std::cerr;
0039 using std::cout;
0040 using std::endl;
0041 using std::string;
0042 using std::vector;
0043 
0044 template <class T>
0045 inline std::string stringify(const T& t) {
0046   std::ostringstream o;
0047   if (!(o << t))
0048     return "err";
0049   return o.str();
0050 }
0051 Double_t SideBandSubtract::getYield(const std::vector<SbsRegion>& Regions, RooAbsPdf* PDF) {
0052   if (PDF == nullptr || SeparationVariable == nullptr)
0053     return 0.0;
0054 
0055   Double_t yield = 0;
0056   RooAbsReal* intPDF;
0057   for (unsigned int i = 0; i < Regions.size(); i++) {
0058     if (verbose)
0059       cout << "Integrating over Range: " << Regions[i].RegionName << " from " << Regions[i].min << " to "
0060            << Regions[i].max << endl;
0061     intPDF = PDF->createIntegral(*SeparationVariable, Range(Regions[i].RegionName.c_str()));
0062     yield += intPDF->getVal();
0063     if (verbose)
0064       cout << "Current yield: " << yield << endl;
0065   }
0066   return yield;
0067 }
0068 static void setHistOptions(TH1F* histo, string name, string title, string axis_label) {
0069   histo->SetName(name.c_str());
0070   histo->SetTitle(title.c_str());
0071   if (axis_label == "GeV/c^2")
0072     axis_label = "Mass (" + axis_label + ")";
0073   if (axis_label == "GeV/c")
0074     axis_label = "Momentum (" + axis_label + ")";
0075   histo->GetXaxis()->SetTitle(axis_label.c_str());
0076 }
0077 int SideBandSubtract::doSubtraction(RooRealVar* variable,
0078                                     Double_t stsratio,
0079                                     Int_t index)  //stsratio -> signal to sideband ratio
0080 {
0081   if (Data == nullptr || SeparationVariable == nullptr) {
0082     cerr << "ERROR: Data or SeparationVariable is NULL returning now!\n";
0083     return -1;
0084   }
0085   TH1F* SideBandHist = (TH1F*)BaseHistos[index]->Clone();
0086   setHistOptions(SideBandHist,
0087                  (string)variable->GetName() + "Sideband",
0088                  (string)SideBandHist->GetTitle() + " Sideband",
0089                  (string)variable->getUnit());
0090 
0091   TH1F* SignalHist = (TH1F*)BaseHistos[index]->Clone();
0092   setHistOptions(SignalHist,
0093                  (string)variable->GetName() + "SignalHist",
0094                  (string)SignalHist->GetTitle() + " Raw Signal",
0095                  (string)variable->getUnit());
0096 
0097   //Begin a loop over the data to fill our histograms. I should figure
0098   //out how to do this in one shot to avoid a loop
0099   //O(N_vars*N_events)...
0100 
0101   RooRealVar* sep_var = nullptr;
0102   for (const auto& var : *Data->get()) {
0103     if ((string)var->GetName() == (string)SeparationVariable->GetName()) {
0104       sep_var = (RooRealVar*)var;
0105       break;
0106     }
0107   }
0108 
0109   for (int i = 0; i < Data->numEntries(); i++) {
0110     Data->get(i);
0111     Double_t value = variable->getVal();
0112     Double_t cutval = sep_var->getVal();
0113 
0114     for (unsigned int j = 0; j < SideBandRegions.size(); j++)  //UGLY!  Find better solution!
0115     {
0116       if (cutval > SideBandRegions[j].min && cutval < SideBandRegions[j].max)
0117         SideBandHist->Fill(value);
0118     }
0119     for (unsigned int j = 0; j < SignalRegions.size(); j++) {
0120       if (cutval > SignalRegions[j].min && cutval < SignalRegions[j].max)
0121         SignalHist->Fill(value);
0122     }
0123   }
0124   //Save pre-subtracted histo
0125   SignalHist->Sumw2();
0126   SideBandHist->Sumw2();
0127   //SignalHist->SetDirectory(0); SideBandHist->SetDirectory(0);
0128   RawHistos.push_back(*SignalHist);
0129 
0130   SignalHist->Add(SideBandHist, -stsratio);
0131 
0132   SignalHist->SetTitle(((string)BaseHistos[index]->GetTitle() + " SBS Signal").c_str());
0133   //Save subtracted histo
0134   SBSHistos.push_back(*SignalHist);
0135   //Save Sideband histo
0136   SideBandHistos.push_back(*SideBandHist);
0137 
0138   if (SideBandHist)
0139     delete SideBandHist;
0140   return 0;
0141 }
0142 static void print_histo(TH1F* plot, string outname) {
0143   TCanvas genericCanvas;
0144   plot->Draw("E1P0");
0145   outname = outname + ".eps";
0146   genericCanvas.SaveAs(outname.c_str());
0147   outname.replace(outname.size() - 3, 3, "gif");
0148   genericCanvas.SaveAs(outname.c_str());
0149 }
0150 void SideBandSubtract::printResults(string prefix) {  //handles *all* printing
0151   //spool over vectors of histograms and print them, then print
0152   //separation variable plots and the results text file.
0153   if (SeparationVariable == nullptr) {
0154     cerr << "ERROR: printResults, SeparationVariable is NULL!\n";
0155     return;
0156   }
0157   string filename;  //output file name
0158   for (unsigned int i = 0; i < RawHistos.size(); ++i) {
0159     filename = prefix + "Raw_" + (string)RawHistos[i].GetName();
0160     print_histo(&RawHistos[i], filename);
0161   }
0162   for (unsigned int i = 0; i < SBSHistos.size(); ++i) {
0163     filename = prefix + "SBS_" + (string)RawHistos[i].GetName();
0164     print_histo(&SBSHistos[i], filename);
0165   }
0166   if (verbose) {
0167     for (unsigned int i = 0; i < SideBandHistos.size(); ++i) {
0168       filename = prefix + "SideBand_" + (string)RawHistos[i].GetName();
0169       print_histo(&SideBandHistos[i], filename);
0170     }
0171   }
0172 
0173   string outname = prefix + (string)SeparationVariable->GetName() + "_fitted.eps";
0174   if (Data != nullptr && ModelPDF != nullptr) {
0175     RooPlot* SepVarFrame = SeparationVariable->frame();
0176     Data->plotOn(SepVarFrame);
0177     ModelPDF->plotOn(SepVarFrame);
0178     TCanvas Canvas;
0179     SepVarFrame->Draw();
0180     Canvas.SaveAs(outname.c_str());
0181     outname.replace(outname.size() - 3, 3, "gif");
0182     Canvas.SaveAs(outname.c_str());
0183   } else
0184     cerr << "ERROR: printResults, Data or ModelPDF is NULL!\n";
0185 
0186   string result_outname = prefix + "_fit_results.txt";
0187   std::ofstream output(result_outname.c_str(), std::ios::out);
0188   if (!output) {
0189     cout << "ERROR: Could not open file for writing!\n";
0190     return;
0191   }
0192 
0193   if (fit_result != nullptr) {
0194 #if ROOT_VERSION_CODE <= ROOT_VERSION(5, 19, 0)
0195     fit_result->printToStream(output, Verbose);
0196 #else
0197     fit_result->printMultiline(output, kTRUE);
0198 #endif
0199   }
0200 }
0201 
0202 void SideBandSubtract::saveResults(string outname) {
0203   //saves the ephemeral stuff to a root file for future
0204   //use/modification (ie everything printed by printResults())
0205 
0206   TFile output(outname.c_str(), "UPDATE");  //open the output file,
0207                                             //create it if it doesn't
0208                                             //exist
0209   //Since keys are only available from files on disk, we need to write
0210   //out a new file.  If the file already existed, then we opened to
0211   //update, and are writing nothing new.
0212   output.Write();
0213   TString dirname;
0214   TIter nextkey(output.GetListOfKeys());
0215   TKey* key;
0216   TDirectory* curDir = nullptr;
0217   while ((key = (TKey*)nextkey.Next())) {
0218     if (key == nullptr)
0219       break;
0220     TObject* obj = key->ReadObj();
0221     if (obj->IsA()->InheritsFrom("TDirectory"))
0222       dirname = obj->GetName();
0223   }
0224 
0225   if (dirname == "") {
0226     //we didn't find any directories so, we'll make a new one
0227     curDir = output.mkdir("run0", "Run 0");
0228     output.cd("run0");
0229   } else {
0230     //manipulate last dir string, make a new one, and get ready to fill
0231     dirname.Remove(0, 3);
0232     Int_t run_num = dirname.Atoi();
0233     run_num++;
0234     dirname = "run" + stringify(run_num);
0235     curDir = output.mkdir(dirname.Data(), ("Run " + stringify(run_num)).c_str());
0236     output.cd(dirname.Data());
0237   }
0238   if (curDir == nullptr)
0239     curDir = output.GetDirectory("", kTRUE, "");
0240 
0241   //these should all be the same size, but to be pedantic we'll loop
0242   //over each one individually, also, we need to associate them with
0243   //the directory because by default they "float" in memory to avoid
0244   //conflicts with other root files the user has open. If they want to
0245   //write to those files, they need to close their file, pass the name
0246   //here, and then let us work.
0247   for (unsigned int i = 0; i < SideBandHistos.size(); ++i) {
0248     SideBandHistos[i].SetDirectory(curDir);
0249     SideBandHistos[i].Write();
0250   }
0251   for (unsigned int i = 0; i < RawHistos.size(); ++i) {
0252     RawHistos[i].SetDirectory(curDir);
0253     RawHistos[i].Write();
0254   }
0255   for (unsigned int i = 0; i < SBSHistos.size(); ++i) {
0256     SBSHistos[i].SetDirectory(curDir);
0257     SBSHistos[i].Write();
0258   }
0259   if (Data != nullptr && ModelPDF != nullptr && BackgroundPDF != nullptr && SeparationVariable != nullptr) {
0260     RooPlot* sep_varFrame = SeparationVariable->frame();
0261     Data->plotOn(sep_varFrame);
0262     ModelPDF->plotOn(sep_varFrame);
0263     BackgroundPDF->plotOn(sep_varFrame);
0264     sep_varFrame->Write();
0265   } else
0266     cerr << "ERROR: saveResults, did not save RooPlot of data and fit\n";
0267   output.Write();
0268 }
0269 void SideBandSubtract::setDataSet(RooDataSet* newData) {
0270   if (newData != nullptr)
0271     Data = newData;
0272 }
0273 void SideBandSubtract::print_plot(RooRealVar* printVar, string outname) {
0274   if (Data == nullptr || ModelPDF == nullptr) {
0275     cerr << "ERROR: print_plot, Data or ModelPDF are NULL\n";
0276     return;
0277   }
0278 
0279   RooPlot* genericFrame = printVar->frame();
0280   Data->plotOn(genericFrame);
0281   ModelPDF->plotOn(genericFrame);
0282   TCanvas genericCanvas;
0283   genericFrame->Draw();
0284   outname = outname + ".eps";
0285   genericCanvas.SaveAs(outname.c_str());
0286   outname.replace(outname.size() - 3, 3, "gif");
0287   genericCanvas.SaveAs(outname.c_str());
0288 }
0289 SideBandSubtract::SideBandSubtract()
0290     : BackgroundPDF(nullptr),
0291       ModelPDF(nullptr),
0292       Data(nullptr),
0293       SeparationVariable(nullptr),
0294       verbose(false),
0295       SignalRegions(),
0296       SideBandRegions(),
0297       SideBandHistos(0),
0298       RawHistos(0),
0299       SBSHistos(0),
0300       BaseHistos(0),
0301       fit_result(nullptr),
0302       SignalSidebandRatio(0) {
0303   // Default constructor so we can do fast subtraction
0304 }
0305 SideBandSubtract::SideBandSubtract(RooAbsPdf* model_shape,
0306                                    RooAbsPdf* bkg_shape,
0307                                    RooDataSet* data,
0308                                    RooRealVar* sep_var,
0309                                    const vector<TH1F*>& base,
0310                                    bool verb)
0311     : BackgroundPDF(bkg_shape),
0312       ModelPDF(model_shape),
0313       Data(data),
0314       SeparationVariable(sep_var),
0315       verbose(verb),
0316       SignalRegions(),
0317       SideBandRegions(),
0318       SideBandHistos(),
0319       RawHistos(),
0320       SBSHistos(),
0321       BaseHistos(base),
0322       base_histo(nullptr),
0323       fit_result(nullptr),
0324       SignalSidebandRatio(0) {}
0325 /*
0326 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape, 
0327                    RooAbsPdf *bkg_shape, 
0328                    RooDataSet* data,
0329                    RooRealVar* sep_var,
0330                    bool verb
0331                    )
0332   : BackgroundPDF(bkg_shape), 
0333     ModelPDF(model_shape), 
0334     Data(data),
0335     SeparationVariable(sep_var),
0336     verbose(verb),
0337     SignalRegions(),
0338     SideBandRegions(),
0339     SideBandHistos(),
0340     RawHistos(),
0341     SBSHistos(),
0342     BaseHistos(),
0343     base_histo(0),
0344     fit_result(0),
0345     SignalSidebandRatio(0)
0346 {
0347   // Build BaseHistos from dataset
0348   TIterator* iter = (TIterator*) Data->get()->createIterator();
0349   RooAbsArg *var=NULL;
0350   RooRealVar *variable=NULL;
0351   cout <<"Starting while loop\n";
0352   while((var = (RooAbsArg*)iter->Next()))
0353     {
0354       if((string)var->ClassName() != "RooRealVar")
0355     continue;
0356       variable = (RooRealVar*)var;
0357       //we own the data this points to so we need to delete it at the end...
0358       assert(variable!=NULL);
0359       string title = "base_"+(string)variable->GetName();
0360       base_histo = (TH1F*)Data->createHistogram(title.c_str(), *variable, Binning(variable->getBinning("default",verb,kTRUE)) );
0361       //cout <<"Made histo with name: "<<base_histo->GetName()<<endl;
0362       //base_histo->SetDirectory(0);
0363       BaseHistos.push_back(*base_histo);
0364       cout <<"Added histo to BaseHistos!\n Deleting local copy...";
0365       if(base_histo) delete base_histo;
0366       cout <<"Done!\n";
0367     }
0368 
0369 }
0370 */
0371 SideBandSubtract::~SideBandSubtract() {
0372   //**WARNING**
0373 
0374   // We don't delete objects that we don't own (duh) so, all of our
0375   // pointers just hang out and get handled by other people :)
0376 
0377   // We DO own the BaseHistos IFF the user didn't provide us with
0378   // them and we constructed them from the dataset... in this case
0379   // base_histo will be non-null and we want to delete the vector
0380   // of histograms...
0381   resetSBSProducts();
0382   /*
0383   if(base_histo!=NULL)
0384     {
0385       BaseHistos.clear();
0386     }
0387   */
0388 }
0389 void SideBandSubtract::addSignalRegion(Double_t min, Double_t max) {
0390   SbsRegion signal;
0391   signal.min = min;
0392   signal.max = max;
0393   signal.RegionName = "Signal" + stringify(SignalRegions.size());
0394   if (SeparationVariable != nullptr)
0395     SeparationVariable->setRange(signal.RegionName.c_str(), signal.min, signal.max);
0396   SignalRegions.push_back(signal);
0397   return;
0398 }
0399 void SideBandSubtract::addSideBandRegion(Double_t min, Double_t max) {
0400   SbsRegion sideband;
0401   sideband.min = min;
0402   sideband.max = max;
0403   sideband.RegionName = "SideBand" + stringify(SideBandRegions.size());
0404   if (SeparationVariable != nullptr)
0405     SeparationVariable->setRange(sideband.RegionName.c_str(), sideband.min, sideband.max);
0406   SideBandRegions.push_back(sideband);
0407   return;
0408 }
0409 int SideBandSubtract::doGlobalFit() {
0410   if (verbose)
0411     cout << "Beginning SideBand Subtraction\n";
0412 
0413   if (ModelPDF != nullptr && Data != nullptr && SeparationVariable != nullptr) {
0414     fit_result = ModelPDF->fitTo(*Data);
0415   } else {
0416     cerr << "ERROR: doGobalFit, no ModelPDF, SeparationVariable or Data specified\n";
0417     return -1;
0418   }
0419 
0420   Double_t SideBandYield = getYield(SideBandRegions, BackgroundPDF);
0421   Double_t BackgroundInSignal = getYield(SignalRegions, BackgroundPDF);
0422 
0423   SignalSidebandRatio = BackgroundInSignal / SideBandYield;
0424   if (verbose) {
0425     cout << "Finished fitting background!\n";
0426     cout << "Attained a Signal to Sideband ratio of: " << SignalSidebandRatio << endl;
0427   }
0428   //I can't see a way around a double loop for each variable.  Maybe I
0429   //can get a C++/RooFit guru to save me the trouble here?
0430 
0431   //need to grab sbs objects after each global fit, because they get reset
0432   resetSBSProducts();
0433   for (const auto& variable : *Data->get()) {
0434     for (unsigned int i = 0; i < BaseHistos.size(); i++) {
0435       if ((string)variable->GetName() != (string)SeparationVariable->GetName() &&
0436           (string)variable->GetName() == (string)BaseHistos[i]->GetName())
0437         doSubtraction((RooRealVar*)variable, SignalSidebandRatio, i);
0438     }
0439   }
0440 
0441   return 0;
0442 }
0443 void SideBandSubtract::doFastSubtraction(TH1F& Total, TH1F& Result, SbsRegion& leftRegion, SbsRegion& rightRegion) {
0444   Int_t binMin = Total.FindBin(leftRegion.max, 0.0, 0.0);
0445   Int_t binMax = Total.FindBin(leftRegion.min, 0.0, 0.0);
0446   double numLeft = Total.Integral(binMin, binMax);
0447 
0448   binMin = Total.FindBin(rightRegion.max, 0.0, 0.0);
0449   binMax = Total.FindBin(rightRegion.min, 0.0, 0.0);
0450   double numRight = Total.Integral(binMin, binMax);
0451 
0452   const unsigned int nbinsx = Total.GetNbinsX();
0453   const double x1 = (leftRegion.max + leftRegion.min) / 2.0;
0454   const double x2 = (rightRegion.max + rightRegion.min) / 2.0;
0455 
0456   const double y1 = numLeft / (leftRegion.max - leftRegion.min);
0457   const double y2 = numRight / (rightRegion.max - rightRegion.min);
0458 
0459   const double Slope = (y2 - y1) / (x2 - x1);
0460   const double Intercept = y1 - Slope * x1;
0461   // Evantually we want to handle more complicated functions, but for
0462   // now... just use a linear one
0463   TF1 function("sbs_function_line", "[0]*x + [1]", Total.GetMinimum(), Total.GetMaximum());
0464   for (unsigned int binx = 1; binx <= nbinsx; ++binx) {
0465     double binWidth = Total.GetBinWidth(binx);
0466     function.SetParameter(0, Slope * binWidth);
0467     function.SetParameter(1, Intercept * binWidth);
0468 
0469     double xx = Total.GetBinCenter(binx);
0470     double cu = Total.GetBinContent(binx) - function.Eval(xx);
0471     // TODO: Propogate the  error on the parameters in function.
0472     double error1 = Total.GetBinError(binx);
0473 
0474     Result.SetBinContent(binx, cu);
0475     Result.SetBinError(binx, error1);
0476   }
0477   Result.SetEntries(Result.Integral());
0478 }
0479 RooFitResult* SideBandSubtract::getFitResult() { return fit_result; }
0480 vector<TH1F*> SideBandSubtract::getBaseHistos() { return BaseHistos; }
0481 vector<TH1F> SideBandSubtract::getRawHistos() { return RawHistos; }
0482 vector<TH1F> SideBandSubtract::getSBSHistos() { return SBSHistos; }
0483 Double_t SideBandSubtract::getSTSRatio() { return SignalSidebandRatio; }
0484 void SideBandSubtract::resetSBSProducts() {
0485   //cout <<"Starting to reset products \n";
0486 
0487   if (!SideBandHistos.empty())
0488     SideBandHistos.clear();
0489 
0490   if (!RawHistos.empty())
0491     RawHistos.clear();
0492   if (!SBSHistos.empty())
0493     SBSHistos.clear();
0494 }