Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-02-10 02:52:10

0001 ///////////////////////////////////////////////////////////////////////
0002 // Author: David Bjergaard
0003 //
0004 // Library for Generic Sideband Subtraction Methods.
0005 //
0006 // This library is designed to be a tool which serves two purposes.
0007 // Its primary purpose is to provide a generic framework for doing
0008 // sideband subtraction.  It will also plug into current tag and probe
0009 // modules to prevent code duplication and redundancy.
0010 //
0011 ///////////////////////////////////////////////////////////////////////
0012 
0013 #include "PhysicsTools/Utilities/interface/SideBandSubtraction.h"
0014 // System includes
0015 #include <iostream>
0016 #include <ios>
0017 #include <fstream>
0018 #include <cstdlib>
0019 #include <sstream>
0020 
0021 // ROOT includes
0022 #include <TCanvas.h>
0023 #include <TFile.h>
0024 #include <TF1.h>
0025 #include <TH1F.h>
0026 #include <TString.h>
0027 #include <TKey.h>
0028 #include <TClass.h>
0029 
0030 // RooFit includes
0031 #include <RooFitResult.h>
0032 #include <RooRealVar.h>
0033 #include <RooAbsPdf.h>
0034 #include <RooDataSet.h>
0035 #include <RooPlot.h>
0036 
0037 using namespace RooFit;
0038 using std::cerr;
0039 using std::cout;
0040 using std::endl;
0041 using std::string;
0042 using std::vector;
0043 
0044 template <class T>
0045 inline std::string stringify(const T& t) {
0046   std::ostringstream o;
0047   if (!(o << t))
0048     return "err";
0049   return o.str();
0050 }
0051 Double_t SideBandSubtract::getYield(const std::vector<SbsRegion>& Regions, RooAbsPdf* PDF) {
0052   if (PDF == nullptr || SeparationVariable == nullptr)
0053     return 0.0;
0054 
0055   Double_t yield = 0;
0056   RooAbsReal* intPDF;
0057   for (unsigned int i = 0; i < Regions.size(); i++) {
0058     if (verbose)
0059       cout << "Integrating over Range: " << Regions[i].RegionName << " from " << Regions[i].min << " to "
0060            << Regions[i].max << endl;
0061     intPDF = PDF->createIntegral(*SeparationVariable, Range(Regions[i].RegionName.c_str()));
0062     yield += intPDF->getVal();
0063     if (verbose)
0064       cout << "Current yield: " << yield << endl;
0065   }
0066   return yield;
0067 }
0068 static void setHistOptions(TH1F* histo, string name, string title, string axis_label) {
0069   histo->SetName(name.c_str());
0070   histo->SetTitle(title.c_str());
0071   if (axis_label == "GeV/c^2")
0072     axis_label = "Mass (" + axis_label + ")";
0073   if (axis_label == "GeV/c")
0074     axis_label = "Momentum (" + axis_label + ")";
0075   histo->GetXaxis()->SetTitle(axis_label.c_str());
0076 }
0077 int SideBandSubtract::doSubtraction(RooRealVar* variable,
0078                                     Double_t stsratio,
0079                                     Int_t index)  //stsratio -> signal to sideband ratio
0080 {
0081   if (Data == nullptr || SeparationVariable == nullptr) {
0082     cerr << "ERROR: Data or SeparationVariable is NULL returning now!\n";
0083     return -1;
0084   }
0085   TH1F* SideBandHist = (TH1F*)BaseHistos[index]->Clone();
0086   setHistOptions(SideBandHist,
0087                  (string)variable->GetName() + "Sideband",
0088                  (string)SideBandHist->GetTitle() + " Sideband",
0089                  (string)variable->getUnit());
0090 
0091   TH1F* SignalHist = (TH1F*)BaseHistos[index]->Clone();
0092   setHistOptions(SignalHist,
0093                  (string)variable->GetName() + "SignalHist",
0094                  (string)SignalHist->GetTitle() + " Raw Signal",
0095                  (string)variable->getUnit());
0096 
0097   //Begin a loop over the data to fill our histograms. I should figure
0098   //out how to do this in one shot to avoid a loop
0099   //O(N_vars*N_events)...
0100 
0101   TIterator* iter = (TIterator*)Data->get()->createIterator();
0102   RooAbsArg* var = nullptr;
0103   RooRealVar* sep_var = nullptr;
0104   while ((var = (RooAbsArg*)iter->Next())) {
0105     if ((string)var->GetName() == (string)SeparationVariable->GetName()) {
0106       sep_var = (RooRealVar*)var;
0107       break;
0108     }
0109   }
0110   for (int i = 0; i < Data->numEntries(); i++) {
0111     Data->get(i);
0112     Double_t value = variable->getVal();
0113     Double_t cutval = sep_var->getVal();
0114 
0115     for (unsigned int j = 0; j < SideBandRegions.size(); j++)  //UGLY!  Find better solution!
0116     {
0117       if (cutval > SideBandRegions[j].min && cutval < SideBandRegions[j].max)
0118         SideBandHist->Fill(value);
0119     }
0120     for (unsigned int j = 0; j < SignalRegions.size(); j++) {
0121       if (cutval > SignalRegions[j].min && cutval < SignalRegions[j].max)
0122         SignalHist->Fill(value);
0123     }
0124   }
0125   //Save pre-subtracted histo
0126   SignalHist->Sumw2();
0127   SideBandHist->Sumw2();
0128   //SignalHist->SetDirectory(0); SideBandHist->SetDirectory(0);
0129   RawHistos.push_back(*SignalHist);
0130 
0131   SignalHist->Add(SideBandHist, -stsratio);
0132 
0133   SignalHist->SetTitle(((string)BaseHistos[index]->GetTitle() + " SBS Signal").c_str());
0134   //Save subtracted histo
0135   SBSHistos.push_back(*SignalHist);
0136   //Save Sideband histo
0137   SideBandHistos.push_back(*SideBandHist);
0138 
0139   if (SideBandHist)
0140     delete SideBandHist;
0141   return 0;
0142 }
0143 static void print_histo(TH1F* plot, string outname) {
0144   TCanvas genericCanvas;
0145   plot->Draw("E1P0");
0146   outname = outname + ".eps";
0147   genericCanvas.SaveAs(outname.c_str());
0148   outname.replace(outname.size() - 3, 3, "gif");
0149   genericCanvas.SaveAs(outname.c_str());
0150 }
0151 void SideBandSubtract::printResults(string prefix) {  //handles *all* printing
0152   //spool over vectors of histograms and print them, then print
0153   //separation variable plots and the results text file.
0154   if (SeparationVariable == nullptr) {
0155     cerr << "ERROR: printResults, SeparationVariable is NULL!\n";
0156     return;
0157   }
0158   string filename;  //output file name
0159   for (unsigned int i = 0; i < RawHistos.size(); ++i) {
0160     filename = prefix + "Raw_" + (string)RawHistos[i].GetName();
0161     print_histo(&RawHistos[i], filename);
0162   }
0163   for (unsigned int i = 0; i < SBSHistos.size(); ++i) {
0164     filename = prefix + "SBS_" + (string)RawHistos[i].GetName();
0165     print_histo(&SBSHistos[i], filename);
0166   }
0167   if (verbose) {
0168     for (unsigned int i = 0; i < SideBandHistos.size(); ++i) {
0169       filename = prefix + "SideBand_" + (string)RawHistos[i].GetName();
0170       print_histo(&SideBandHistos[i], filename);
0171     }
0172   }
0173 
0174   string outname = prefix + (string)SeparationVariable->GetName() + "_fitted.eps";
0175   if (Data != nullptr && ModelPDF != nullptr) {
0176     RooPlot* SepVarFrame = SeparationVariable->frame();
0177     Data->plotOn(SepVarFrame);
0178     ModelPDF->plotOn(SepVarFrame);
0179     TCanvas Canvas;
0180     SepVarFrame->Draw();
0181     Canvas.SaveAs(outname.c_str());
0182     outname.replace(outname.size() - 3, 3, "gif");
0183     Canvas.SaveAs(outname.c_str());
0184   } else
0185     cerr << "ERROR: printResults, Data or ModelPDF is NULL!\n";
0186 
0187   string result_outname = prefix + "_fit_results.txt";
0188   std::ofstream output(result_outname.c_str(), std::ios::out);
0189   if (!output) {
0190     cout << "ERROR: Could not open file for writing!\n";
0191     return;
0192   }
0193 
0194   if (fit_result != nullptr) {
0195 #if ROOT_VERSION_CODE <= ROOT_VERSION(5, 19, 0)
0196     fit_result->printToStream(output, Verbose);
0197 #else
0198     fit_result->printMultiline(output, kTRUE);
0199 #endif
0200   }
0201 }
0202 
0203 void SideBandSubtract::saveResults(string outname) {
0204   //saves the ephemeral stuff to a root file for future
0205   //use/modification (ie everything printed by printResults())
0206 
0207   TFile output(outname.c_str(), "UPDATE");  //open the output file,
0208                                             //create it if it doesn't
0209                                             //exist
0210   //Since keys are only available from files on disk, we need to write
0211   //out a new file.  If the file already existed, then we opened to
0212   //update, and are writing nothing new.
0213   output.Write();
0214   TString dirname;
0215   TIter nextkey(output.GetListOfKeys());
0216   TKey* key;
0217   TDirectory* curDir = nullptr;
0218   while ((key = (TKey*)nextkey.Next())) {
0219     if (key == nullptr)
0220       break;
0221     TObject* obj = key->ReadObj();
0222     if (obj->IsA()->InheritsFrom("TDirectory"))
0223       dirname = obj->GetName();
0224   }
0225 
0226   if (dirname == "") {
0227     //we didn't find any directories so, we'll make a new one
0228     curDir = output.mkdir("run0", "Run 0");
0229     output.cd("run0");
0230   } else {
0231     //manipulate last dir string, make a new one, and get ready to fill
0232     dirname.Remove(0, 3);
0233     Int_t run_num = dirname.Atoi();
0234     run_num++;
0235     dirname = "run" + stringify(run_num);
0236     curDir = output.mkdir(dirname.Data(), ("Run " + stringify(run_num)).c_str());
0237     output.cd(dirname.Data());
0238   }
0239   if (curDir == nullptr)
0240     curDir = output.GetDirectory("", kTRUE, "");
0241 
0242   //these should all be the same size, but to be pedantic we'll loop
0243   //over each one individually, also, we need to associate them with
0244   //the directory because by default they "float" in memory to avoid
0245   //conflicts with other root files the user has open. If they want to
0246   //write to those files, they need to close their file, pass the name
0247   //here, and then let us work.
0248   for (unsigned int i = 0; i < SideBandHistos.size(); ++i) {
0249     SideBandHistos[i].SetDirectory(curDir);
0250     SideBandHistos[i].Write();
0251   }
0252   for (unsigned int i = 0; i < RawHistos.size(); ++i) {
0253     RawHistos[i].SetDirectory(curDir);
0254     RawHistos[i].Write();
0255   }
0256   for (unsigned int i = 0; i < SBSHistos.size(); ++i) {
0257     SBSHistos[i].SetDirectory(curDir);
0258     SBSHistos[i].Write();
0259   }
0260   if (Data != nullptr && ModelPDF != nullptr && BackgroundPDF != nullptr && SeparationVariable != nullptr) {
0261     RooPlot* sep_varFrame = SeparationVariable->frame();
0262     Data->plotOn(sep_varFrame);
0263     ModelPDF->plotOn(sep_varFrame);
0264     BackgroundPDF->plotOn(sep_varFrame);
0265     sep_varFrame->Write();
0266   } else
0267     cerr << "ERROR: saveResults, did not save RooPlot of data and fit\n";
0268   output.Write();
0269 }
0270 void SideBandSubtract::setDataSet(RooDataSet* newData) {
0271   if (newData != nullptr)
0272     Data = newData;
0273 }
0274 void SideBandSubtract::print_plot(RooRealVar* printVar, string outname) {
0275   if (Data == nullptr || ModelPDF == nullptr) {
0276     cerr << "ERROR: print_plot, Data or ModelPDF are NULL\n";
0277     return;
0278   }
0279 
0280   RooPlot* genericFrame = printVar->frame();
0281   Data->plotOn(genericFrame);
0282   ModelPDF->plotOn(genericFrame);
0283   TCanvas genericCanvas;
0284   genericFrame->Draw();
0285   outname = outname + ".eps";
0286   genericCanvas.SaveAs(outname.c_str());
0287   outname.replace(outname.size() - 3, 3, "gif");
0288   genericCanvas.SaveAs(outname.c_str());
0289 }
0290 SideBandSubtract::SideBandSubtract()
0291     : BackgroundPDF(nullptr),
0292       ModelPDF(nullptr),
0293       Data(nullptr),
0294       SeparationVariable(nullptr),
0295       verbose(false),
0296       SignalRegions(),
0297       SideBandRegions(),
0298       SideBandHistos(0),
0299       RawHistos(0),
0300       SBSHistos(0),
0301       BaseHistos(0),
0302       fit_result(nullptr),
0303       SignalSidebandRatio(0) {
0304   // Default constructor so we can do fast subtraction
0305 }
0306 SideBandSubtract::SideBandSubtract(RooAbsPdf* model_shape,
0307                                    RooAbsPdf* bkg_shape,
0308                                    RooDataSet* data,
0309                                    RooRealVar* sep_var,
0310                                    const vector<TH1F*>& base,
0311                                    bool verb)
0312     : BackgroundPDF(bkg_shape),
0313       ModelPDF(model_shape),
0314       Data(data),
0315       SeparationVariable(sep_var),
0316       verbose(verb),
0317       SignalRegions(),
0318       SideBandRegions(),
0319       SideBandHistos(),
0320       RawHistos(),
0321       SBSHistos(),
0322       BaseHistos(base),
0323       base_histo(nullptr),
0324       fit_result(nullptr),
0325       SignalSidebandRatio(0) {}
0326 /*
0327 SideBandSubtract::SideBandSubtract(RooAbsPdf *model_shape, 
0328                    RooAbsPdf *bkg_shape, 
0329                    RooDataSet* data,
0330                    RooRealVar* sep_var,
0331                    bool verb
0332                    )
0333   : BackgroundPDF(bkg_shape), 
0334     ModelPDF(model_shape), 
0335     Data(data),
0336     SeparationVariable(sep_var),
0337     verbose(verb),
0338     SignalRegions(),
0339     SideBandRegions(),
0340     SideBandHistos(),
0341     RawHistos(),
0342     SBSHistos(),
0343     BaseHistos(),
0344     base_histo(0),
0345     fit_result(0),
0346     SignalSidebandRatio(0)
0347 {
0348   // Build BaseHistos from dataset
0349   TIterator* iter = (TIterator*) Data->get()->createIterator();
0350   RooAbsArg *var=NULL;
0351   RooRealVar *variable=NULL;
0352   cout <<"Starting while loop\n";
0353   while((var = (RooAbsArg*)iter->Next()))
0354     {
0355       if((string)var->ClassName() != "RooRealVar")
0356     continue;
0357       variable = (RooRealVar*)var;
0358       //we own the data this points to so we need to delete it at the end...
0359       assert(variable!=NULL);
0360       string title = "base_"+(string)variable->GetName();
0361       base_histo = (TH1F*)Data->createHistogram(title.c_str(), *variable, Binning(variable->getBinning("default",verb,kTRUE)) );
0362       //cout <<"Made histo with name: "<<base_histo->GetName()<<endl;
0363       //base_histo->SetDirectory(0);
0364       BaseHistos.push_back(*base_histo);
0365       cout <<"Added histo to BaseHistos!\n Deleting local copy...";
0366       if(base_histo) delete base_histo;
0367       cout <<"Done!\n";
0368     }
0369 
0370 }
0371 */
0372 SideBandSubtract::~SideBandSubtract() {
0373   //**WARNING**
0374 
0375   // We don't delete objects that we don't own (duh) so, all of our
0376   // pointers just hang out and get handled by other people :)
0377 
0378   // We DO own the BaseHistos IFF the user didn't provide us with
0379   // them and we constructed them from the dataset... in this case
0380   // base_histo will be non-null and we want to delete the vector
0381   // of histograms...
0382   resetSBSProducts();
0383   /*
0384   if(base_histo!=NULL)
0385     {
0386       BaseHistos.clear();
0387     }
0388   */
0389 }
0390 void SideBandSubtract::addSignalRegion(Double_t min, Double_t max) {
0391   SbsRegion signal;
0392   signal.min = min;
0393   signal.max = max;
0394   signal.RegionName = "Signal" + stringify(SignalRegions.size());
0395   if (SeparationVariable != nullptr)
0396     SeparationVariable->setRange(signal.RegionName.c_str(), signal.min, signal.max);
0397   SignalRegions.push_back(signal);
0398   return;
0399 }
0400 void SideBandSubtract::addSideBandRegion(Double_t min, Double_t max) {
0401   SbsRegion sideband;
0402   sideband.min = min;
0403   sideband.max = max;
0404   sideband.RegionName = "SideBand" + stringify(SideBandRegions.size());
0405   if (SeparationVariable != nullptr)
0406     SeparationVariable->setRange(sideband.RegionName.c_str(), sideband.min, sideband.max);
0407   SideBandRegions.push_back(sideband);
0408   return;
0409 }
0410 int SideBandSubtract::doGlobalFit() {
0411   if (verbose)
0412     cout << "Beginning SideBand Subtraction\n";
0413 
0414   if (ModelPDF != nullptr && Data != nullptr && SeparationVariable != nullptr) {
0415     fit_result = ModelPDF->fitTo(*Data);
0416   } else {
0417     cerr << "ERROR: doGobalFit, no ModelPDF, SeparationVariable or Data specified\n";
0418     return -1;
0419   }
0420 
0421   Double_t SideBandYield = getYield(SideBandRegions, BackgroundPDF);
0422   Double_t BackgroundInSignal = getYield(SignalRegions, BackgroundPDF);
0423 
0424   SignalSidebandRatio = BackgroundInSignal / SideBandYield;
0425   if (verbose) {
0426     cout << "Finished fitting background!\n";
0427     cout << "Attained a Signal to Sideband ratio of: " << SignalSidebandRatio << endl;
0428   }
0429   //I can't see a way around a double loop for each variable.  Maybe I
0430   //can get a C++/RooFit guru to save me the trouble here?
0431 
0432   //need to grab sbs objects after each global fit, because they get reset
0433   resetSBSProducts();
0434   TIterator* iter = (TIterator*)Data->get()->createIterator();
0435   RooAbsArg* variable;
0436   while ((variable = (RooAbsArg*)iter->Next())) {
0437     for (unsigned int i = 0; i < BaseHistos.size(); i++) {
0438       if ((string)variable->GetName() != (string)SeparationVariable->GetName() &&
0439           (string)variable->GetName() == (string)BaseHistos[i]->GetName())
0440         doSubtraction((RooRealVar*)variable, SignalSidebandRatio, i);
0441     }
0442   }
0443 
0444   //  clean up our memory...
0445   if (variable)
0446     delete variable;
0447   if (iter)
0448     delete iter;
0449   return 0;
0450 }
0451 void SideBandSubtract::doFastSubtraction(TH1F& Total, TH1F& Result, SbsRegion& leftRegion, SbsRegion& rightRegion) {
0452   Int_t binMin = Total.FindBin(leftRegion.max, 0.0, 0.0);
0453   Int_t binMax = Total.FindBin(leftRegion.min, 0.0, 0.0);
0454   double numLeft = Total.Integral(binMin, binMax);
0455 
0456   binMin = Total.FindBin(rightRegion.max, 0.0, 0.0);
0457   binMax = Total.FindBin(rightRegion.min, 0.0, 0.0);
0458   double numRight = Total.Integral(binMin, binMax);
0459 
0460   const unsigned int nbinsx = Total.GetNbinsX();
0461   const double x1 = (leftRegion.max + leftRegion.min) / 2.0;
0462   const double x2 = (rightRegion.max + rightRegion.min) / 2.0;
0463 
0464   const double y1 = numLeft / (leftRegion.max - leftRegion.min);
0465   const double y2 = numRight / (rightRegion.max - rightRegion.min);
0466 
0467   const double Slope = (y2 - y1) / (x2 - x1);
0468   const double Intercept = y1 - Slope * x1;
0469   // Evantually we want to handle more complicated functions, but for
0470   // now... just use a linear one
0471   TF1 function("sbs_function_line", "[0]*x + [1]", Total.GetMinimum(), Total.GetMaximum());
0472   for (unsigned int binx = 1; binx <= nbinsx; ++binx) {
0473     double binWidth = Total.GetBinWidth(binx);
0474     function.SetParameter(0, Slope * binWidth);
0475     function.SetParameter(1, Intercept * binWidth);
0476 
0477     double xx = Total.GetBinCenter(binx);
0478     double cu = Total.GetBinContent(binx) - function.Eval(xx);
0479     // TODO: Propogate the  error on the parameters in function.
0480     double error1 = Total.GetBinError(binx);
0481 
0482     Result.SetBinContent(binx, cu);
0483     Result.SetBinError(binx, error1);
0484   }
0485   Result.SetEntries(Result.Integral());
0486 }
0487 RooFitResult* SideBandSubtract::getFitResult() { return fit_result; }
0488 vector<TH1F*> SideBandSubtract::getBaseHistos() { return BaseHistos; }
0489 vector<TH1F> SideBandSubtract::getRawHistos() { return RawHistos; }
0490 vector<TH1F> SideBandSubtract::getSBSHistos() { return SBSHistos; }
0491 Double_t SideBandSubtract::getSTSRatio() { return SignalSidebandRatio; }
0492 void SideBandSubtract::resetSBSProducts() {
0493   //cout <<"Starting to reset products \n";
0494 
0495   if (!SideBandHistos.empty())
0496     SideBandHistos.clear();
0497 
0498   if (!RawHistos.empty())
0499     RawHistos.clear();
0500   if (!SBSHistos.empty())
0501     SBSHistos.clear();
0502 }