Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-01-09 23:32:01

0001 #include "PhysicsTools/TagAndProbe/interface/TagProbeFitter.h"
0002 #include <memory>
0003 
0004 #include <stdexcept>
0005 
0006 #include "TROOT.h"
0007 #include "TFile.h"
0008 #include "TPad.h"
0009 #include "TText.h"
0010 #include "TCanvas.h"
0011 #include "TGraphAsymmErrors.h"
0012 #include "TH2F.h"
0013 #include "TStyle.h"
0014 #include "Math/QuantFuncMathCore.h"
0015 #include "RooAbsReal.h"
0016 #include "RooBinning.h"
0017 #include "RooBinningCategory.h"
0018 #include "RooCategory.h"
0019 #include "RooDataHist.h"
0020 #include "RooDataSet.h"
0021 #include "RooFitResult.h"
0022 #include "RooFormulaVar.h"
0023 #include "RooGlobalFunc.h"
0024 #include "RooLinkedListIter.h"
0025 #include "RooMappedCategory.h"
0026 #include "RooMinimizer.h"
0027 #include "RooMsgService.h"
0028 #include "RooMultiCategory.h"
0029 #include "RooNumIntConfig.h"
0030 #include "RooPlot.h"
0031 #include "RooProdPdf.h"
0032 #include "RooProfileLL.h"
0033 #include "RooRealVar.h"
0034 #include "RooThresholdCategory.h"
0035 #include "RooTrace.h"
0036 #include "RooWorkspace.h"
0037 #include "RooTreeDataStore.h"
0038 
0039 using namespace std;
0040 using namespace RooFit;
0041 
0042 TagProbeFitter::TagProbeFitter(const std::vector<std::string>& inputFileNames,
0043                                string inputDirectoryName,
0044                                string inputTreeName,
0045                                string outputFileName,
0046                                int numCPU_,
0047                                bool saveWorkspace_,
0048                                bool floatShapeParameters_,
0049                                const std::vector<std::string>& fixVars_) {
0050   inputTree = new TChain((inputDirectoryName + "/" + inputTreeName).c_str());
0051   for (size_t i = 0; i < inputFileNames.size(); i++) {
0052     inputTree->Add(inputFileNames[i].c_str());
0053   }
0054   outputFile = new TFile(outputFileName.c_str(), "recreate");
0055   outputDirectory = outputFile->mkdir(inputDirectoryName.c_str());
0056   numCPU = numCPU_;
0057   saveWorkspace = saveWorkspace_;
0058   massBins = 0;  // automatic default
0059   floatShapeParameters = floatShapeParameters_;
0060   fixVars = fixVars_;
0061   weightVar = "";
0062   if (!floatShapeParameters && fixVars.empty())
0063     std::cout << "TagProbeFitter: "
0064               << "You wnat to fix some variables but do not specify them!";
0065 
0066   gROOT->SetStyle("Plain");
0067   gStyle->SetTitleFillColor(0);
0068   gStyle->SetPalette(1);
0069   gStyle->SetOptStat(0);
0070   gStyle->SetPaintTextFormat(".2f");
0071 
0072   quiet = false;
0073 
0074   binnedFit = false;
0075 
0076   doSaveDistributionsPlot = true;
0077 
0078   // make integration very precise
0079   RooAbsReal::defaultIntegratorConfig()->setEpsAbs(1e-13);
0080   RooAbsReal::defaultIntegratorConfig()->setEpsRel(1e-13);
0081 
0082   split_mode = 0;
0083 }
0084 
0085 TagProbeFitter::~TagProbeFitter() {
0086   if (inputTree)
0087     delete inputTree;
0088   if (outputFile)
0089     outputFile->Close();
0090 }
0091 
0092 void TagProbeFitter::setQuiet(bool quiet_) {
0093   quiet = quiet_;
0094   if (quiet) {
0095     RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
0096   } else {
0097     RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING);
0098   }
0099 }
0100 
0101 void TagProbeFitter::setSplitMode(unsigned int nevents) { split_mode = nevents; }
0102 
0103 bool TagProbeFitter::addVariable(string name, string title, double low, double hi, string units) {
0104   RooRealVar temp(name.c_str(), title.c_str(), low, hi, units.c_str());
0105   temp.setBins(5000, "cache");
0106   variables.addClone(temp);
0107   return true;
0108 }
0109 
0110 bool TagProbeFitter::addCategory(string name, string title, string expression) {
0111   RooCategory* c = (RooCategory*)parameterParser.factory(expression.c_str());
0112   if (!c)
0113     return false;
0114   //set the name of the category to the passed name instead of the one in the expression
0115   c->SetName(name.c_str());
0116   c->SetTitle(title.c_str());
0117   variables.addClone(*c);
0118   return true;
0119 }
0120 
0121 bool TagProbeFitter::addExpression(string expressionName,
0122                                    string title,
0123                                    string expression,
0124                                    const std::vector<string>& arguments) {
0125   expressionVars.push_back(make_pair(make_pair(expressionName, title), make_pair(expression, arguments)));
0126   return true;
0127 }
0128 
0129 bool TagProbeFitter::addThresholdCategory(string categoryName, string title, string varName, double cutValue) {
0130   thresholdCategories.push_back(make_pair(make_pair(categoryName, title), make_pair(varName, cutValue)));
0131   return true;
0132 }
0133 
0134 void TagProbeFitter::addPdf(string name, vector<string>& pdfCommands) { pdfs[name] = pdfCommands; }
0135 
0136 void TagProbeFitter::setBinsForMassPlots(int bins) { massBins = bins; }
0137 
0138 void TagProbeFitter::setWeightVar(const std::string& var) { weightVar = var; }
0139 
0140 string TagProbeFitter::calculateEfficiency(string dirName,
0141                                            const std::vector<string>& effCats,
0142                                            const std::vector<string>& effStates,
0143                                            vector<string>& unbinnedVariables,
0144                                            map<string, vector<double> >& binnedReals,
0145                                            map<string, std::vector<string> >& binnedCategories,
0146                                            vector<string>& binToPDFmap) {
0147   //go to home directory
0148   outputDirectory->cd();
0149   //make a directory corresponding to this efficiency binning
0150   gDirectory->mkdir(dirName.c_str())->cd();
0151 
0152   RooArgSet dataVars;
0153 
0154   //collect unbinned variables
0155   for (vector<string>::iterator v = unbinnedVariables.begin(); v != unbinnedVariables.end(); v++) {
0156     dataVars.addClone(variables[v->c_str()], true);
0157     if (binnedFit && (v == unbinnedVariables.begin())) {
0158       ((RooRealVar&)dataVars[v->c_str()]).setBins(massBins);
0159     }
0160   }
0161   //collect the binned variables and the corresponding bin categories
0162   RooArgSet binnedVariables;
0163   RooArgSet binCategories;
0164   for (map<string, vector<double> >::iterator v = binnedReals.begin(); v != binnedReals.end(); v++) {
0165     TString name = v->first;
0166     if (variables.find(name) == nullptr) {
0167       cerr << "Binned variable '" << name << "' not found." << endl;
0168       return "Error";
0169     }
0170     binnedVariables.add(*dataVars.addClone(variables[name]));
0171     ((RooRealVar&)binnedVariables[name]).setBinning(RooBinning(v->second.size() - 1, &v->second[0]));
0172     binCategories.addClone(RooBinningCategory(name + "_bins", name + "_bins", (RooRealVar&)binnedVariables[name]));
0173   }
0174 
0175   //collect the category variables and the corresponding mapped categories
0176   RooArgSet categories;
0177   RooArgSet mappedCategories;
0178   for (map<string, vector<string> >::iterator v = binnedCategories.begin(); v != binnedCategories.end(); v++) {
0179     TString name = v->first;
0180     if (variables.find(name) == nullptr) {
0181       cerr << "Binned category '" << name << "' not found." << endl;
0182       return "Error";
0183     }
0184     categories.add(*dataVars.addClone(variables[name]));
0185     mappedCategories.addClone(RooMappedCategory(name + "_bins", name + "_bins", (RooCategory&)categories[name]));
0186     for (unsigned int i = 0; i < v->second.size(); i++) {
0187       ((RooMappedCategory&)mappedCategories[name + "_bins"])
0188           .map(v->second[i].c_str(), name + "_" + TString(v->second[i].c_str()).ReplaceAll(",", "_"));
0189     }
0190   }
0191 
0192   // add the efficiency category if it's not a dynamic one
0193   for (vector<string>::const_iterator effCat = effCats.begin(); effCat != effCats.end(); ++effCat) {
0194     if (variables.find(effCat->c_str()) != nullptr) {
0195       dataVars.addClone(variables[effCat->c_str()], true);
0196     }
0197   }
0198 
0199   //  add all variables used in expressions
0200   for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator ev = expressionVars.begin(),
0201                                                                                            eve = expressionVars.end();
0202        ev != eve;
0203        ++ev) {
0204     for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed; ++it) {
0205       // provided that they are real variables themselves
0206       if (variables.find(it->c_str()))
0207         dataVars.addClone(variables[it->c_str()], true);
0208     }
0209   }
0210   // add all real variables used in cuts
0211   for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0212                                                                                   tce = thresholdCategories.end();
0213        tc != tce;
0214        ++tc) {
0215     if (variables.find(tc->second.first.c_str()))
0216       dataVars.addClone(variables[tc->second.first.c_str()], true);
0217   }
0218 
0219   //now add the necessary mass and passing variables to make the unbinned RooDataSet
0220   RooDataSet* data(nullptr);
0221   if (not split_mode) {
0222     data = new RooDataSet("data",
0223                           "data",
0224                           inputTree,
0225                           dataVars,
0226                           /*selExpr=*/"",
0227                           /*wgtVarName=*/(weightVar.empty() ? nullptr : weightVar.c_str()));
0228 
0229     // Now add all expressions that are computed dynamically
0230     for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0231              ev = expressionVars.begin(),
0232              eve = expressionVars.end();
0233          ev != eve;
0234          ++ev) {
0235       RooArgList args;
0236       for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0237            ++it) {
0238         args.add(dataVars[it->c_str()]);
0239       }
0240       RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0241       RooRealVar* col = (RooRealVar*)data->addColumn(expr);
0242       dataVars.addClone(*col);
0243     }
0244 
0245     // And add all dynamic categories from thresholds
0246     for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0247                                                                                     tce = thresholdCategories.end();
0248          tc != tce;
0249          ++tc) {
0250       RooThresholdCategory tmp(tc->first.first.c_str(),
0251                                tc->first.second.c_str(),
0252                                (RooAbsReal&)dataVars[tc->second.first.c_str()],
0253                                "above",
0254                                1);
0255       tmp.addThreshold(tc->second.second, "below", 0);
0256       RooCategory* cat = (RooCategory*)data->addColumn(tmp);
0257       dataVars.addClone(*cat);
0258     }
0259   }
0260 
0261   //merge the bin categories to a MultiCategory for convenience
0262   RooMultiCategory allCats("allCats", "allCats", RooArgSet(binCategories, mappedCategories));
0263   string effName;
0264 
0265   if (not split_mode) {
0266     data->addColumn(allCats);
0267     //setup the efficiency category
0268     if (effCats.size() == 1) {
0269       effName = effCats.front() + "::" + effStates.front();
0270       RooMappedCategory efficiencyCategory(
0271           "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0272       efficiencyCategory.map(effStates.front().c_str(), "Passed");
0273       data->addColumn(efficiencyCategory);
0274     } else {
0275       RooArgSet rooEffCats;
0276       string multiState = "{";
0277       for (size_t i = 0; i < effCats.size(); ++i) {
0278         if (i) {
0279           multiState += ";";
0280           effName += " && ";
0281         }
0282         rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0283         multiState += effStates[i];
0284         effName = effCats[i] + "::" + effStates[i];
0285       }
0286       multiState += "}";
0287       RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0288       RooMappedCategory efficiencyCategory(
0289           "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0290       efficiencyCategory.map(multiState.c_str(), "Passed");
0291       data->addColumn(efficiencyCategory);
0292     }
0293   }
0294 
0295   //setup the pdf category
0296   RooMappedCategory pdfCategory(
0297       "_pdfCategory_", "_pdfCategory_", allCats, (!binToPDFmap.empty()) ? binToPDFmap[0].c_str() : "all");
0298   for (unsigned int i = 1; i < binToPDFmap.size(); i += 2) {
0299     pdfCategory.map(binToPDFmap[i].c_str(), binToPDFmap[i + 1].c_str());
0300   }
0301   if (not split_mode)
0302     data->addColumn(pdfCategory);
0303 
0304   //create the empty efficiency datasets from the binned variables
0305   RooRealVar efficiency("efficiency", "Efficiency", 0, 1);
0306 
0307   RooDataSet fitEfficiency("fit_eff",
0308                            "Efficiency from unbinned ML fit",
0309                            RooArgSet(RooArgSet(binnedVariables, categories), efficiency),
0310                            StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0311 
0312   RooDataSet cntEfficiency("cnt_eff",
0313                            "Efficiency from counting",
0314                            RooArgSet(RooArgSet(binnedVariables, categories), efficiency),
0315                            StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0316 
0317   if (not split_mode) {
0318     if (!floatShapeParameters) {
0319       //fitting whole dataset to get initial values for some parameters
0320       RooWorkspace* w = new RooWorkspace();
0321       w->import(*data);
0322       efficiency.setVal(0);  //reset
0323       efficiency.setAsymError(0, 0);
0324       std::cout << "ALL dataset: calling doFitEfficiency with pdf: " << pdfCategory.getLabel() << std::endl;
0325       doFitEfficiency(w, pdfCategory.getLabel(), efficiency);
0326       delete w;
0327     }
0328   } else {
0329     // disactive not needed branches
0330     inputTree->SetBranchStatus("*", false);
0331     TIterator* iter = dataVars.createIterator();
0332     TObject* obj(nullptr);
0333     while ((obj = iter->Next()))
0334       inputTree->SetBranchStatus(obj->GetName(), true);
0335   }
0336 
0337   // loop over all bins with the help of allCats
0338   // store index values in a separate vector to avoid issues
0339   // with iteration over changing allCats object
0340   std::vector<Int_t> allCatValues;
0341   TIterator* it = allCats.typeIterator();
0342   for (RooCatType* t = (RooCatType*)it->Next(); t != nullptr; t = (RooCatType*)it->Next())
0343     allCatValues.push_back(t->getVal());
0344   for (auto iCat : allCatValues) {
0345     const RooCatType* t = allCats.lookupType(iCat);
0346     //name of the multicategory
0347     TString catName = t->GetName();
0348     //skip unmapped states
0349     if (catName.Contains("NotMapped"))
0350       continue;
0351 
0352     RooDataSet* data_bin(nullptr);
0353     RooArgSet tmpVars;
0354 
0355     if (not split_mode) {
0356       //create the dataset
0357       data_bin = (RooDataSet*)data->reduce(Cut(TString::Format("allCats==%d", iCat)));
0358     } else {
0359       data_bin = new RooDataSet("data", "data", dataVars, (weightVar.empty() ? nullptr : weightVar.c_str()));
0360 
0361       TDirectory* tmp = gDirectory;
0362       gROOT->cd();
0363 
0364       // loop over input data and fill the dataset with events for
0365       // current category
0366       unsigned int n_entries = inputTree->GetEntries();
0367       printf("Input number of events: %u\n", n_entries);
0368       unsigned int first_entry = 0;
0369       while (first_entry < n_entries) {
0370         TTree* copyTree = inputTree->CopyTree("", "", split_mode, first_entry);
0371         RooTreeDataStore store("reader",
0372                                "reader",
0373                                dataVars,
0374                                *copyTree,
0375                                /*selExpr=*/"",
0376                                /*wgtVarName=*/(weightVar.empty() ? nullptr : weightVar.c_str()));
0377         for (unsigned int i = 0; i < store.GetEntries(); ++i) {
0378           store.get(i);
0379           if (allCats.getIndex() == iCat) {
0380             data_bin->add(dataVars, weightVar.empty() ? 1.0 : dataVars.getRealValue(weightVar.c_str()));
0381           }
0382         }
0383         delete copyTree;
0384         first_entry += split_mode;
0385         data_bin->Print("V");
0386       }
0387       data_bin->Print("V");
0388       tmp->cd();
0389 
0390       // Now add all expressions that are computed dynamically
0391       for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0392                ev = expressionVars.begin(),
0393                eve = expressionVars.end();
0394            ev != eve;
0395            ++ev) {
0396         RooArgList args;
0397         for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0398              ++it) {
0399           args.add(dataVars[it->c_str()]);
0400         }
0401         RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0402         RooRealVar* col = (RooRealVar*)data_bin->addColumn(expr);
0403         tmpVars.add(*dataVars.addClone(*col));
0404       }
0405 
0406       // And add all dynamic categories from thresholds
0407       for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0408                                                                                       tce = thresholdCategories.end();
0409            tc != tce;
0410            ++tc) {
0411         RooThresholdCategory tmp(tc->first.first.c_str(),
0412                                  tc->first.second.c_str(),
0413                                  (RooAbsReal&)dataVars[tc->second.first.c_str()],
0414                                  "above",
0415                                  1);
0416         tmp.addThreshold(tc->second.second, "below", 0);
0417         RooCategory* cat = (RooCategory*)data_bin->addColumn(tmp);
0418         tmpVars.add(*dataVars.addClone(*cat));
0419       }
0420 
0421       //setup the efficiency category
0422       if (effCats.size() == 1) {
0423         effName = effCats.front() + "::" + effStates.front();
0424         RooMappedCategory efficiencyCategory(
0425             "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0426         efficiencyCategory.map(effStates.front().c_str(), "Passed");
0427         data_bin->addColumn(efficiencyCategory);
0428       } else {
0429         RooArgSet rooEffCats;
0430         string multiState = "{";
0431         for (size_t i = 0; i < effCats.size(); ++i) {
0432           if (i) {
0433             multiState += ";";
0434             effName += " && ";
0435           }
0436           rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0437           multiState += effStates[i];
0438           effName = effCats[i] + "::" + effStates[i];
0439         }
0440         multiState += "}";
0441         RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0442         RooMappedCategory efficiencyCategory(
0443             "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0444         efficiencyCategory.map(multiState.c_str(), "Passed");
0445         data_bin->addColumn(efficiencyCategory);
0446       }
0447       data_bin->addColumn(pdfCategory);
0448     }
0449 
0450     //set the category variables by reading the first event
0451     const RooArgSet* row = data_bin->get();
0452 
0453     //get PDF name
0454     TString pdfName(((RooCategory*)row->find("_pdfCategory_"))->getLabel());
0455 
0456     //make directory name
0457     TString dirName = catName;
0458     dirName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "__");
0459     if (pdfName.Length() > 0) {
0460       dirName.Append("__").Append(pdfName);
0461     }
0462 
0463     cout << "Fitting bin:  " << dirName << endl;
0464     //make a directory for each bin
0465     gDirectory->mkdir(dirName)->cd();
0466 
0467     //create a workspace
0468     RooWorkspace* w = new RooWorkspace();
0469     //import the data
0470     w->import(*data_bin);
0471     delete data_bin;  // clean up earlier
0472     data_bin =
0473         dynamic_cast<RooDataSet*>(w->data("data"));  // point to the data that's in the workspace now (saves memory)
0474 
0475     //save the distribution of variables
0476     if (doSaveDistributionsPlot)
0477       saveDistributionsPlot(w);
0478     //do the fitting only if there is sufficient number of events
0479     if (data_bin->numEntries() > 0) {
0480       //set the values of binnedVariables to the mean value in this data bin
0481       RooArgSet meanOfVariables;
0482       RooLinkedListIter vit = binnedVariables.iterator();
0483       for (RooRealVar* v = (RooRealVar*)vit.Next(); v != nullptr; v = (RooRealVar*)vit.Next()) {
0484         meanOfVariables.addClone(*v);
0485         double mean = w->data("data")->mean(*v);
0486         RooBinning binning((RooBinning&)v->getBinning());
0487         int ind = binning.binNumber(mean);
0488         RooRealVar& newVar = (RooRealVar&)meanOfVariables[v->GetName()];
0489         newVar.setVal(mean);
0490         newVar.setAsymError(binning.binLow(ind) - mean, binning.binHigh(ind) - mean);
0491       }
0492 
0493       //put an entry in the efficiency dataset
0494       //note that the category values are coming from data_bin->get(0)
0495       meanOfVariables.addClone(*data_bin->get(0), true);
0496 
0497       efficiency.setVal(0);  //reset
0498       efficiency.setAsymError(0, 0);
0499       doFitEfficiency(w, pdfName.Data(), efficiency);
0500       fitEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0501 
0502       /*      efficiency.setVal(0);//reset
0503       doSBSEfficiency(w, efficiency);
0504       sbsEfficiency.add( RooArgSet(meanOfVariables, efficiency) );*/
0505 
0506       efficiency.setVal(0);  //reset
0507       doCntEfficiency(w, efficiency);
0508       cntEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0509     }
0510     //save the workspace if requested
0511     if (saveWorkspace) {
0512       w->Write("w");
0513     }
0514     //clean up
0515     delete w;
0516     if (split_mode)
0517       dataVars.remove(tmpVars);
0518     //get back to the initial directory
0519     gDirectory->cd("..");
0520   }
0521 
0522   //save the efficiency data
0523   fitEfficiency.Write();
0524   gDirectory->mkdir("fit_eff_plots")->cd();
0525   saveEfficiencyPlots(fitEfficiency, effName, binnedVariables, mappedCategories);
0526   gDirectory->cd("..");
0527 
0528   /*  sbsEfficiency.Write();
0529   gDirectory->mkdir("sbs_eff_plots")->cd();
0530   saveEfficiencyPlots(sbsEfficiency, effCat+"::"+effState, binnedVariables, mappedCategories);
0531   gDirectory->cd("..");*/
0532 
0533   cntEfficiency.Write();
0534   gDirectory->mkdir("cnt_eff_plots")->cd();
0535   saveEfficiencyPlots(cntEfficiency, effName, binnedVariables, mappedCategories);
0536   gDirectory->cd("..");
0537 
0538   if (not split_mode)
0539     delete data;
0540 
0541   //empty string means no error
0542   return "";
0543 }
0544 
0545 void TagProbeFitter::doFitEfficiency(RooWorkspace* w, string pdfName, RooRealVar& efficiency) {
0546   //if pdfName is empty skip the fit
0547   if (pdfName == "all") {
0548     return;
0549   }
0550   //create the simultaneous pdf of name pdfName
0551   createPdf(w, pdfs[pdfName]);
0552   //set the initial values for the yields of signal and background
0553   setInitialValues(w);
0554   std::unique_ptr<RooFitResult> res;
0555 
0556   RooAbsData* data = w->data("data");
0557   std::unique_ptr<RooDataHist> bdata;
0558   if (binnedFit) {
0559     // get variables from data, which contain also other binning or expression variables
0560     const RooArgSet* dataObs = data->get(0);
0561     // remove everything which is not a dependency of the pdf
0562     RooArgSet* obs = w->pdf("simPdf")->getObservables(dataObs);
0563     bdata = std::make_unique<RooDataHist>("data_binned", "data_binned", *obs, *data);
0564     w->import(*bdata);
0565     data = w->data("data_binned");
0566     delete obs;
0567   }
0568 
0569   double totPassing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0570   double totFailing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0571 
0572   RooAbsReal* simNLL = w->pdf("simPdf")->createNLL(*data, Extended(true), NumCPU(numCPU));
0573 
0574   RooMinimizer minimizer(*simNLL);  // we are going to use this for 'scan'
0575   minimizer.setStrategy(1);
0576   minimizer.setProfile(true);
0577   RooProfileLL profileLL("simPdfNLL", "", *simNLL, *w->var("efficiency"));
0578 
0579   //******* The block of code below is to make the fit converge faster.
0580   // ****** This part is OPTIONAL, i.e., off be default. User can activate this
0581   // ****** by setting the following parameters: "fixVars" and "floatShapeParameters"
0582   // ****** Here is the full logic:
0583   /////   ---> if "floatShapeParameters==true" && "fixVars is empty" :
0584   ////             Just perform the fit without any of these optimizations (this is the default logic)
0585   /////   ---> if "floatShapeParameters==true" && "fixVars is NOT empty" :
0586   ////             In each bin, fix the desired parameters and perform the fit to obtain a good starting point.
0587   /////            Then float these parameters and fit again.
0588   ////    ---> if "floatShapeParameters==false" && "fixVars is empty" :
0589   ////             Do not perform any fit at all. Just print error message.
0590   ////    ---> if "floatShapeParameters==false" && "fixVars is NOT empty" :
0591   ///              Perform a global fit to the whole sample, save the fitted values of the
0592   ///              user specified parameters, and fix them for bin-by-bin fit.
0593 
0594   if (!fixVars.empty()) {
0595     // calculate initial values for parameters user want to fix
0596     if (!floatShapeParameters && fixVarValues.empty()) {
0597       // we want to fix these parameters for each bin.
0598       // the following sequence fixes them, fits, releases and fits again
0599       // to get reasonable values.
0600       // ----------------------------------------------------------------------
0601       // This procedure works only once with a whole dataset (without binning)
0602       // ----------------------------------------------------------------------
0603 
0604       // fix them
0605       varFixer(w, true);
0606       //do fit
0607       minimizer.minimize("Minuit2", "Scan");
0608       minimizer.migrad();
0609       minimizer.hesse();
0610       //minuit.minos();
0611       //w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), Strategy(2),
0612       //PrintLevel(quiet?-1:1), PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
0613       //release vars
0614       varFixer(w, false);
0615       //do fit
0616       minimizer.minimize("Minuit2", "Scan");
0617       minimizer.migrad();
0618       minimizer.hesse();
0619       //minuit.minos();
0620       //w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), Strategy(2),
0621       //PrintLevel(quiet?-1:1), PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
0622       //save vars
0623       varSaver(w);
0624       // now we have a starting point. Fit will converge faster.
0625     }
0626 
0627     // here we can use initial values if we want (this works for each bin)
0628     if (!floatShapeParameters)
0629       varRestorer(w);  //restore vars
0630 
0631     //do fit
0632     minimizer.minimize("Minuit2", "Scan");
0633     minimizer.migrad();
0634     minimizer.hesse();
0635     // initialize the profile likelihood
0636     profileLL.getVal();
0637     RooMinimizer* profMinuit = profileLL.minimizer();
0638     profMinuit->setProfile(true);
0639     profMinuit->setStrategy(2);
0640     profMinuit->setPrintLevel(1);
0641     profMinuit->minos(*w->var("efficiency"));
0642     res.reset(profMinuit->save());
0643     //res = w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), Strategy(2),
0644     //Minos(*w->var("efficiency")), PrintLevel(quiet?-1:1),
0645     //PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
0646   }  //if(!fixVars.empty())
0647 
0648   // (default = true) if we don't want to fix any parameters or want to fit each bin with all parameters floating
0649   if (floatShapeParameters) {
0650     //release vars
0651     varFixer(w, false);
0652 
0653     //do fit
0654     minimizer.minimize("Minuit2", "Scan");
0655     minimizer.migrad();
0656     minimizer.hesse();
0657     res.reset(w->pdf("simPdf")->fitTo(*data,
0658                                       Save(true),
0659                                       Extended(true),
0660                                       NumCPU(numCPU),
0661                                       Strategy(2),
0662                                       Minos(*w->var("efficiency")),
0663                                       PrintLevel(quiet ? -1 : 1),
0664                                       PrintEvalErrors(quiet ? -1 : 1),
0665                                       Warnings(!quiet)));
0666   }
0667 
0668   // save everything
0669   res->Write("fitresults");
0670   w->saveSnapshot("finalState", w->components());
0671   saveFitPlot(w);
0672   //extract the efficiency parameter from the results
0673   RooRealVar* e = (RooRealVar*)res->floatParsFinal().find("efficiency");
0674   //What's wrong with this? and why don't they copy the errors!
0675   //efficiency = *e;
0676   efficiency.setVal(e->getVal());
0677   Double_t errLo = e->getErrorLo(), errHi = e->getErrorHi();
0678   if (errLo == 0 && e->getVal() < 0.5)
0679     errLo = e->getMin() - e->getVal();
0680   if (errHi == 0 && e->getVal() > 0.5)
0681     errHi = e->getMax() - e->getVal();
0682   efficiency.setAsymError(errLo, errHi);
0683 
0684   if (totPassing * totFailing == 0) {
0685     RooRealVar* nTot = (RooRealVar*)res->floatParsFinal().find("numTot");
0686     RooRealVar* fSig = (RooRealVar*)res->floatParsFinal().find("fSigAll");
0687     double cerr = ROOT::Math::beta_quantile(1 - (1.0 - .68540158589942957) / 2, 1, nTot->getVal() * fSig->getVal());
0688     /*
0689     std::cout << "======================================================================================" << std::endl;
0690     std::cout << "======= totPassing "  << totPassing << ", totFailing " << totFailing << std::endl;
0691     std::cout << "======= FIT: e  "  <<  e->getVal() << ",  e Lo " << e->getErrorLo()  << ",  e Hi " <<  e->getErrorHi() << std::endl;
0692     std::cout << "======= FIT:nS  "  << nS->getVal() << ", nS Lo " << nS->getErrorLo() << ", nS Hi " << nS->getErrorHi() << std::endl;
0693     std::cout << "======= FIT:nB  "  << nB->getVal() << ", nB Lo " << nB->getErrorLo() << ", nB Hi " << nB->getErrorHi() << std::endl;
0694     std::cout << "======= CNT:    "  << cerr << std::endl;
0695     std::cout << "======================================================================================" << std::endl;
0696     */
0697     if (totPassing == 0) {
0698       efficiency.setVal(0);
0699       efficiency.setAsymError(0, cerr);
0700     } else {
0701       efficiency.setVal(1);
0702       efficiency.setAsymError(-cerr, 0);
0703     }
0704   }
0705 
0706   delete simNLL;
0707 }
0708 
0709 void TagProbeFitter::createPdf(RooWorkspace* w, vector<string>& pdfCommands) {
0710   // create the signal and background pdfs defined by the user
0711   for (unsigned int i = 0; i < pdfCommands.size(); i++) {
0712     const std::string& command = pdfCommands[i];
0713     if (command.find("#import ") == 0) {
0714       TDirectory* here = gDirectory;
0715       w->import(command.substr(8).c_str());
0716       here->cd();
0717     } else {
0718       TDirectory* here = gDirectory;
0719       w->factory(command.c_str());
0720       here->cd();
0721     }
0722   }
0723   // setup the simultaneous extended pdf
0724 
0725   w->factory("expr::nSignalPass('efficiency*fSigAll*numTot', efficiency, fSigAll[.9,0,1],numTot[1,0,1e10])");
0726   w->factory("expr::nSignalFail('(1-efficiency)*fSigAll*numTot', efficiency, fSigAll,numTot)");
0727   w->factory("expr::nBkgPass('effBkg*(1-fSigAll)*numTot', effBkg[.9,0,1],fSigAll,numTot)");
0728   w->factory("expr::nBkgFail('(1-effBkg)*(1-fSigAll)*numTot', effBkg,fSigAll,numTot)");
0729   TString sPass = "signal", sFail = "signal";
0730   if (w->pdf("signalPass") != nullptr && w->pdf("signalFail") != nullptr) {
0731     if (w->pdf("signal") != nullptr)
0732       throw std::logic_error(
0733           "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0734     sPass = "signalPass";
0735     sFail = "signalFail";
0736   } else if (w->pdf("signal") != nullptr) {
0737     if (w->pdf("signalPass") != nullptr || w->pdf("signalFail") != nullptr) {
0738       throw std::logic_error(
0739           "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0740     }
0741   } else {
0742     throw std::logic_error("You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail')");
0743   }
0744   w->factory("SUM::pdfPass(nSignalPass*" + sPass + ", nBkgPass*backgroundPass)");  //fBkgPass*
0745   w->factory("SUM::pdfFail(nSignalFail*" + sFail + ", nBkgFail*backgroundFail)");  //fBkgFail*
0746 
0747   w->factory("SIMUL::simPdf(_efficiencyCategory_, Passed=pdfPass, Failed=pdfFail)");
0748   // signalFractionInPassing is not used in the fit just to set the initial values
0749   if (w->pdf("simPdf") == nullptr)
0750     throw std::runtime_error("Could not create simultaneous fit pdf.");
0751   if (w->var("signalFractionInPassing") == nullptr)
0752     w->factory("signalFractionInPassing[0.9]");
0753 }
0754 
0755 void TagProbeFitter::setInitialValues(RooWorkspace* w) {
0756   // calculate initial values
0757   double totPassing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0758   double totFailing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0759   //std::cout << "Number of probes: " << totPassing+totFailing << std::endl;
0760 
0761   // now set the values
0762   w->var("numTot")->setVal(totPassing + totFailing);
0763   w->var("numTot")->setMax(2.0 * (totPassing + totFailing) + 10);  //wiggle room in case of 0 events in bin
0764 
0765   if (totPassing == 0) {
0766     w->var("efficiency")->setVal(0.0);
0767     w->var("efficiency")->setAsymError(0, 1);
0768     w->var("efficiency")->setConstant(false);
0769   } else if (totFailing == 0) {
0770     w->var("efficiency")->setVal(1.0);
0771     w->var("efficiency")->setAsymError(-1, 0);
0772     w->var("efficiency")->setConstant(false);
0773   } else {
0774     w->var("efficiency")->setConstant(false);
0775   }
0776 
0777   // save initial state for reference
0778   w->saveSnapshot("initialState", w->components());
0779 }
0780 
0781 void TagProbeFitter::saveFitPlot(RooWorkspace* w) {
0782   // save refferences for convenience
0783   RooCategory& efficiencyCategory = *w->cat("_efficiencyCategory_");
0784   RooAbsData* dataAll = (binnedFit ? w->data("data_binned") : w->data("data"));
0785   RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0786   RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0787   RooAbsPdf& pdf = *w->pdf("simPdf");
0788   std::unique_ptr<RooArgSet> obs(pdf.getObservables(*dataAll));
0789   RooRealVar* mass = nullptr;
0790   RooLinkedListIter it = obs->iterator();
0791   for (RooAbsArg* v = (RooAbsArg*)it.Next(); v != nullptr; v = (RooAbsArg*)it.Next()) {
0792     if (!v->InheritsFrom("RooRealVar"))
0793       continue;
0794     mass = (RooRealVar*)v;
0795     break;
0796   }
0797   if (!mass)
0798     return;
0799   // make a 2x2 canvas
0800   TCanvas canvas("fit_canvas");
0801   canvas.Divide(2, 2);
0802   vector<RooPlot*> frames;
0803   // plot the Passing Probes
0804   canvas.cd(1);
0805   if (massBins == 0) {
0806     frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes")));
0807     frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes")));
0808     frames.push_back(mass->frame(Name("All"), Title("All Probes")));
0809   } else {
0810     frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes"), Bins(massBins)));
0811     frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes"), Bins(massBins)));
0812     frames.push_back(mass->frame(Name("All"), Title("All Probes"), Bins(massBins)));
0813   }
0814   dataPass->plotOn(frames[0]);
0815   pdf.plotOn(frames[0], Slice(efficiencyCategory, "Passed"), ProjWData(*dataPass), LineColor(kGreen));
0816   pdf.plotOn(frames[0],
0817              Slice(efficiencyCategory, "Passed"),
0818              ProjWData(*dataPass),
0819              LineColor(kGreen),
0820              Components("backgroundPass"),
0821              LineStyle(kDashed));
0822   frames[0]->Draw();
0823   // plot the Failing Probes
0824   canvas.cd(2);
0825   dataFail->plotOn(frames[1]);
0826   pdf.plotOn(frames[1], Slice(efficiencyCategory, "Failed"), ProjWData(*dataFail), LineColor(kRed));
0827   pdf.plotOn(frames[1],
0828              Slice(efficiencyCategory, "Failed"),
0829              ProjWData(*dataFail),
0830              LineColor(kRed),
0831              Components("backgroundFail"),
0832              LineStyle(kDashed));
0833   frames[1]->Draw();
0834   // plot the All Probes
0835   canvas.cd(3);
0836   dataAll->plotOn(frames.back());
0837   pdf.plotOn(frames.back(), ProjWData(*dataAll), LineColor(kBlue));
0838   pdf.plotOn(frames.back(),
0839              ProjWData(*dataAll),
0840              LineColor(kBlue),
0841              Components("backgroundPass,backgroundFail"),
0842              LineStyle(kDashed));
0843   frames.back()->Draw();
0844   // plot the Fit Results
0845   canvas.cd(4);
0846   frames.push_back(mass->frame(Name("Fit Results"), Title("Fit Results")));
0847   pdf.paramOn(frames.back(), dataAll, "", 0, "NELU", 0.1, 0.9, 0.9);
0848   // draw only the parameter box not the whole frame
0849   frames.back()->findObject(Form("%s_paramBox", pdf.GetName()))->Draw();
0850   //save and clean up
0851   canvas.Draw();
0852   canvas.Write();
0853   for (size_t i = 0; i < frames.size(); i++) {
0854     delete frames[i];
0855   }
0856   delete dataPass;
0857   delete dataFail;
0858 }
0859 
0860 void TagProbeFitter::saveDistributionsPlot(RooWorkspace* w) {
0861   // save pointers to datasets to manage memory
0862   RooAbsData* dataAll = w->data("data");
0863   RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0864   RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0865 
0866   const RooArgSet* vars = dataAll->get();
0867   vector<RooRealVar*> reals;
0868   RooLinkedListIter it = vars->iterator();
0869   for (RooAbsArg* v = (RooAbsArg*)it.Next(); v != nullptr; v = (RooAbsArg*)it.Next()) {
0870     if (!v->InheritsFrom("RooRealVar"))
0871       continue;
0872     reals.push_back((RooRealVar*)v);
0873   }
0874   TCanvas canvas("distributions_canvas");
0875   canvas.Divide(3, reals.size());
0876   vector<RooPlot*> frames;
0877   for (unsigned int i = 0; i < reals.size(); i++) {
0878     // plot the Passing Probes
0879     canvas.cd(3 * i + 1);
0880     frames.push_back(reals[i]->frame(Name("Passing"), Title("Passing Probes"), Bins(100)));
0881     dataPass->plotOn(frames.back(), LineColor(kGreen));
0882     dataPass->statOn(frames.back());
0883     frames.back()->Draw();
0884     // plot the Failing Probes
0885     canvas.cd(3 * i + 2);
0886     frames.push_back(reals[i]->frame(Name("Failing"), Title("Failing Probes"), Bins(100)));
0887     dataFail->plotOn(frames.back(), LineColor(kRed));
0888     dataFail->statOn(frames.back());
0889     frames.back()->Draw();
0890     // plot the All Probes
0891     canvas.cd(3 * i + 3);
0892     frames.push_back(reals[i]->frame(Name("All"), Title("All Probes"), Bins(100)));
0893     dataAll->plotOn(frames.back(), LineColor(kBlue));
0894     dataAll->statOn(frames.back());
0895     frames.back()->Draw();
0896   }
0897   canvas.Draw();
0898   canvas.Write();
0899   for (size_t i = 0; i < frames.size(); i++) {
0900     delete frames[i];
0901   }
0902   delete dataPass;
0903   delete dataFail;
0904 }
0905 
0906 void TagProbeFitter::saveEfficiencyPlots(RooDataSet& eff,
0907                                          const TString& effName,
0908                                          RooArgSet& binnedVariables,
0909                                          RooArgSet& mappedCategories) {
0910   RooLinkedListIter v1it = binnedVariables.iterator();
0911   bool isOnePoint =
0912       (eff.numEntries() == 1);  // for datasets with > 1 entry, we don't make plots for variables with just one bin
0913   for (RooRealVar* v1 = (RooRealVar*)v1it.Next(); v1 != nullptr; v1 = (RooRealVar*)v1it.Next()) {
0914     RooArgSet binCategories1D;
0915     if (v1->numBins() == 1 && !isOnePoint)
0916       continue;
0917     RooLinkedListIter v2it = binnedVariables.iterator();
0918     for (RooRealVar* v2 = (RooRealVar*)v2it.Next(); v2 != nullptr; v2 = (RooRealVar*)v2it.Next()) {
0919       if (v2 == v1)
0920         continue;
0921       if (v2->numBins() == 1 && !isOnePoint)
0922         continue;
0923       binCategories1D.addClone(
0924           RooBinningCategory(TString(v2->GetName()) + "_bins", TString(v2->GetName()) + "_bins", *v2));
0925 
0926       RooArgSet binCategories2D;
0927       RooLinkedListIter v3it = binnedVariables.iterator();
0928       for (RooRealVar* v3 = (RooRealVar*)v3it.Next(); v3 != nullptr; v3 = (RooRealVar*)v3it.Next()) {
0929         if (v3 == v1 || v3 == v2)
0930           continue;
0931         binCategories2D.addClone(
0932             RooBinningCategory(TString(v3->GetName()) + "_bins", TString(v3->GetName()) + "_bins", *v3));
0933       }
0934       RooMultiCategory allCats2D("allCats2D", "allCats2D", RooArgSet(binCategories2D, mappedCategories));
0935       if (allCats2D.numTypes() == 0) {
0936         makeEfficiencyPlot2D(eff, *v1, *v2, TString::Format("%s_%s_PLOT", v1->GetName(), v2->GetName()), "", effName);
0937       } else {
0938         RooDataSet myEff(eff);
0939         myEff.addColumn(allCats2D);
0940         std::unique_ptr<TIterator> catIt(allCats2D.typeIterator());
0941         for (RooCatType* t = (RooCatType*)catIt->Next(); t != nullptr; t = (RooCatType*)catIt->Next()) {
0942           TString catName = t->GetName();
0943           if (catName.Contains("NotMapped"))
0944             continue;
0945           catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0946           makeEfficiencyPlot2D(myEff,
0947                                *v1,
0948                                *v2,
0949                                TString::Format("%s_%s_PLOT_%s", v1->GetName(), v2->GetName(), catName.Data()),
0950                                catName,
0951                                effName,
0952                                "allCats2D",
0953                                t->getVal());
0954         }
0955       }
0956     }
0957     RooMultiCategory allCats1D("allCats1D", "allCats1D", RooArgSet(binCategories1D, mappedCategories));
0958     if (allCats1D.numTypes() == 0) {
0959       makeEfficiencyPlot1D(eff, *v1, TString::Format("%s_PLOT", v1->GetName()), "", effName);
0960     } else {
0961       RooDataSet myEff(eff);
0962       myEff.addColumn(allCats1D);
0963       std::unique_ptr<TIterator> catIt(allCats1D.typeIterator());
0964       for (RooCatType* t = (RooCatType*)catIt->Next(); t != nullptr; t = (RooCatType*)catIt->Next()) {
0965         TString catName = t->GetName();
0966         if (catName.Contains("NotMapped"))
0967           continue;
0968         catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0969         makeEfficiencyPlot1D(myEff,
0970                              *v1,
0971                              TString::Format("%s_PLOT_%s", v1->GetName(), catName.Data()),
0972                              catName,
0973                              effName,
0974                              "allCats1D",
0975                              t->getVal());
0976       }
0977     }
0978   }
0979 }
0980 
0981 void TagProbeFitter::makeEfficiencyPlot1D(RooDataSet& eff,
0982                                           RooRealVar& v,
0983                                           const TString& plotName,
0984                                           const TString& plotTitle,
0985                                           const TString& effName,
0986                                           const char* catName,
0987                                           int catIndex) {
0988   TGraphAsymmErrors* p = new TGraphAsymmErrors();
0989   const RooArgSet* entry = eff.get();
0990   const RooRealVar& vi = dynamic_cast<const RooRealVar&>(*entry->find(v.GetName()));
0991   const RooRealVar& ei = dynamic_cast<const RooRealVar&>(*entry->find("efficiency"));
0992   for (unsigned int i = 0, n = eff.numEntries(); i < n; ++i) {
0993     entry = eff.get(i);
0994     if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
0995       continue;
0996     int j = p->GetN();
0997     p->Set(j + 1);
0998     p->SetPoint(j, vi.getVal(), ei.getVal());
0999     p->SetPointError(j, -vi.getAsymErrorLo(), vi.getAsymErrorHi(), -ei.getAsymErrorLo(), ei.getAsymErrorHi());
1000   }
1001   TCanvas canvas(plotName);
1002   TH1F* frame = new TH1F("frame", "Efficiency of " + effName, 1, v.getMin(), v.getMax());
1003   frame->SetDirectory(nullptr);
1004   p->SetNameTitle(Form("hxy_%s", eff.GetName()), "Efficiency of " + effName);
1005   p->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
1006   p->GetYaxis()->SetTitle("Efficiency of " + effName);
1007   frame->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
1008   frame->GetYaxis()->SetTitle("Efficiency of " + effName);
1009   frame->GetYaxis()->SetRangeUser(0, 1);
1010   frame->Draw();
1011   p->SetLineWidth(2);
1012   p->SetMarkerStyle(kFullCircle);
1013   p->SetMarkerSize(1.2);
1014   p->Draw("P SAME");
1015   canvas.Write();
1016   delete frame;
1017   delete p;
1018 }
1019 
1020 void TagProbeFitter::makeEfficiencyPlot2D(RooDataSet& eff,
1021                                           RooRealVar& v1,
1022                                           RooRealVar& v2,
1023                                           const TString& plotName,
1024                                           const TString& plotTitle,
1025                                           const TString& effName,
1026                                           const char* catName,
1027                                           int catIndex) {
1028   TCanvas canvas(plotName);
1029   canvas.SetRightMargin(0.15);
1030   TH2F* h = new TH2F(plotName,
1031                      plotName,
1032                      v1.getBinning().numBins(),
1033                      v1.getBinning().array(),
1034                      v2.getBinning().numBins(),
1035                      v2.getBinning().array());
1036   const RooArgSet* set = eff.get();
1037   RooRealVar* e = (RooRealVar*)set->find("efficiency");
1038   RooRealVar* v1_ = (RooRealVar*)set->find(v1.GetName());
1039   RooRealVar* v2_ = (RooRealVar*)set->find(v2.GetName());
1040   h->SetTitle(TString::Format("%s;%s%s;%s%s;Efficiency of %s",
1041                               plotTitle.Data(),
1042                               v1.GetTitle(),
1043                               TString(v1.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v1.getUnit()).Data(),
1044                               v2.GetTitle(),
1045                               TString(v2.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v2.getUnit()).Data(),
1046                               effName.Data()));
1047   h->SetOption("colztexte");
1048   h->GetZaxis()->SetRangeUser(-0.001, 1.001);
1049   h->SetStats(kFALSE);
1050   for (int i = 0; i < eff.numEntries(); i++) {
1051     const RooArgSet* entry = eff.get(i);
1052     if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
1053       continue;
1054     h->SetBinContent(h->FindBin(v1_->getVal(), v2_->getVal()), e->getVal());
1055     h->SetBinError(h->FindBin(v1_->getVal(), v2_->getVal()), (e->getErrorHi() - e->getErrorLo()) / 2.);
1056   }
1057   h->Draw();
1058   canvas.Draw();
1059   canvas.Write();
1060   delete h;
1061 }
1062 
1063 void TagProbeFitter::doSBSEfficiency(RooWorkspace* w, RooRealVar& efficiency) {}
1064 
1065 void TagProbeFitter::doCntEfficiency(RooWorkspace* w, RooRealVar& efficiency) {
1066   int pass = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
1067   int fail = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
1068   double e = (pass + fail == 0) ? 0 : pass / double(pass + fail);
1069   // Use Clopper-Pearson
1070   double alpha = (1.0 - .68540158589942957) / 2;
1071   double lo = (pass == 0) ? 0.0 : ROOT::Math::beta_quantile(alpha, pass, fail + 1);
1072   double hi = (fail == 0) ? 1.0 : ROOT::Math::beta_quantile(1 - alpha, pass + 1, fail);
1073   ////from TGraphAsymmErrors
1074   //double lob, hib;
1075   //Efficiency( pass, pass+fail, .68540158589942957, e, lob, hib );
1076   //std::cerr << "CNT " << pass << "/" << fail << ":  Clopper Pearson [" << lo << ", "  << hi << "], Bayes [" << lob << ", " << hib << "]" << std::endl;
1077   efficiency.setVal(e);
1078   efficiency.setAsymError(lo - e, hi - e);
1079 }
1080 
1081 void TagProbeFitter::varFixer(RooWorkspace* w, bool fix) {
1082   std::vector<std::string>::const_iterator it;
1083   for (it = fixVars.begin(); it < fixVars.end(); it++) {
1084     if (w->var((*it).c_str()))
1085       w->var((*it).c_str())->setAttribute("Constant", fix);
1086     else {
1087       std::cout << "TagProbeFitter: "
1088                 << "Can't find a variable to fix: " << *it;
1089     }
1090   }
1091 }
1092 
1093 void TagProbeFitter::varSaver(RooWorkspace* w) {
1094   if (!fixVarValues.empty()) {
1095     std::cout << "attempt to save variables more than once!" << std::endl;
1096     return;
1097   }
1098   std::vector<std::string>::const_iterator it;
1099   for (it = fixVars.begin(); it < fixVars.end(); it++) {
1100     fixVarValues.push_back(w->var((*it).c_str())->getVal());
1101   }
1102 }
1103 
1104 void TagProbeFitter::varRestorer(RooWorkspace* w) {
1105   if (fixVarValues.size() == fixVars.size())
1106     for (unsigned int i = 0; i < fixVars.size(); i++) {
1107       std::cout << "setting variable " << fixVars[i].c_str() << std::endl;
1108       w->var(fixVars[i].c_str())->setVal(fixVarValues[i]);
1109     }
1110   else {
1111     std::cout << "fixVars and fixVarValues are not of the same size!" << std::endl;
1112   }
1113 }