Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-07-17 01:47:50

0001 #include "PhysicsTools/TagAndProbe/interface/TagProbeFitter.h"
0002 #include <memory>
0003 
0004 #include <stdexcept>
0005 //#include "TagProbeFitter.h"
0006 
0007 #include "TROOT.h"
0008 #include "TFile.h"
0009 #include "TPad.h"
0010 #include "TText.h"
0011 #include "TCanvas.h"
0012 #include "TGraphAsymmErrors.h"
0013 #include "TH2F.h"
0014 #include "TStyle.h"
0015 #include "Math/QuantFuncMathCore.h"
0016 #include "Roo1DTable.h"
0017 #include "RooAbsDataStore.h"
0018 #include "RooAbsReal.h"
0019 #include "RooAddPdf.h"
0020 #include "RooBinning.h"
0021 #include "RooBinningCategory.h"
0022 #include "RooCategory.h"
0023 #include "RooChebychev.h"
0024 #include "RooDataHist.h"
0025 #include "RooDataSet.h"
0026 #include "RooEfficiency.h"
0027 #include "RooExtendPdf.h"
0028 #include "RooFitResult.h"
0029 #include "RooFormulaVar.h"
0030 #include "RooGaussian.h"
0031 #include "RooGenericPdf.h"
0032 #include "RooGlobalFunc.h"
0033 #include "RooLinkedListIter.h"
0034 #include "RooMappedCategory.h"
0035 #include "RooMinimizer.h"
0036 #include "RooMsgService.h"
0037 #include "RooMultiCategory.h"
0038 #include "RooNLLVar.h"
0039 #include "RooNumIntConfig.h"
0040 #include "RooPlot.h"
0041 #include "RooProdPdf.h"
0042 #include "RooProfileLL.h"
0043 #include "RooRealVar.h"
0044 #include "RooSimultaneous.h"
0045 #include "RooThresholdCategory.h"
0046 #include "RooTrace.h"
0047 #include "RooWorkspace.h"
0048 #include "RooTreeDataStore.h"
0049 
0050 using namespace std;
0051 using namespace RooFit;
0052 
0053 TagProbeFitter::TagProbeFitter(const std::vector<std::string>& inputFileNames,
0054                                string inputDirectoryName,
0055                                string inputTreeName,
0056                                string outputFileName,
0057                                int numCPU_,
0058                                bool saveWorkspace_,
0059                                bool floatShapeParameters_,
0060                                const std::vector<std::string>& fixVars_) {
0061   inputTree = new TChain((inputDirectoryName + "/" + inputTreeName).c_str());
0062   for (size_t i = 0; i < inputFileNames.size(); i++) {
0063     inputTree->Add(inputFileNames[i].c_str());
0064   }
0065   outputFile = new TFile(outputFileName.c_str(), "recreate");
0066   outputDirectory = outputFile->mkdir(inputDirectoryName.c_str());
0067   numCPU = numCPU_;
0068   saveWorkspace = saveWorkspace_;
0069   massBins = 0;  // automatic default
0070   floatShapeParameters = floatShapeParameters_;
0071   fixVars = fixVars_;
0072   weightVar = "";
0073   if (!floatShapeParameters && fixVars.empty())
0074     std::cout << "TagProbeFitter: "
0075               << "You wnat to fix some variables but do not specify them!";
0076 
0077   gROOT->SetStyle("Plain");
0078   gStyle->SetTitleFillColor(0);
0079   gStyle->SetPalette(1);
0080   gStyle->SetOptStat(0);
0081   gStyle->SetPaintTextFormat(".2f");
0082 
0083   quiet = false;
0084 
0085   binnedFit = false;
0086 
0087   doSaveDistributionsPlot = true;
0088 
0089   // make integration very precise
0090   RooAbsReal::defaultIntegratorConfig()->setEpsAbs(1e-13);
0091   RooAbsReal::defaultIntegratorConfig()->setEpsRel(1e-13);
0092 
0093   split_mode = 0;
0094 }
0095 
0096 TagProbeFitter::~TagProbeFitter() {
0097   if (inputTree)
0098     delete inputTree;
0099   if (outputFile)
0100     outputFile->Close();
0101 }
0102 
0103 void TagProbeFitter::setQuiet(bool quiet_) {
0104   quiet = quiet_;
0105   if (quiet) {
0106     RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
0107   } else {
0108     RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING);
0109   }
0110 }
0111 
0112 void TagProbeFitter::setSplitMode(unsigned int nevents) { split_mode = nevents; }
0113 
0114 bool TagProbeFitter::addVariable(string name, string title, double low, double hi, string units) {
0115   RooRealVar temp(name.c_str(), title.c_str(), low, hi, units.c_str());
0116   temp.setBins(5000, "cache");
0117   variables.addClone(temp);
0118   return true;
0119 }
0120 
0121 bool TagProbeFitter::addCategory(string name, string title, string expression) {
0122   RooCategory* c = (RooCategory*)parameterParser.factory(expression.c_str());
0123   if (!c)
0124     return false;
0125   //set the name of the category to the passed name instead of the one in the expression
0126   c->SetName(name.c_str());
0127   c->SetTitle(title.c_str());
0128   variables.addClone(*c);
0129   return true;
0130 }
0131 
0132 bool TagProbeFitter::addExpression(string expressionName,
0133                                    string title,
0134                                    string expression,
0135                                    const std::vector<string>& arguments) {
0136   expressionVars.push_back(make_pair(make_pair(expressionName, title), make_pair(expression, arguments)));
0137   return true;
0138 }
0139 
0140 bool TagProbeFitter::addThresholdCategory(string categoryName, string title, string varName, double cutValue) {
0141   thresholdCategories.push_back(make_pair(make_pair(categoryName, title), make_pair(varName, cutValue)));
0142   return true;
0143 }
0144 
0145 void TagProbeFitter::addPdf(string name, vector<string>& pdfCommands) { pdfs[name] = pdfCommands; }
0146 
0147 void TagProbeFitter::setBinsForMassPlots(int bins) { massBins = bins; }
0148 
0149 void TagProbeFitter::setWeightVar(const std::string& var) { weightVar = var; }
0150 
0151 string TagProbeFitter::calculateEfficiency(string dirName,
0152                                            const std::vector<string>& effCats,
0153                                            const std::vector<string>& effStates,
0154                                            vector<string>& unbinnedVariables,
0155                                            map<string, vector<double> >& binnedReals,
0156                                            map<string, std::vector<string> >& binnedCategories,
0157                                            vector<string>& binToPDFmap) {
0158   //go to home directory
0159   outputDirectory->cd();
0160   //make a directory corresponding to this efficiency binning
0161   gDirectory->mkdir(dirName.c_str())->cd();
0162 
0163   RooArgSet dataVars;
0164 
0165   //collect unbinned variables
0166   for (vector<string>::iterator v = unbinnedVariables.begin(); v != unbinnedVariables.end(); v++) {
0167     dataVars.addClone(variables[v->c_str()], true);
0168     if (binnedFit && (v == unbinnedVariables.begin())) {
0169       ((RooRealVar&)dataVars[v->c_str()]).setBins(massBins);
0170     }
0171   }
0172   //collect the binned variables and the corresponding bin categories
0173   RooArgSet binnedVariables;
0174   RooArgSet binCategories;
0175   for (map<string, vector<double> >::iterator v = binnedReals.begin(); v != binnedReals.end(); v++) {
0176     TString name = v->first;
0177     if (variables.find(name) == nullptr) {
0178       cerr << "Binned variable '" << name << "' not found." << endl;
0179       return "Error";
0180     }
0181     binnedVariables.add(*dataVars.addClone(variables[name]));
0182     ((RooRealVar&)binnedVariables[name]).setBinning(RooBinning(v->second.size() - 1, &v->second[0]));
0183     binCategories.addClone(RooBinningCategory(name + "_bins", name + "_bins", (RooRealVar&)binnedVariables[name]));
0184   }
0185 
0186   //collect the category variables and the corresponding mapped categories
0187   RooArgSet categories;
0188   RooArgSet mappedCategories;
0189   for (map<string, vector<string> >::iterator v = binnedCategories.begin(); v != binnedCategories.end(); v++) {
0190     TString name = v->first;
0191     if (variables.find(name) == nullptr) {
0192       cerr << "Binned category '" << name << "' not found." << endl;
0193       return "Error";
0194     }
0195     categories.add(*dataVars.addClone(variables[name]));
0196     mappedCategories.addClone(RooMappedCategory(name + "_bins", name + "_bins", (RooCategory&)categories[name]));
0197     for (unsigned int i = 0; i < v->second.size(); i++) {
0198       ((RooMappedCategory&)mappedCategories[name + "_bins"])
0199           .map(v->second[i].c_str(), name + "_" + TString(v->second[i].c_str()).ReplaceAll(",", "_"));
0200     }
0201   }
0202 
0203   // add the efficiency category if it's not a dynamic one
0204   for (vector<string>::const_iterator effCat = effCats.begin(); effCat != effCats.end(); ++effCat) {
0205     if (variables.find(effCat->c_str()) != nullptr) {
0206       dataVars.addClone(variables[effCat->c_str()], true);
0207     }
0208   }
0209 
0210   //  add all variables used in expressions
0211   for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator ev = expressionVars.begin(),
0212                                                                                            eve = expressionVars.end();
0213        ev != eve;
0214        ++ev) {
0215     for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed; ++it) {
0216       // provided that they are real variables themselves
0217       if (variables.find(it->c_str()))
0218         dataVars.addClone(variables[it->c_str()], true);
0219     }
0220   }
0221   // add all real variables used in cuts
0222   for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0223                                                                                   tce = thresholdCategories.end();
0224        tc != tce;
0225        ++tc) {
0226     if (variables.find(tc->second.first.c_str()))
0227       dataVars.addClone(variables[tc->second.first.c_str()], true);
0228   }
0229 
0230   //now add the necessary mass and passing variables to make the unbinned RooDataSet
0231   RooDataSet* data(nullptr);
0232   if (not split_mode) {
0233     data = new RooDataSet("data",
0234                           "data",
0235                           inputTree,
0236                           dataVars,
0237                           /*selExpr=*/"",
0238                           /*wgtVarName=*/(weightVar.empty() ? nullptr : weightVar.c_str()));
0239 
0240     // Now add all expressions that are computed dynamically
0241     for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0242              ev = expressionVars.begin(),
0243              eve = expressionVars.end();
0244          ev != eve;
0245          ++ev) {
0246       RooArgList args;
0247       for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0248            ++it) {
0249         args.add(dataVars[it->c_str()]);
0250       }
0251       RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0252       RooRealVar* col = (RooRealVar*)data->addColumn(expr);
0253       dataVars.addClone(*col);
0254     }
0255 
0256     // And add all dynamic categories from thresholds
0257     for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0258                                                                                     tce = thresholdCategories.end();
0259          tc != tce;
0260          ++tc) {
0261       RooThresholdCategory tmp(tc->first.first.c_str(),
0262                                tc->first.second.c_str(),
0263                                (RooAbsReal&)dataVars[tc->second.first.c_str()],
0264                                "above",
0265                                1);
0266       tmp.addThreshold(tc->second.second, "below", 0);
0267       RooCategory* cat = (RooCategory*)data->addColumn(tmp);
0268       dataVars.addClone(*cat);
0269     }
0270   }
0271 
0272   //merge the bin categories to a MultiCategory for convenience
0273   RooMultiCategory allCats("allCats", "allCats", RooArgSet(binCategories, mappedCategories));
0274   string effName;
0275 
0276   if (not split_mode) {
0277     data->addColumn(allCats);
0278     //setup the efficiency category
0279     if (effCats.size() == 1) {
0280       effName = effCats.front() + "::" + effStates.front();
0281       RooMappedCategory efficiencyCategory(
0282           "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0283       efficiencyCategory.map(effStates.front().c_str(), "Passed");
0284       data->addColumn(efficiencyCategory);
0285     } else {
0286       RooArgSet rooEffCats;
0287       string multiState = "{";
0288       for (size_t i = 0; i < effCats.size(); ++i) {
0289         if (i) {
0290           multiState += ";";
0291           effName += " && ";
0292         }
0293         rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0294         multiState += effStates[i];
0295         effName = effCats[i] + "::" + effStates[i];
0296       }
0297       multiState += "}";
0298       RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0299       RooMappedCategory efficiencyCategory(
0300           "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0301       efficiencyCategory.map(multiState.c_str(), "Passed");
0302       data->addColumn(efficiencyCategory);
0303     }
0304   }
0305 
0306   //setup the pdf category
0307   RooMappedCategory pdfCategory(
0308       "_pdfCategory_", "_pdfCategory_", allCats, (!binToPDFmap.empty()) ? binToPDFmap[0].c_str() : "all");
0309   for (unsigned int i = 1; i < binToPDFmap.size(); i += 2) {
0310     pdfCategory.map(binToPDFmap[i].c_str(), binToPDFmap[i + 1].c_str());
0311   }
0312   if (not split_mode)
0313     data->addColumn(pdfCategory);
0314 
0315   //create the empty efficiency datasets from the binned variables
0316   RooRealVar efficiency("efficiency", "Efficiency", 0, 1);
0317 
0318   RooDataSet fitEfficiency("fit_eff",
0319                            "Efficiency from unbinned ML fit",
0320                            RooArgSet(RooArgSet(binnedVariables, categories), efficiency),
0321                            StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0322 
0323   RooDataSet cntEfficiency("cnt_eff",
0324                            "Efficiency from counting",
0325                            RooArgSet(RooArgSet(binnedVariables, categories), efficiency),
0326                            StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0327 
0328   if (not split_mode) {
0329     if (!floatShapeParameters) {
0330       //fitting whole dataset to get initial values for some parameters
0331       RooWorkspace* w = new RooWorkspace();
0332       w->import(*data);
0333       efficiency.setVal(0);  //reset
0334       efficiency.setAsymError(0, 0);
0335       std::cout << "ALL dataset: calling doFitEfficiency with pdf: " << pdfCategory.getLabel() << std::endl;
0336       doFitEfficiency(w, pdfCategory.getLabel(), efficiency);
0337       delete w;
0338     }
0339   } else {
0340     // disactive not needed branches
0341     inputTree->SetBranchStatus("*", false);
0342     TIterator* iter = dataVars.createIterator();
0343     TObject* obj(nullptr);
0344     while ((obj = iter->Next()))
0345       inputTree->SetBranchStatus(obj->GetName(), true);
0346   }
0347 
0348   // loop over all bins with the help of allCats
0349   // store index values in a separate vector to avoid issues
0350   // with iteration over changing allCats object
0351   std::vector<Int_t> allCatValues;
0352   TIterator* it = allCats.typeIterator();
0353   for (RooCatType* t = (RooCatType*)it->Next(); t != nullptr; t = (RooCatType*)it->Next())
0354     allCatValues.push_back(t->getVal());
0355   for (auto iCat : allCatValues) {
0356     const RooCatType* t = allCats.lookupType(iCat);
0357     //name of the multicategory
0358     TString catName = t->GetName();
0359     //skip unmapped states
0360     if (catName.Contains("NotMapped"))
0361       continue;
0362 
0363     RooDataSet* data_bin(nullptr);
0364     RooArgSet tmpVars;
0365 
0366     if (not split_mode) {
0367       //create the dataset
0368       data_bin = (RooDataSet*)data->reduce(Cut(TString::Format("allCats==%d", iCat)));
0369     } else {
0370       data_bin = new RooDataSet("data", "data", dataVars, (weightVar.empty() ? nullptr : weightVar.c_str()));
0371 
0372       TDirectory* tmp = gDirectory;
0373       gROOT->cd();
0374 
0375       // loop over input data and fill the dataset with events for
0376       // current category
0377       unsigned int n_entries = inputTree->GetEntries();
0378       printf("Input number of events: %u\n", n_entries);
0379       unsigned int first_entry = 0;
0380       while (first_entry < n_entries) {
0381         TTree* copyTree = inputTree->CopyTree("", "", split_mode, first_entry);
0382         RooTreeDataStore store("reader",
0383                                "reader",
0384                                dataVars,
0385                                *copyTree,
0386                                /*selExpr=*/"",
0387                                /*wgtVarName=*/(weightVar.empty() ? nullptr : weightVar.c_str()));
0388         for (unsigned int i = 0; i < store.GetEntries(); ++i) {
0389           store.get(i);
0390           if (allCats.getIndex() == iCat) {
0391             data_bin->add(dataVars, weightVar.empty() ? 1.0 : dataVars.getRealValue(weightVar.c_str()));
0392           }
0393         }
0394         delete copyTree;
0395         first_entry += split_mode;
0396         data_bin->Print("V");
0397       }
0398       data_bin->Print("V");
0399       tmp->cd();
0400 
0401       // Now add all expressions that are computed dynamically
0402       for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0403                ev = expressionVars.begin(),
0404                eve = expressionVars.end();
0405            ev != eve;
0406            ++ev) {
0407         RooArgList args;
0408         for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0409              ++it) {
0410           args.add(dataVars[it->c_str()]);
0411         }
0412         RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0413         RooRealVar* col = (RooRealVar*)data_bin->addColumn(expr);
0414         tmpVars.add(*dataVars.addClone(*col));
0415       }
0416 
0417       // And add all dynamic categories from thresholds
0418       for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0419                                                                                       tce = thresholdCategories.end();
0420            tc != tce;
0421            ++tc) {
0422         RooThresholdCategory tmp(tc->first.first.c_str(),
0423                                  tc->first.second.c_str(),
0424                                  (RooAbsReal&)dataVars[tc->second.first.c_str()],
0425                                  "above",
0426                                  1);
0427         tmp.addThreshold(tc->second.second, "below", 0);
0428         RooCategory* cat = (RooCategory*)data_bin->addColumn(tmp);
0429         tmpVars.add(*dataVars.addClone(*cat));
0430       }
0431 
0432       //setup the efficiency category
0433       if (effCats.size() == 1) {
0434         effName = effCats.front() + "::" + effStates.front();
0435         RooMappedCategory efficiencyCategory(
0436             "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0437         efficiencyCategory.map(effStates.front().c_str(), "Passed");
0438         data_bin->addColumn(efficiencyCategory);
0439       } else {
0440         RooArgSet rooEffCats;
0441         string multiState = "{";
0442         for (size_t i = 0; i < effCats.size(); ++i) {
0443           if (i) {
0444             multiState += ";";
0445             effName += " && ";
0446           }
0447           rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0448           multiState += effStates[i];
0449           effName = effCats[i] + "::" + effStates[i];
0450         }
0451         multiState += "}";
0452         RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0453         RooMappedCategory efficiencyCategory(
0454             "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0455         efficiencyCategory.map(multiState.c_str(), "Passed");
0456         data_bin->addColumn(efficiencyCategory);
0457       }
0458       data_bin->addColumn(pdfCategory);
0459     }
0460 
0461     //set the category variables by reading the first event
0462     const RooArgSet* row = data_bin->get();
0463 
0464     //get PDF name
0465     TString pdfName(((RooCategory*)row->find("_pdfCategory_"))->getLabel());
0466 
0467     //make directory name
0468     TString dirName = catName;
0469     dirName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "__");
0470     if (pdfName.Length() > 0) {
0471       dirName.Append("__").Append(pdfName);
0472     }
0473 
0474     cout << "Fitting bin:  " << dirName << endl;
0475     //make a directory for each bin
0476     gDirectory->mkdir(dirName)->cd();
0477 
0478     //create a workspace
0479     RooWorkspace* w = new RooWorkspace();
0480     //import the data
0481     w->import(*data_bin);
0482     delete data_bin;  // clean up earlier
0483     data_bin =
0484         dynamic_cast<RooDataSet*>(w->data("data"));  // point to the data that's in the workspace now (saves memory)
0485 
0486     //save the distribution of variables
0487     if (doSaveDistributionsPlot)
0488       saveDistributionsPlot(w);
0489     //do the fitting only if there is sufficient number of events
0490     if (data_bin->numEntries() > 0) {
0491       //set the values of binnedVariables to the mean value in this data bin
0492       RooArgSet meanOfVariables;
0493       RooLinkedListIter vit = binnedVariables.iterator();
0494       for (RooRealVar* v = (RooRealVar*)vit.Next(); v != nullptr; v = (RooRealVar*)vit.Next()) {
0495         meanOfVariables.addClone(*v);
0496         double mean = w->data("data")->mean(*v);
0497         RooBinning binning((RooBinning&)v->getBinning());
0498         int ind = binning.binNumber(mean);
0499         RooRealVar& newVar = (RooRealVar&)meanOfVariables[v->GetName()];
0500         newVar.setVal(mean);
0501         newVar.setAsymError(binning.binLow(ind) - mean, binning.binHigh(ind) - mean);
0502       }
0503 
0504       //put an entry in the efficiency dataset
0505       //note that the category values are coming from data_bin->get(0)
0506       meanOfVariables.addClone(*data_bin->get(0), true);
0507 
0508       efficiency.setVal(0);  //reset
0509       efficiency.setAsymError(0, 0);
0510       doFitEfficiency(w, pdfName.Data(), efficiency);
0511       fitEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0512 
0513       /*      efficiency.setVal(0);//reset
0514       doSBSEfficiency(w, efficiency);
0515       sbsEfficiency.add( RooArgSet(meanOfVariables, efficiency) );*/
0516 
0517       efficiency.setVal(0);  //reset
0518       doCntEfficiency(w, efficiency);
0519       cntEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0520     }
0521     //save the workspace if requested
0522     if (saveWorkspace) {
0523       w->Write("w");
0524     }
0525     //clean up
0526     delete w;
0527     if (split_mode)
0528       dataVars.remove(tmpVars);
0529     //get back to the initial directory
0530     gDirectory->cd("..");
0531   }
0532 
0533   //save the efficiency data
0534   fitEfficiency.Write();
0535   gDirectory->mkdir("fit_eff_plots")->cd();
0536   saveEfficiencyPlots(fitEfficiency, effName, binnedVariables, mappedCategories);
0537   gDirectory->cd("..");
0538 
0539   /*  sbsEfficiency.Write();
0540   gDirectory->mkdir("sbs_eff_plots")->cd();
0541   saveEfficiencyPlots(sbsEfficiency, effCat+"::"+effState, binnedVariables, mappedCategories);
0542   gDirectory->cd("..");*/
0543 
0544   cntEfficiency.Write();
0545   gDirectory->mkdir("cnt_eff_plots")->cd();
0546   saveEfficiencyPlots(cntEfficiency, effName, binnedVariables, mappedCategories);
0547   gDirectory->cd("..");
0548 
0549   if (not split_mode)
0550     delete data;
0551 
0552   //empty string means no error
0553   return "";
0554 }
0555 
0556 void TagProbeFitter::doFitEfficiency(RooWorkspace* w, string pdfName, RooRealVar& efficiency) {
0557   //if pdfName is empty skip the fit
0558   if (pdfName == "all") {
0559     return;
0560   }
0561   //create the simultaneous pdf of name pdfName
0562   createPdf(w, pdfs[pdfName]);
0563   //set the initial values for the yields of signal and background
0564   setInitialValues(w);
0565   std::unique_ptr<RooFitResult> res;
0566 
0567   RooAbsData* data = w->data("data");
0568   std::unique_ptr<RooDataHist> bdata;
0569   if (binnedFit) {
0570     // get variables from data, which contain also other binning or expression variables
0571     const RooArgSet* dataObs = data->get(0);
0572     // remove everything which is not a dependency of the pdf
0573     RooArgSet* obs = w->pdf("simPdf")->getObservables(dataObs);
0574     bdata = std::make_unique<RooDataHist>("data_binned", "data_binned", *obs, *data);
0575     w->import(*bdata);
0576     data = w->data("data_binned");
0577     delete obs;
0578   }
0579 
0580   double totPassing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0581   double totFailing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0582 
0583   RooAbsReal* simNLL = w->pdf("simPdf")->createNLL(*data, Extended(true), NumCPU(numCPU));
0584 
0585   RooMinimizer minimizer(*simNLL);  // we are going to use this for 'scan'
0586   minimizer.setStrategy(1);
0587   minimizer.setProfile(true);
0588   RooProfileLL profileLL("simPdfNLL", "", *simNLL, *w->var("efficiency"));
0589 
0590   //******* The block of code below is to make the fit converge faster.
0591   // ****** This part is OPTIONAL, i.e., off be default. User can activate this
0592   // ****** by setting the following parameters: "fixVars" and "floatShapeParameters"
0593   // ****** Here is the full logic:
0594   /////   ---> if "floatShapeParameters==true" && "fixVars is empty" :
0595   ////             Just perform the fit without any of these optimizations (this is the default logic)
0596   /////   ---> if "floatShapeParameters==true" && "fixVars is NOT empty" :
0597   ////             In each bin, fix the desired parameters and perform the fit to obtain a good starting point.
0598   /////            Then float these parameters and fit again.
0599   ////    ---> if "floatShapeParameters==false" && "fixVars is empty" :
0600   ////             Do not perform any fit at all. Just print error message.
0601   ////    ---> if "floatShapeParameters==false" && "fixVars is NOT empty" :
0602   ///              Perform a global fit to the whole sample, save the fitted values of the
0603   ///              user specified parameters, and fix them for bin-by-bin fit.
0604 
0605   if (!fixVars.empty()) {
0606     // calculate initial values for parameters user want to fix
0607     if (!floatShapeParameters && fixVarValues.empty()) {
0608       // we want to fix these parameters for each bin.
0609       // the following sequence fixes them, fits, releases and fits again
0610       // to get reasonable values.
0611       // ----------------------------------------------------------------------
0612       // This procedure works only once with a whole dataset (without binning)
0613       // ----------------------------------------------------------------------
0614 
0615       // fix them
0616       varFixer(w, true);
0617       //do fit
0618       minimizer.minimize("Minuit2", "Scan");
0619       minimizer.migrad();
0620       minimizer.hesse();
0621       //minuit.minos();
0622       //w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), Strategy(2),
0623       //PrintLevel(quiet?-1:1), PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
0624       //release vars
0625       varFixer(w, false);
0626       //do fit
0627       minimizer.minimize("Minuit2", "Scan");
0628       minimizer.migrad();
0629       minimizer.hesse();
0630       //minuit.minos();
0631       //w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), Strategy(2),
0632       //PrintLevel(quiet?-1:1), PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
0633       //save vars
0634       varSaver(w);
0635       // now we have a starting point. Fit will converge faster.
0636     }
0637 
0638     // here we can use initial values if we want (this works for each bin)
0639     if (!floatShapeParameters)
0640       varRestorer(w);  //restore vars
0641 
0642     //do fit
0643     minimizer.minimize("Minuit2", "Scan");
0644     minimizer.migrad();
0645     minimizer.hesse();
0646     // initialize the profile likelihood
0647     profileLL.getVal();
0648     RooMinimizer* profMinuit = profileLL.minimizer();
0649     profMinuit->setProfile(true);
0650     profMinuit->setStrategy(2);
0651     profMinuit->setPrintLevel(1);
0652     profMinuit->minos(*w->var("efficiency"));
0653     res.reset(profMinuit->save());
0654     //res = w->pdf("simPdf")->fitTo(*data, Save(true), Extended(true), NumCPU(numCPU), Strategy(2),
0655     //Minos(*w->var("efficiency")), PrintLevel(quiet?-1:1),
0656     //PrintEvalErrors(quiet?-1:1), Warnings(!quiet));
0657   }  //if(!fixVars.empty())
0658 
0659   // (default = true) if we don't want to fix any parameters or want to fit each bin with all parameters floating
0660   if (floatShapeParameters) {
0661     //release vars
0662     varFixer(w, false);
0663 
0664     //do fit
0665     minimizer.minimize("Minuit2", "Scan");
0666     minimizer.migrad();
0667     minimizer.hesse();
0668     res.reset(w->pdf("simPdf")->fitTo(*data,
0669                                       Save(true),
0670                                       Extended(true),
0671                                       NumCPU(numCPU),
0672                                       Strategy(2),
0673                                       Minos(*w->var("efficiency")),
0674                                       PrintLevel(quiet ? -1 : 1),
0675                                       PrintEvalErrors(quiet ? -1 : 1),
0676                                       Warnings(!quiet)));
0677   }
0678 
0679   // save everything
0680   res->Write("fitresults");
0681   w->saveSnapshot("finalState", w->components());
0682   saveFitPlot(w);
0683   //extract the efficiency parameter from the results
0684   RooRealVar* e = (RooRealVar*)res->floatParsFinal().find("efficiency");
0685   //What's wrong with this? and why don't they copy the errors!
0686   //efficiency = *e;
0687   efficiency.setVal(e->getVal());
0688   Double_t errLo = e->getErrorLo(), errHi = e->getErrorHi();
0689   if (errLo == 0 && e->getVal() < 0.5)
0690     errLo = e->getMin() - e->getVal();
0691   if (errHi == 0 && e->getVal() > 0.5)
0692     errHi = e->getMax() - e->getVal();
0693   efficiency.setAsymError(errLo, errHi);
0694 
0695   if (totPassing * totFailing == 0) {
0696     RooRealVar* nTot = (RooRealVar*)res->floatParsFinal().find("numTot");
0697     RooRealVar* fSig = (RooRealVar*)res->floatParsFinal().find("fSigAll");
0698     double cerr = ROOT::Math::beta_quantile(1 - (1.0 - .68540158589942957) / 2, 1, nTot->getVal() * fSig->getVal());
0699     /*
0700     std::cout << "======================================================================================" << std::endl;
0701     std::cout << "======= totPassing "  << totPassing << ", totFailing " << totFailing << std::endl;
0702     std::cout << "======= FIT: e  "  <<  e->getVal() << ",  e Lo " << e->getErrorLo()  << ",  e Hi " <<  e->getErrorHi() << std::endl;
0703     std::cout << "======= FIT:nS  "  << nS->getVal() << ", nS Lo " << nS->getErrorLo() << ", nS Hi " << nS->getErrorHi() << std::endl;
0704     std::cout << "======= FIT:nB  "  << nB->getVal() << ", nB Lo " << nB->getErrorLo() << ", nB Hi " << nB->getErrorHi() << std::endl;
0705     std::cout << "======= CNT:    "  << cerr << std::endl;
0706     std::cout << "======================================================================================" << std::endl;
0707     */
0708     if (totPassing == 0) {
0709       efficiency.setVal(0);
0710       efficiency.setAsymError(0, cerr);
0711     } else {
0712       efficiency.setVal(1);
0713       efficiency.setAsymError(-cerr, 0);
0714     }
0715   }
0716 
0717   delete simNLL;
0718 }
0719 
0720 void TagProbeFitter::createPdf(RooWorkspace* w, vector<string>& pdfCommands) {
0721   // create the signal and background pdfs defined by the user
0722   for (unsigned int i = 0; i < pdfCommands.size(); i++) {
0723     const std::string& command = pdfCommands[i];
0724     if (command.find("#import ") == 0) {
0725       TDirectory* here = gDirectory;
0726       w->import(command.substr(8).c_str());
0727       here->cd();
0728     } else {
0729       TDirectory* here = gDirectory;
0730       w->factory(command.c_str());
0731       here->cd();
0732     }
0733   }
0734   // setup the simultaneous extended pdf
0735 
0736   w->factory("expr::nSignalPass('efficiency*fSigAll*numTot', efficiency, fSigAll[.9,0,1],numTot[1,0,1e10])");
0737   w->factory("expr::nSignalFail('(1-efficiency)*fSigAll*numTot', efficiency, fSigAll,numTot)");
0738   w->factory("expr::nBkgPass('effBkg*(1-fSigAll)*numTot', effBkg[.9,0,1],fSigAll,numTot)");
0739   w->factory("expr::nBkgFail('(1-effBkg)*(1-fSigAll)*numTot', effBkg,fSigAll,numTot)");
0740   TString sPass = "signal", sFail = "signal";
0741   if (w->pdf("signalPass") != nullptr && w->pdf("signalFail") != nullptr) {
0742     if (w->pdf("signal") != nullptr)
0743       throw std::logic_error(
0744           "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0745     sPass = "signalPass";
0746     sFail = "signalFail";
0747   } else if (w->pdf("signal") != nullptr) {
0748     if (w->pdf("signalPass") != nullptr || w->pdf("signalFail") != nullptr) {
0749       throw std::logic_error(
0750           "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0751     }
0752   } else {
0753     throw std::logic_error("You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail')");
0754   }
0755   w->factory("SUM::pdfPass(nSignalPass*" + sPass + ", nBkgPass*backgroundPass)");  //fBkgPass*
0756   w->factory("SUM::pdfFail(nSignalFail*" + sFail + ", nBkgFail*backgroundFail)");  //fBkgFail*
0757 
0758   w->factory("SIMUL::simPdf(_efficiencyCategory_, Passed=pdfPass, Failed=pdfFail)");
0759   // signalFractionInPassing is not used in the fit just to set the initial values
0760   if (w->pdf("simPdf") == nullptr)
0761     throw std::runtime_error("Could not create simultaneous fit pdf.");
0762   if (w->var("signalFractionInPassing") == nullptr)
0763     w->factory("signalFractionInPassing[0.9]");
0764 }
0765 
0766 void TagProbeFitter::setInitialValues(RooWorkspace* w) {
0767   // calculate initial values
0768   double signalEfficiency = w->var("efficiency")->getVal();
0769   double signalFractionInPassing = w->var("signalFractionInPassing")->getVal();
0770   double totPassing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0771   double totFailinging = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0772   double numSignalAll = totPassing * signalFractionInPassing / signalEfficiency;
0773 
0774   //std::cout << "Number of probes: " << totPassing+totFailinging << std::endl;
0775 
0776   // check if this value is inconsistent on the failing side
0777   if (numSignalAll * (1 - signalEfficiency) > totFailinging)
0778     numSignalAll = totFailinging;
0779   // now set the values
0780   w->var("numTot")->setVal(totPassing + totFailinging);
0781   w->var("numTot")->setMax(2.0 * (totPassing + totFailinging) + 10);  //wiggle room in case of 0 events in bin
0782 
0783   if (totPassing == 0) {
0784     w->var("efficiency")->setVal(0.0);
0785     w->var("efficiency")->setAsymError(0, 1);
0786     w->var("efficiency")->setConstant(false);
0787   } else if (totFailinging == 0) {
0788     w->var("efficiency")->setVal(1.0);
0789     w->var("efficiency")->setAsymError(-1, 0);
0790     w->var("efficiency")->setConstant(false);
0791   } else {
0792     w->var("efficiency")->setConstant(false);
0793   }
0794 
0795   // if signal fraction is 1 then set the number of background events to 0.
0796   //RooRealVar* fBkgPass = w->var("numBackgroundPass");
0797   //if(signalFractionInPassing==1.0) { fBkgPass->setVal(0.0); fBkgPass->setConstant(true); }
0798 
0799   // save initial state for reference
0800   w->saveSnapshot("initialState", w->components());
0801 }
0802 
0803 void TagProbeFitter::saveFitPlot(RooWorkspace* w) {
0804   // save refferences for convenience
0805   RooCategory& efficiencyCategory = *w->cat("_efficiencyCategory_");
0806   RooAbsData* dataAll = (binnedFit ? w->data("data_binned") : w->data("data"));
0807   RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0808   RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0809   RooAbsPdf& pdf = *w->pdf("simPdf");
0810   std::unique_ptr<RooArgSet> obs(pdf.getObservables(*dataAll));
0811   RooRealVar* mass = nullptr;
0812   RooLinkedListIter it = obs->iterator();
0813   for (RooAbsArg* v = (RooAbsArg*)it.Next(); v != nullptr; v = (RooAbsArg*)it.Next()) {
0814     if (!v->InheritsFrom("RooRealVar"))
0815       continue;
0816     mass = (RooRealVar*)v;
0817     break;
0818   }
0819   if (!mass)
0820     return;
0821   // make a 2x2 canvas
0822   TCanvas canvas("fit_canvas");
0823   canvas.Divide(2, 2);
0824   vector<RooPlot*> frames;
0825   // plot the Passing Probes
0826   canvas.cd(1);
0827   if (massBins == 0) {
0828     frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes")));
0829     frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes")));
0830     frames.push_back(mass->frame(Name("All"), Title("All Probes")));
0831   } else {
0832     frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes"), Bins(massBins)));
0833     frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes"), Bins(massBins)));
0834     frames.push_back(mass->frame(Name("All"), Title("All Probes"), Bins(massBins)));
0835   }
0836   dataPass->plotOn(frames[0]);
0837   pdf.plotOn(frames[0], Slice(efficiencyCategory, "Passed"), ProjWData(*dataPass), LineColor(kGreen));
0838   pdf.plotOn(frames[0],
0839              Slice(efficiencyCategory, "Passed"),
0840              ProjWData(*dataPass),
0841              LineColor(kGreen),
0842              Components("backgroundPass"),
0843              LineStyle(kDashed));
0844   frames[0]->Draw();
0845   // plot the Failing Probes
0846   canvas.cd(2);
0847   dataFail->plotOn(frames[1]);
0848   pdf.plotOn(frames[1], Slice(efficiencyCategory, "Failed"), ProjWData(*dataFail), LineColor(kRed));
0849   pdf.plotOn(frames[1],
0850              Slice(efficiencyCategory, "Failed"),
0851              ProjWData(*dataFail),
0852              LineColor(kRed),
0853              Components("backgroundFail"),
0854              LineStyle(kDashed));
0855   frames[1]->Draw();
0856   // plot the All Probes
0857   canvas.cd(3);
0858   dataAll->plotOn(frames.back());
0859   pdf.plotOn(frames.back(), ProjWData(*dataAll), LineColor(kBlue));
0860   pdf.plotOn(frames.back(),
0861              ProjWData(*dataAll),
0862              LineColor(kBlue),
0863              Components("backgroundPass,backgroundFail"),
0864              LineStyle(kDashed));
0865   frames.back()->Draw();
0866   // plot the Fit Results
0867   canvas.cd(4);
0868   frames.push_back(mass->frame(Name("Fit Results"), Title("Fit Results")));
0869   pdf.paramOn(frames.back(), dataAll, "", 0, "NELU", 0.1, 0.9, 0.9);
0870   // draw only the parameter box not the whole frame
0871   frames.back()->findObject(Form("%s_paramBox", pdf.GetName()))->Draw();
0872   //save and clean up
0873   canvas.Draw();
0874   canvas.Write();
0875   for (size_t i = 0; i < frames.size(); i++) {
0876     delete frames[i];
0877   }
0878   delete dataPass;
0879   delete dataFail;
0880 }
0881 
0882 void TagProbeFitter::saveDistributionsPlot(RooWorkspace* w) {
0883   // save pointers to datasets to manage memory
0884   RooAbsData* dataAll = w->data("data");
0885   RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0886   RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0887 
0888   const RooArgSet* vars = dataAll->get();
0889   vector<RooRealVar*> reals;
0890   RooLinkedListIter it = vars->iterator();
0891   for (RooAbsArg* v = (RooAbsArg*)it.Next(); v != nullptr; v = (RooAbsArg*)it.Next()) {
0892     if (!v->InheritsFrom("RooRealVar"))
0893       continue;
0894     reals.push_back((RooRealVar*)v);
0895   }
0896   TCanvas canvas("distributions_canvas");
0897   canvas.Divide(3, reals.size());
0898   vector<RooPlot*> frames;
0899   for (unsigned int i = 0; i < reals.size(); i++) {
0900     // plot the Passing Probes
0901     canvas.cd(3 * i + 1);
0902     frames.push_back(reals[i]->frame(Name("Passing"), Title("Passing Probes"), Bins(100)));
0903     dataPass->plotOn(frames.back(), LineColor(kGreen));
0904     dataPass->statOn(frames.back());
0905     frames.back()->Draw();
0906     // plot the Failing Probes
0907     canvas.cd(3 * i + 2);
0908     frames.push_back(reals[i]->frame(Name("Failing"), Title("Failing Probes"), Bins(100)));
0909     dataFail->plotOn(frames.back(), LineColor(kRed));
0910     dataFail->statOn(frames.back());
0911     frames.back()->Draw();
0912     // plot the All Probes
0913     canvas.cd(3 * i + 3);
0914     frames.push_back(reals[i]->frame(Name("All"), Title("All Probes"), Bins(100)));
0915     dataAll->plotOn(frames.back(), LineColor(kBlue));
0916     dataAll->statOn(frames.back());
0917     frames.back()->Draw();
0918   }
0919   canvas.Draw();
0920   canvas.Write();
0921   for (size_t i = 0; i < frames.size(); i++) {
0922     delete frames[i];
0923   }
0924   delete dataPass;
0925   delete dataFail;
0926 }
0927 
0928 void TagProbeFitter::saveEfficiencyPlots(RooDataSet& eff,
0929                                          const TString& effName,
0930                                          RooArgSet& binnedVariables,
0931                                          RooArgSet& mappedCategories) {
0932   RooLinkedListIter v1it = binnedVariables.iterator();
0933   bool isOnePoint =
0934       (eff.numEntries() == 1);  // for datasets with > 1 entry, we don't make plots for variables with just one bin
0935   for (RooRealVar* v1 = (RooRealVar*)v1it.Next(); v1 != nullptr; v1 = (RooRealVar*)v1it.Next()) {
0936     RooArgSet binCategories1D;
0937     if (v1->numBins() == 1 && !isOnePoint)
0938       continue;
0939     RooLinkedListIter v2it = binnedVariables.iterator();
0940     for (RooRealVar* v2 = (RooRealVar*)v2it.Next(); v2 != nullptr; v2 = (RooRealVar*)v2it.Next()) {
0941       if (v2 == v1)
0942         continue;
0943       if (v2->numBins() == 1 && !isOnePoint)
0944         continue;
0945       binCategories1D.addClone(
0946           RooBinningCategory(TString(v2->GetName()) + "_bins", TString(v2->GetName()) + "_bins", *v2));
0947 
0948       RooArgSet binCategories2D;
0949       RooLinkedListIter v3it = binnedVariables.iterator();
0950       for (RooRealVar* v3 = (RooRealVar*)v3it.Next(); v3 != nullptr; v3 = (RooRealVar*)v3it.Next()) {
0951         if (v3 == v1 || v3 == v2)
0952           continue;
0953         binCategories2D.addClone(
0954             RooBinningCategory(TString(v3->GetName()) + "_bins", TString(v3->GetName()) + "_bins", *v3));
0955       }
0956       RooMultiCategory allCats2D("allCats2D", "allCats2D", RooArgSet(binCategories2D, mappedCategories));
0957       if (allCats2D.numTypes() == 0) {
0958         makeEfficiencyPlot2D(eff, *v1, *v2, TString::Format("%s_%s_PLOT", v1->GetName(), v2->GetName()), "", effName);
0959       } else {
0960         RooDataSet myEff(eff);
0961         myEff.addColumn(allCats2D);
0962         std::unique_ptr<TIterator> catIt(allCats2D.typeIterator());
0963         for (RooCatType* t = (RooCatType*)catIt->Next(); t != nullptr; t = (RooCatType*)catIt->Next()) {
0964           TString catName = t->GetName();
0965           if (catName.Contains("NotMapped"))
0966             continue;
0967           catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0968           makeEfficiencyPlot2D(myEff,
0969                                *v1,
0970                                *v2,
0971                                TString::Format("%s_%s_PLOT_%s", v1->GetName(), v2->GetName(), catName.Data()),
0972                                catName,
0973                                effName,
0974                                "allCats2D",
0975                                t->getVal());
0976         }
0977       }
0978     }
0979     RooMultiCategory allCats1D("allCats1D", "allCats1D", RooArgSet(binCategories1D, mappedCategories));
0980     if (allCats1D.numTypes() == 0) {
0981       makeEfficiencyPlot1D(eff, *v1, TString::Format("%s_PLOT", v1->GetName()), "", effName);
0982     } else {
0983       RooDataSet myEff(eff);
0984       myEff.addColumn(allCats1D);
0985       std::unique_ptr<TIterator> catIt(allCats1D.typeIterator());
0986       for (RooCatType* t = (RooCatType*)catIt->Next(); t != nullptr; t = (RooCatType*)catIt->Next()) {
0987         TString catName = t->GetName();
0988         if (catName.Contains("NotMapped"))
0989           continue;
0990         catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0991         makeEfficiencyPlot1D(myEff,
0992                              *v1,
0993                              TString::Format("%s_PLOT_%s", v1->GetName(), catName.Data()),
0994                              catName,
0995                              effName,
0996                              "allCats1D",
0997                              t->getVal());
0998       }
0999     }
1000   }
1001 }
1002 
1003 void TagProbeFitter::makeEfficiencyPlot1D(RooDataSet& eff,
1004                                           RooRealVar& v,
1005                                           const TString& plotName,
1006                                           const TString& plotTitle,
1007                                           const TString& effName,
1008                                           const char* catName,
1009                                           int catIndex) {
1010   TGraphAsymmErrors* p = new TGraphAsymmErrors();
1011   const RooArgSet* entry = eff.get();
1012   const RooRealVar& vi = dynamic_cast<const RooRealVar&>(*entry->find(v.GetName()));
1013   const RooRealVar& ei = dynamic_cast<const RooRealVar&>(*entry->find("efficiency"));
1014   for (unsigned int i = 0, n = eff.numEntries(); i < n; ++i) {
1015     entry = eff.get(i);
1016     if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
1017       continue;
1018     int j = p->GetN();
1019     p->Set(j + 1);
1020     p->SetPoint(j, vi.getVal(), ei.getVal());
1021     p->SetPointError(j, -vi.getAsymErrorLo(), vi.getAsymErrorHi(), -ei.getAsymErrorLo(), ei.getAsymErrorHi());
1022   }
1023   TCanvas canvas(plotName);
1024   TH1F* frame = new TH1F("frame", "Efficiency of " + effName, 1, v.getMin(), v.getMax());
1025   frame->SetDirectory(nullptr);
1026   p->SetNameTitle(Form("hxy_%s", eff.GetName()), "Efficiency of " + effName);
1027   p->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
1028   p->GetYaxis()->SetTitle("Efficiency of " + effName);
1029   frame->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
1030   frame->GetYaxis()->SetTitle("Efficiency of " + effName);
1031   frame->GetYaxis()->SetRangeUser(0, 1);
1032   frame->Draw();
1033   p->SetLineWidth(2);
1034   p->SetMarkerStyle(kFullCircle);
1035   p->SetMarkerSize(1.2);
1036   p->Draw("P SAME");
1037   canvas.Write();
1038   delete frame;
1039   delete p;
1040 }
1041 
1042 void TagProbeFitter::makeEfficiencyPlot2D(RooDataSet& eff,
1043                                           RooRealVar& v1,
1044                                           RooRealVar& v2,
1045                                           const TString& plotName,
1046                                           const TString& plotTitle,
1047                                           const TString& effName,
1048                                           const char* catName,
1049                                           int catIndex) {
1050   TCanvas canvas(plotName);
1051   canvas.SetRightMargin(0.15);
1052   TH2F* h = new TH2F(plotName,
1053                      plotName,
1054                      v1.getBinning().numBins(),
1055                      v1.getBinning().array(),
1056                      v2.getBinning().numBins(),
1057                      v2.getBinning().array());
1058   const RooArgSet* set = eff.get();
1059   RooRealVar* e = (RooRealVar*)set->find("efficiency");
1060   RooRealVar* v1_ = (RooRealVar*)set->find(v1.GetName());
1061   RooRealVar* v2_ = (RooRealVar*)set->find(v2.GetName());
1062   h->SetTitle(TString::Format("%s;%s%s;%s%s;Efficiency of %s",
1063                               plotTitle.Data(),
1064                               v1.GetTitle(),
1065                               TString(v1.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v1.getUnit()).Data(),
1066                               v2.GetTitle(),
1067                               TString(v2.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v2.getUnit()).Data(),
1068                               effName.Data()));
1069   h->SetOption("colztexte");
1070   h->GetZaxis()->SetRangeUser(-0.001, 1.001);
1071   h->SetStats(kFALSE);
1072   for (int i = 0; i < eff.numEntries(); i++) {
1073     const RooArgSet* entry = eff.get(i);
1074     if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
1075       continue;
1076     h->SetBinContent(h->FindBin(v1_->getVal(), v2_->getVal()), e->getVal());
1077     h->SetBinError(h->FindBin(v1_->getVal(), v2_->getVal()), (e->getErrorHi() - e->getErrorLo()) / 2.);
1078   }
1079   h->Draw();
1080   canvas.Draw();
1081   canvas.Write();
1082   delete h;
1083 }
1084 
1085 void TagProbeFitter::doSBSEfficiency(RooWorkspace* w, RooRealVar& efficiency) {}
1086 
1087 void TagProbeFitter::doCntEfficiency(RooWorkspace* w, RooRealVar& efficiency) {
1088   int pass = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
1089   int fail = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
1090   double e = (pass + fail == 0) ? 0 : pass / double(pass + fail);
1091   // Use Clopper-Pearson
1092   double alpha = (1.0 - .68540158589942957) / 2;
1093   double lo = (pass == 0) ? 0.0 : ROOT::Math::beta_quantile(alpha, pass, fail + 1);
1094   double hi = (fail == 0) ? 1.0 : ROOT::Math::beta_quantile(1 - alpha, pass + 1, fail);
1095   ////from TGraphAsymmErrors
1096   //double lob, hib;
1097   //Efficiency( pass, pass+fail, .68540158589942957, e, lob, hib );
1098   //std::cerr << "CNT " << pass << "/" << fail << ":  Clopper Pearson [" << lo << ", "  << hi << "], Bayes [" << lob << ", " << hib << "]" << std::endl;
1099   efficiency.setVal(e);
1100   efficiency.setAsymError(lo - e, hi - e);
1101 }
1102 
1103 void TagProbeFitter::varFixer(RooWorkspace* w, bool fix) {
1104   std::vector<std::string>::const_iterator it;
1105   for (it = fixVars.begin(); it < fixVars.end(); it++) {
1106     if (w->var((*it).c_str()))
1107       w->var((*it).c_str())->setAttribute("Constant", fix);
1108     else {
1109       std::cout << "TagProbeFitter: "
1110                 << "Can't find a variable to fix: " << *it;
1111     }
1112   }
1113 }
1114 
1115 void TagProbeFitter::varSaver(RooWorkspace* w) {
1116   if (!fixVarValues.empty()) {
1117     std::cout << "attempt to save variables more than once!" << std::endl;
1118     return;
1119   }
1120   std::vector<std::string>::const_iterator it;
1121   for (it = fixVars.begin(); it < fixVars.end(); it++) {
1122     fixVarValues.push_back(w->var((*it).c_str())->getVal());
1123   }
1124 }
1125 
1126 void TagProbeFitter::varRestorer(RooWorkspace* w) {
1127   if (fixVarValues.size() == fixVars.size())
1128     for (unsigned int i = 0; i < fixVars.size(); i++) {
1129       std::cout << "setting variable " << fixVars[i].c_str() << std::endl;
1130       w->var(fixVars[i].c_str())->setVal(fixVarValues[i]);
1131     }
1132   else {
1133     std::cout << "fixVars and fixVarValues are not of the same size!" << std::endl;
1134   }
1135 }