File indexing completed on 2025-02-20 03:45:47
0001 #include "PhysicsTools/TagAndProbe/interface/TagProbeFitter.h"
0002 #include <memory>
0003
0004 #include <stdexcept>
0005
0006 #include "TROOT.h"
0007 #include "TFile.h"
0008 #include "TPad.h"
0009 #include "TCanvas.h"
0010 #include "TGraphAsymmErrors.h"
0011 #include "TH2F.h"
0012 #include "TStyle.h"
0013 #include "Math/QuantFuncMathCore.h"
0014 #include "RooAbsReal.h"
0015 #include "RooBinning.h"
0016 #include "RooBinningCategory.h"
0017 #include "RooCategory.h"
0018 #include "RooDataHist.h"
0019 #include "RooDataSet.h"
0020 #include "RooFitResult.h"
0021 #include "RooFormulaVar.h"
0022 #include "RooGlobalFunc.h"
0023 #include "RooMappedCategory.h"
0024 #include "RooMinimizer.h"
0025 #include "RooMsgService.h"
0026 #include "RooMultiCategory.h"
0027 #include "RooNumIntConfig.h"
0028 #include "RooPlot.h"
0029 #include "RooProfileLL.h"
0030 #include "RooRealVar.h"
0031 #include "RooThresholdCategory.h"
0032 #include "RooWorkspace.h"
0033 #include "RooTreeDataStore.h"
0034
0035 using namespace std;
0036 using namespace RooFit;
0037
0038 TagProbeFitter::TagProbeFitter(const std::vector<std::string>& inputFileNames,
0039 string inputDirectoryName,
0040 string inputTreeName,
0041 string outputFileName,
0042 int numCPU_,
0043 bool saveWorkspace_,
0044 bool floatShapeParameters_,
0045 const std::vector<std::string>& fixVars_) {
0046 inputTree = new TChain((inputDirectoryName + "/" + inputTreeName).c_str());
0047 for (size_t i = 0; i < inputFileNames.size(); i++) {
0048 inputTree->Add(inputFileNames[i].c_str());
0049 }
0050 outputFile = new TFile(outputFileName.c_str(), "recreate");
0051 outputDirectory = outputFile->mkdir(inputDirectoryName.c_str());
0052 numCPU = numCPU_;
0053 saveWorkspace = saveWorkspace_;
0054 massBins = 0;
0055 floatShapeParameters = floatShapeParameters_;
0056 fixVars = fixVars_;
0057 weightVar = "";
0058 if (!floatShapeParameters && fixVars.empty())
0059 std::cout << "TagProbeFitter: "
0060 << "You wnat to fix some variables but do not specify them!";
0061
0062 gROOT->SetStyle("Plain");
0063 gStyle->SetTitleFillColor(0);
0064 gStyle->SetPalette(1);
0065 gStyle->SetOptStat(0);
0066 gStyle->SetPaintTextFormat(".2f");
0067
0068 quiet = false;
0069
0070 binnedFit = false;
0071
0072 doSaveDistributionsPlot = true;
0073
0074
0075 RooAbsReal::defaultIntegratorConfig()->setEpsAbs(1e-13);
0076 RooAbsReal::defaultIntegratorConfig()->setEpsRel(1e-13);
0077
0078 split_mode = 0;
0079 }
0080
0081 TagProbeFitter::~TagProbeFitter() {
0082 if (inputTree)
0083 delete inputTree;
0084 if (outputFile)
0085 outputFile->Close();
0086 }
0087
0088 void TagProbeFitter::setQuiet(bool quiet_) {
0089 quiet = quiet_;
0090 RooMsgService::instance().setGlobalKillBelow(quiet ? RooFit::ERROR : RooFit::WARNING);
0091 }
0092
0093 void TagProbeFitter::setSplitMode(unsigned int nevents) { split_mode = nevents; }
0094
0095 bool TagProbeFitter::addVariable(string name, string title, double low, double hi, string units) {
0096 RooRealVar temp(name.c_str(), title.c_str(), low, hi, units.c_str());
0097 temp.setBins(5000, "cache");
0098 variables.addClone(temp);
0099 return true;
0100 }
0101
0102 bool TagProbeFitter::addCategory(string name, string title, string expression) {
0103 RooCategory* c = (RooCategory*)parameterParser.factory(expression.c_str());
0104 if (!c)
0105 return false;
0106
0107 c->SetName(name.c_str());
0108 c->SetTitle(title.c_str());
0109 variables.addClone(*c);
0110 return true;
0111 }
0112
0113 bool TagProbeFitter::addExpression(string expressionName,
0114 string title,
0115 string expression,
0116 const std::vector<string>& arguments) {
0117 expressionVars.emplace_back(make_pair(expressionName, title), make_pair(expression, arguments));
0118 return true;
0119 }
0120
0121 bool TagProbeFitter::addThresholdCategory(string categoryName, string title, string varName, double cutValue) {
0122 thresholdCategories.emplace_back(make_pair(categoryName, title), make_pair(varName, cutValue));
0123 return true;
0124 }
0125
0126 void TagProbeFitter::addPdf(string name, vector<string>& pdfCommands) { pdfs[name] = pdfCommands; }
0127
0128 void TagProbeFitter::setBinsForMassPlots(int bins) { massBins = bins; }
0129
0130 void TagProbeFitter::setWeightVar(const std::string& var) { weightVar = var; }
0131
0132 string TagProbeFitter::calculateEfficiency(string dirName,
0133 const std::vector<string>& effCats,
0134 const std::vector<string>& effStates,
0135 vector<string>& unbinnedVariables,
0136 map<string, vector<double> >& binnedReals,
0137 map<string, std::vector<string> >& binnedCategories,
0138 vector<string>& binToPDFmap) {
0139
0140 outputDirectory->cd();
0141
0142 gDirectory->mkdir(dirName.c_str())->cd();
0143
0144 RooArgSet dataVars;
0145
0146
0147 for (std::string const& v : unbinnedVariables) {
0148 dataVars.addClone(variables[v.c_str()], true);
0149 if (binnedFit && (&v == &unbinnedVariables.front())) {
0150 ((RooRealVar&)dataVars[v.c_str()]).setBins(massBins);
0151 }
0152 }
0153
0154 RooArgSet binnedVariables;
0155 RooArgSet binCategories;
0156 for (auto const& [name, binEdges] : binnedReals) {
0157 if (variables.find(name.c_str()) == nullptr) {
0158 std::cerr << "Binned variable '" << name << "' not found." << std::endl;
0159 return "Error";
0160 }
0161 binnedVariables.add(*dataVars.addClone(variables[name]));
0162 ((RooRealVar&)binnedVariables[name]).setBinning(RooBinning(binEdges.size() - 1, binEdges.data()));
0163 std::string nameBins = name + "_bins";
0164 binCategories.addClone(
0165 RooBinningCategory(nameBins.c_str(), nameBins.c_str(), (RooRealVar&)binnedVariables[name.c_str()]));
0166 }
0167
0168
0169 RooArgSet categories;
0170 RooArgSet mappedCategories;
0171 for (auto const& [name, binnedVariableNames] : binnedCategories) {
0172 if (variables.find(name.c_str()) == nullptr) {
0173 std::cerr << "Binned category '" << name << "' not found." << std::endl;
0174 return "Error";
0175 }
0176 categories.add(*dataVars.addClone(variables[name]));
0177 std::string nameBins = name + "_bins";
0178 mappedCategories.addClone(
0179 RooMappedCategory(nameBins.c_str(), nameBins.c_str(), (RooCategory&)categories[name.c_str()]));
0180 for (std::size_t i = 0; i < binnedVariableNames.size(); i++) {
0181 ((RooMappedCategory&)mappedCategories[name + "_bins"])
0182 .map(binnedVariableNames[i].c_str(),
0183 name + "_" + TString(binnedVariableNames[i].c_str()).ReplaceAll(",", "_"));
0184 }
0185 }
0186
0187
0188 for (std::string const& effCat : effCats) {
0189 if (variables.find(effCat.c_str()) != nullptr) {
0190 dataVars.addClone(variables[effCat.c_str()], true);
0191 }
0192 }
0193
0194
0195 for (auto const& [_, names] : expressionVars) {
0196 for (std::string const& name : names.second) {
0197
0198 if (auto* found = variables.find(name.c_str()))
0199 dataVars.addClone(*found, true);
0200 }
0201 }
0202
0203 for (auto const& [catNameTitle, varNameValue] : thresholdCategories) {
0204 if (variables.find(varNameValue.first.c_str()))
0205 dataVars.addClone(variables[varNameValue.first.c_str()], true);
0206 }
0207
0208
0209 RooDataSet* data(nullptr);
0210 if (not split_mode) {
0211 data = new RooDataSet("data",
0212 "data",
0213 dataVars,
0214 Import(*inputTree),
0215 WeightVar(weightVar.empty() ? nullptr : weightVar.c_str()));
0216
0217
0218 for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0219 ev = expressionVars.begin(),
0220 eve = expressionVars.end();
0221 ev != eve;
0222 ++ev) {
0223 RooArgList args;
0224 for (std::string const& it : ev->second.second) {
0225 args.add(dataVars[it.c_str()]);
0226 }
0227 RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0228 dataVars.addClone(*static_cast<RooRealVar*>(data->addColumn(expr)));
0229 }
0230
0231
0232 for (auto const& [catNameTitle, varNameValue] : thresholdCategories) {
0233 RooThresholdCategory tmp(catNameTitle.first.c_str(),
0234 catNameTitle.second.c_str(),
0235 (RooAbsReal&)dataVars[varNameValue.first.c_str()],
0236 "above",
0237 1);
0238 tmp.addThreshold(varNameValue.second, "below", 0);
0239 dataVars.addClone(*static_cast<RooCategory*>(data->addColumn(tmp)));
0240 }
0241 }
0242
0243
0244 RooMultiCategory allCats("allCats", "allCats", RooArgSet(binCategories, mappedCategories));
0245 string effName;
0246
0247 if (not split_mode) {
0248 data->addColumn(allCats);
0249
0250 if (effCats.size() == 1) {
0251 effName = effCats.front() + "::" + effStates.front();
0252 RooMappedCategory efficiencyCategory(
0253 "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0254 efficiencyCategory.map(effStates.front().c_str(), "Passed");
0255 data->addColumn(efficiencyCategory);
0256 } else {
0257 RooArgSet rooEffCats;
0258 string multiState = "{";
0259 for (size_t i = 0; i < effCats.size(); ++i) {
0260 if (i) {
0261 multiState += ";";
0262 effName += " && ";
0263 }
0264 rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0265 multiState += effStates[i];
0266 effName = effCats[i] + "::" + effStates[i];
0267 }
0268 multiState += "}";
0269 RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0270 RooMappedCategory efficiencyCategory(
0271 "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0272 efficiencyCategory.map(multiState.c_str(), "Passed");
0273 data->addColumn(efficiencyCategory);
0274 }
0275 }
0276
0277
0278 RooMappedCategory pdfCategory(
0279 "_pdfCategory_", "_pdfCategory_", allCats, (!binToPDFmap.empty()) ? binToPDFmap[0].c_str() : "all");
0280 for (unsigned int i = 1; i < binToPDFmap.size(); i += 2) {
0281 pdfCategory.map(binToPDFmap[i].c_str(), binToPDFmap[i + 1].c_str());
0282 }
0283 if (not split_mode)
0284 data->addColumn(pdfCategory);
0285
0286
0287 RooRealVar efficiency("efficiency", "Efficiency", 0, 1);
0288
0289 RooDataSet fitEfficiency("fit_eff",
0290 "Efficiency from unbinned ML fit",
0291 {RooArgSet(binnedVariables, categories), efficiency},
0292 StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0293
0294 RooDataSet cntEfficiency("cnt_eff",
0295 "Efficiency from counting",
0296 {RooArgSet(binnedVariables, categories), efficiency},
0297 StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0298
0299 if (not split_mode) {
0300 if (!floatShapeParameters) {
0301
0302 RooWorkspace ws;
0303 ws.import(*data);
0304 efficiency.setVal(0);
0305 efficiency.setAsymError(0, 0);
0306 std::cout << "ALL dataset: calling doFitEfficiency with pdf: " << pdfCategory.getLabel() << std::endl;
0307 doFitEfficiency(&ws, pdfCategory.getLabel(), efficiency);
0308 }
0309 } else {
0310
0311 inputTree->SetBranchStatus("*", false);
0312 for (TObject* obj : dataVars)
0313 inputTree->SetBranchStatus(obj->GetName(), true);
0314 }
0315
0316
0317
0318
0319 for (auto const& [catNameStr, iCat] : allCats) {
0320
0321 TString catName = catNameStr;
0322
0323 if (catName.Contains("NotMapped"))
0324 continue;
0325
0326 RooDataSet* data_bin(nullptr);
0327 RooArgSet tmpVars;
0328
0329 if (not split_mode) {
0330
0331 data_bin = (RooDataSet*)data->reduce(Cut(TString::Format("allCats==%d", iCat)));
0332 } else {
0333 data_bin = new RooDataSet("data", "data", dataVars, WeightVar(weightVar.empty() ? nullptr : weightVar.c_str()));
0334
0335 TDirectory* tmp = gDirectory;
0336 gROOT->cd();
0337
0338
0339
0340 unsigned int n_entries = inputTree->GetEntries();
0341 printf("Input number of events: %u\n", n_entries);
0342 unsigned int first_entry = 0;
0343 while (first_entry < n_entries) {
0344 std::unique_ptr<TTree> copyTree{inputTree->CopyTree("", "", split_mode, first_entry)};
0345 RooTreeDataStore store("reader",
0346 "reader",
0347 dataVars,
0348 *copyTree,
0349 "",
0350 (weightVar.empty() ? nullptr : weightVar.c_str()));
0351 for (unsigned int i = 0; i < copyTree->GetEntries(); ++i) {
0352 store.get(i);
0353 if (allCats.getIndex() == iCat) {
0354 data_bin->add(dataVars, weightVar.empty() ? 1.0 : dataVars.getRealValue(weightVar.c_str()));
0355 }
0356 }
0357 first_entry += split_mode;
0358 data_bin->Print("V");
0359 }
0360 data_bin->Print("V");
0361 tmp->cd();
0362
0363
0364 for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0365 ev = expressionVars.begin(),
0366 eve = expressionVars.end();
0367 ev != eve;
0368 ++ev) {
0369 RooArgList args;
0370 for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0371 ++it) {
0372 args.add(dataVars[it->c_str()]);
0373 }
0374 RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0375 RooRealVar* col = (RooRealVar*)data_bin->addColumn(expr);
0376 tmpVars.add(*dataVars.addClone(*col));
0377 }
0378
0379
0380 for (auto const& [catNameTitle, varNameValue] : thresholdCategories) {
0381 RooThresholdCategory tmp(catNameTitle.first.c_str(),
0382 catNameTitle.second.c_str(),
0383 (RooAbsReal&)dataVars[varNameValue.first.c_str()],
0384 "above",
0385 1);
0386 tmp.addThreshold(varNameValue.second, "below", 0);
0387 tmpVars.add(*dataVars.addClone(*static_cast<RooCategory*>(data_bin->addColumn(tmp))));
0388 }
0389
0390
0391 if (effCats.size() == 1) {
0392 effName = effCats.front() + "::" + effStates.front();
0393 RooMappedCategory efficiencyCategory(
0394 "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0395 efficiencyCategory.map(effStates.front().c_str(), "Passed");
0396 data_bin->addColumn(efficiencyCategory);
0397 } else {
0398 RooArgSet rooEffCats;
0399 string multiState = "{";
0400 for (size_t i = 0; i < effCats.size(); ++i) {
0401 if (i) {
0402 multiState += ";";
0403 effName += " && ";
0404 }
0405 rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0406 multiState += effStates[i];
0407 effName = effCats[i] + "::" + effStates[i];
0408 }
0409 multiState += "}";
0410 RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0411 RooMappedCategory efficiencyCategory(
0412 "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0413 efficiencyCategory.map(multiState.c_str(), "Passed");
0414 data_bin->addColumn(efficiencyCategory);
0415 }
0416 data_bin->addColumn(pdfCategory);
0417 }
0418
0419
0420 const RooArgSet* row = data_bin->get();
0421
0422
0423 TString pdfName(((RooCategory*)row->find("_pdfCategory_"))->getLabel());
0424
0425
0426 TString dirName = catName;
0427 dirName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "__");
0428 if (pdfName.Length() > 0) {
0429 dirName.Append("__").Append(pdfName);
0430 }
0431
0432 cout << "Fitting bin: " << dirName << endl;
0433
0434 gDirectory->mkdir(dirName)->cd();
0435
0436
0437 RooWorkspace* w = new RooWorkspace();
0438
0439 w->import(*data_bin);
0440 delete data_bin;
0441
0442 data_bin = dynamic_cast<RooDataSet*>(w->data("data"));
0443
0444
0445 if (doSaveDistributionsPlot)
0446 saveDistributionsPlot(w);
0447
0448 if (data_bin->numEntries() > 0) {
0449
0450 RooArgSet meanOfVariables;
0451 for (const RooAbsArg* vv : binnedVariables) {
0452 const RooRealVar* v = dynamic_cast<const RooRealVar*>(vv);
0453 meanOfVariables.addClone(*v);
0454 double mean = w->data("data")->mean(*v);
0455 RooBinning binning((RooBinning&)v->getBinning());
0456 int ind = binning.binNumber(mean);
0457 RooRealVar& newVar = (RooRealVar&)meanOfVariables[v->GetName()];
0458 newVar.setVal(mean);
0459 newVar.setAsymError(binning.binLow(ind) - mean, binning.binHigh(ind) - mean);
0460 }
0461
0462
0463
0464 meanOfVariables.addClone(*data_bin->get(0), true);
0465
0466 efficiency.setVal(0);
0467 efficiency.setAsymError(0, 0);
0468 doFitEfficiency(w, pdfName.Data(), efficiency);
0469 fitEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0470
0471
0472
0473
0474
0475 efficiency.setVal(0);
0476 doCntEfficiency(w, efficiency);
0477 cntEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0478 }
0479
0480 if (saveWorkspace) {
0481 w->Write("w");
0482 }
0483
0484 delete w;
0485 if (split_mode)
0486 dataVars.remove(tmpVars);
0487
0488 gDirectory->cd("..");
0489 }
0490
0491
0492 fitEfficiency.Write();
0493 gDirectory->mkdir("fit_eff_plots")->cd();
0494 saveEfficiencyPlots(fitEfficiency, effName, binnedVariables, mappedCategories);
0495 gDirectory->cd("..");
0496
0497
0498
0499
0500
0501
0502 cntEfficiency.Write();
0503 gDirectory->mkdir("cnt_eff_plots")->cd();
0504 saveEfficiencyPlots(cntEfficiency, effName, binnedVariables, mappedCategories);
0505 gDirectory->cd("..");
0506
0507 if (not split_mode)
0508 delete data;
0509
0510
0511 return "";
0512 }
0513
0514 void TagProbeFitter::doFitEfficiency(RooWorkspace* w, string pdfName, RooRealVar& efficiency) {
0515
0516 if (pdfName == "all") {
0517 return;
0518 }
0519
0520 createPdf(w, pdfs[pdfName]);
0521
0522 setInitialValues(w);
0523 std::unique_ptr<RooFitResult> res;
0524
0525 RooAbsData* data = w->data("data");
0526 if (binnedFit) {
0527
0528 const RooArgSet* dataObs = data->get(0);
0529
0530 std::unique_ptr<RooArgSet> obs{w->pdf("simPdf")->getObservables(dataObs)};
0531 RooDataHist bdata{"data_binned", "data_binned", *obs, *data};
0532 w->import(bdata);
0533 data = w->data("data_binned");
0534 }
0535
0536 double totPassing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0537 double totFailing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0538
0539 std::unique_ptr<RooAbsReal> simNLL{w->pdf("simPdf")->createNLL(*data, Extended(true), NumCPU(numCPU))};
0540
0541 RooMinimizer minimizer(*simNLL);
0542 minimizer.setStrategy(1);
0543 minimizer.setProfile(true);
0544 RooProfileLL profileLL("simPdfNLL", "", *simNLL, *w->var("efficiency"));
0545
0546
0547
0548
0549
0550
0551
0552
0553
0554
0555
0556
0557
0558
0559
0560
0561 if (!fixVars.empty()) {
0562
0563 if (!floatShapeParameters && fixVarValues.empty()) {
0564
0565
0566
0567
0568
0569
0570
0571
0572 varFixer(w, true);
0573
0574 minimizer.minimize("Minuit2", "Scan");
0575 minimizer.migrad();
0576 minimizer.hesse();
0577
0578
0579
0580
0581 varFixer(w, false);
0582
0583 minimizer.minimize("Minuit2", "Scan");
0584 minimizer.migrad();
0585 minimizer.hesse();
0586
0587
0588
0589
0590 varSaver(w);
0591
0592 }
0593
0594
0595 if (!floatShapeParameters)
0596 varRestorer(w);
0597
0598
0599 minimizer.minimize("Minuit2", "Scan");
0600 minimizer.migrad();
0601 minimizer.hesse();
0602
0603 profileLL.getVal();
0604 RooMinimizer* profMinuit = profileLL.minimizer();
0605 profMinuit->setProfile(true);
0606 profMinuit->setStrategy(2);
0607 profMinuit->setPrintLevel(1);
0608 profMinuit->minos(*w->var("efficiency"));
0609 res.reset(profMinuit->save());
0610
0611
0612
0613 }
0614
0615
0616 if (floatShapeParameters) {
0617
0618 varFixer(w, false);
0619
0620
0621 minimizer.minimize("Minuit2", "Scan");
0622 minimizer.migrad();
0623 minimizer.hesse();
0624 res.reset(w->pdf("simPdf")->fitTo(*data,
0625 Save(true),
0626 Extended(true),
0627 NumCPU(numCPU),
0628 Strategy(2),
0629 Minos(*w->var("efficiency")),
0630 PrintLevel(quiet ? -1 : 1),
0631 PrintEvalErrors(quiet ? -1 : 1),
0632 Warnings(!quiet)));
0633 }
0634
0635
0636 res->Write("fitresults");
0637 w->saveSnapshot("finalState", w->components());
0638 saveFitPlot(w);
0639
0640 RooRealVar* e = (RooRealVar*)res->floatParsFinal().find("efficiency");
0641
0642
0643 efficiency.setVal(e->getVal());
0644 Double_t errLo = e->getErrorLo(), errHi = e->getErrorHi();
0645 if (errLo == 0 && e->getVal() < 0.5)
0646 errLo = e->getMin() - e->getVal();
0647 if (errHi == 0 && e->getVal() > 0.5)
0648 errHi = e->getMax() - e->getVal();
0649 efficiency.setAsymError(errLo, errHi);
0650
0651 if (totPassing * totFailing == 0) {
0652 RooRealVar* nTot = (RooRealVar*)res->floatParsFinal().find("numTot");
0653 RooRealVar* fSig = (RooRealVar*)res->floatParsFinal().find("fSigAll");
0654 double cerr = ROOT::Math::beta_quantile(1 - (1.0 - .68540158589942957) / 2, 1, nTot->getVal() * fSig->getVal());
0655
0656
0657
0658
0659
0660
0661
0662
0663
0664 if (totPassing == 0) {
0665 efficiency.setVal(0);
0666 efficiency.setAsymError(0, cerr);
0667 } else {
0668 efficiency.setVal(1);
0669 efficiency.setAsymError(-cerr, 0);
0670 }
0671 }
0672 }
0673
0674 void TagProbeFitter::createPdf(RooWorkspace* w, vector<string>& pdfCommands) {
0675
0676 for (unsigned int i = 0; i < pdfCommands.size(); i++) {
0677 const std::string& command = pdfCommands[i];
0678 if (command.find("#import ") == 0) {
0679 TDirectory* here = gDirectory;
0680 w->import(command.substr(8).c_str());
0681 here->cd();
0682 } else {
0683 TDirectory* here = gDirectory;
0684 w->factory(command.c_str());
0685 here->cd();
0686 }
0687 }
0688
0689
0690 w->factory("expr::nSignalPass('efficiency*fSigAll*numTot', efficiency, fSigAll[.9,0,1],numTot[1,0,1e10])");
0691 w->factory("expr::nSignalFail('(1-efficiency)*fSigAll*numTot', efficiency, fSigAll,numTot)");
0692 w->factory("expr::nBkgPass('effBkg*(1-fSigAll)*numTot', effBkg[.9,0,1],fSigAll,numTot)");
0693 w->factory("expr::nBkgFail('(1-effBkg)*(1-fSigAll)*numTot', effBkg,fSigAll,numTot)");
0694 TString sPass = "signal", sFail = "signal";
0695 if (w->pdf("signalPass") != nullptr && w->pdf("signalFail") != nullptr) {
0696 if (w->pdf("signal") != nullptr)
0697 throw std::logic_error(
0698 "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0699 sPass = "signalPass";
0700 sFail = "signalFail";
0701 } else if (w->pdf("signal") != nullptr) {
0702 if (w->pdf("signalPass") != nullptr || w->pdf("signalFail") != nullptr) {
0703 throw std::logic_error(
0704 "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0705 }
0706 } else {
0707 throw std::logic_error("You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail')");
0708 }
0709 w->factory("SUM::pdfPass(nSignalPass*" + sPass + ", nBkgPass*backgroundPass)");
0710 w->factory("SUM::pdfFail(nSignalFail*" + sFail + ", nBkgFail*backgroundFail)");
0711
0712 w->factory("SIMUL::simPdf(_efficiencyCategory_, Passed=pdfPass, Failed=pdfFail)");
0713
0714 if (w->pdf("simPdf") == nullptr)
0715 throw std::runtime_error("Could not create simultaneous fit pdf.");
0716 if (w->var("signalFractionInPassing") == nullptr)
0717 w->factory("signalFractionInPassing[0.9]");
0718 }
0719
0720 void TagProbeFitter::setInitialValues(RooWorkspace* w) {
0721
0722 double totPassing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0723 double totFailing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0724
0725
0726
0727 w->var("numTot")->setVal(totPassing + totFailing);
0728 w->var("numTot")->setMax(2.0 * (totPassing + totFailing) + 10);
0729
0730 if (totPassing == 0) {
0731 w->var("efficiency")->setVal(0.0);
0732 w->var("efficiency")->setAsymError(0, 1);
0733 w->var("efficiency")->setConstant(false);
0734 } else if (totFailing == 0) {
0735 w->var("efficiency")->setVal(1.0);
0736 w->var("efficiency")->setAsymError(-1, 0);
0737 w->var("efficiency")->setConstant(false);
0738 } else {
0739 w->var("efficiency")->setConstant(false);
0740 }
0741
0742
0743 w->saveSnapshot("initialState", w->components());
0744 }
0745
0746 void TagProbeFitter::saveFitPlot(RooWorkspace* w) {
0747
0748 RooCategory& efficiencyCategory = *w->cat("_efficiencyCategory_");
0749 RooAbsData* dataAll = (binnedFit ? w->data("data_binned") : w->data("data"));
0750 RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0751 RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0752 RooAbsPdf& pdf = *w->pdf("simPdf");
0753 std::unique_ptr<RooArgSet> obs(pdf.getObservables(*dataAll));
0754 RooRealVar* mass = nullptr;
0755 for (RooAbsArg* v : *obs) {
0756 if (!v->InheritsFrom("RooRealVar"))
0757 continue;
0758 mass = (RooRealVar*)v;
0759 break;
0760 }
0761 if (!mass)
0762 return;
0763
0764 TCanvas canvas("fit_canvas");
0765 canvas.Divide(2, 2);
0766 vector<RooPlot*> frames;
0767
0768 canvas.cd(1);
0769 if (massBins == 0) {
0770 frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes")));
0771 frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes")));
0772 frames.push_back(mass->frame(Name("All"), Title("All Probes")));
0773 } else {
0774 frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes"), Bins(massBins)));
0775 frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes"), Bins(massBins)));
0776 frames.push_back(mass->frame(Name("All"), Title("All Probes"), Bins(massBins)));
0777 }
0778 dataPass->plotOn(frames[0]);
0779 pdf.plotOn(frames[0], Slice(efficiencyCategory, "Passed"), ProjWData(*dataPass), LineColor(kGreen));
0780 pdf.plotOn(frames[0],
0781 Slice(efficiencyCategory, "Passed"),
0782 ProjWData(*dataPass),
0783 LineColor(kGreen),
0784 Components("backgroundPass"),
0785 LineStyle(kDashed));
0786 frames[0]->Draw();
0787
0788 canvas.cd(2);
0789 dataFail->plotOn(frames[1]);
0790 pdf.plotOn(frames[1], Slice(efficiencyCategory, "Failed"), ProjWData(*dataFail), LineColor(kRed));
0791 pdf.plotOn(frames[1],
0792 Slice(efficiencyCategory, "Failed"),
0793 ProjWData(*dataFail),
0794 LineColor(kRed),
0795 Components("backgroundFail"),
0796 LineStyle(kDashed));
0797 frames[1]->Draw();
0798
0799 canvas.cd(3);
0800 dataAll->plotOn(frames.back());
0801 pdf.plotOn(frames.back(), ProjWData(*dataAll), LineColor(kBlue));
0802 pdf.plotOn(frames.back(),
0803 ProjWData(*dataAll),
0804 LineColor(kBlue),
0805 Components("backgroundPass,backgroundFail"),
0806 LineStyle(kDashed));
0807 frames.back()->Draw();
0808
0809 canvas.cd(4);
0810 frames.push_back(mass->frame(Name("Fit Results"), Title("Fit Results")));
0811 pdf.paramOn(
0812 frames.back(), RooFit::Label(""), RooFit::Format("NELU", AutoPrecision(0)), RooFit::Layout(0.1, 0.9, 0.9));
0813
0814 frames.back()->findObject(Form("%s_paramBox", pdf.GetName()))->Draw();
0815
0816 canvas.Draw();
0817 canvas.Write();
0818 for (size_t i = 0; i < frames.size(); i++) {
0819 delete frames[i];
0820 }
0821 delete dataPass;
0822 delete dataFail;
0823 }
0824
0825 void TagProbeFitter::saveDistributionsPlot(RooWorkspace* w) {
0826
0827 RooAbsData* dataAll = w->data("data");
0828 RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0829 RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0830
0831 const RooArgSet* vars = dataAll->get();
0832 vector<RooRealVar*> reals;
0833 for (RooAbsArg* v : *vars) {
0834 if (!v->InheritsFrom("RooRealVar"))
0835 continue;
0836 reals.push_back((RooRealVar*)v);
0837 }
0838 TCanvas canvas("distributions_canvas");
0839 canvas.Divide(3, reals.size());
0840 vector<RooPlot*> frames;
0841 for (unsigned int i = 0; i < reals.size(); i++) {
0842
0843 canvas.cd(3 * i + 1);
0844 frames.push_back(reals[i]->frame(Name("Passing"), Title("Passing Probes"), Bins(100)));
0845 dataPass->plotOn(frames.back(), LineColor(kGreen));
0846 dataPass->statOn(frames.back());
0847 frames.back()->Draw();
0848
0849 canvas.cd(3 * i + 2);
0850 frames.push_back(reals[i]->frame(Name("Failing"), Title("Failing Probes"), Bins(100)));
0851 dataFail->plotOn(frames.back(), LineColor(kRed));
0852 dataFail->statOn(frames.back());
0853 frames.back()->Draw();
0854
0855 canvas.cd(3 * i + 3);
0856 frames.push_back(reals[i]->frame(Name("All"), Title("All Probes"), Bins(100)));
0857 dataAll->plotOn(frames.back(), LineColor(kBlue));
0858 dataAll->statOn(frames.back());
0859 frames.back()->Draw();
0860 }
0861 canvas.Draw();
0862 canvas.Write();
0863 for (size_t i = 0; i < frames.size(); i++) {
0864 delete frames[i];
0865 }
0866 delete dataPass;
0867 delete dataFail;
0868 }
0869
0870 void TagProbeFitter::saveEfficiencyPlots(RooDataSet& eff,
0871 const TString& effName,
0872 RooArgSet& binnedVariables,
0873 RooArgSet& mappedCategories) {
0874 bool isOnePoint =
0875 (eff.numEntries() == 1);
0876 for (auto it1 = binnedVariables.begin(); it1 != binnedVariables.end(); it1++) {
0877 RooRealVar* v1 = dynamic_cast<RooRealVar*>(*it1);
0878 RooArgSet binCategories1D;
0879 if (v1->numBins() == 1 && !isOnePoint)
0880 continue;
0881 for (auto it2 = binnedVariables.begin(); it2 != binnedVariables.end(); it2++) {
0882 RooRealVar* v2 = dynamic_cast<RooRealVar*>(*it2);
0883 if (v2 == v1)
0884 continue;
0885 if (v2->numBins() == 1 && !isOnePoint)
0886 continue;
0887 binCategories1D.addClone(
0888 RooBinningCategory(TString(v2->GetName()) + "_bins", TString(v2->GetName()) + "_bins", *v2));
0889
0890 RooArgSet binCategories2D;
0891 for (auto it3 = binnedVariables.begin(); it3 != binnedVariables.end(); it3++) {
0892 RooRealVar* v3 = dynamic_cast<RooRealVar*>(*it3);
0893 if (v3 == v1 || v3 == v2)
0894 continue;
0895 binCategories2D.addClone(
0896 RooBinningCategory(TString(v3->GetName()) + "_bins", TString(v3->GetName()) + "_bins", *v3));
0897 }
0898 RooMultiCategory allCats2D("allCats2D", "allCats2D", RooArgSet(binCategories2D, mappedCategories));
0899 if (allCats2D.numTypes() == 0) {
0900 makeEfficiencyPlot2D(eff, *v1, *v2, TString::Format("%s_%s_PLOT", v1->GetName(), v2->GetName()), "", effName);
0901 } else {
0902 RooDataSet myEff(eff);
0903 myEff.addColumn(allCats2D);
0904 for (auto const& [catNameStr, catValue] : allCats2D) {
0905 TString catName = catNameStr;
0906 if (catName.Contains("NotMapped"))
0907 continue;
0908 catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0909 makeEfficiencyPlot2D(myEff,
0910 *v1,
0911 *v2,
0912 TString::Format("%s_%s_PLOT_%s", v1->GetName(), v2->GetName(), catName.Data()),
0913 catName,
0914 effName,
0915 "allCats2D",
0916 catValue);
0917 }
0918 }
0919 }
0920 RooMultiCategory allCats1D("allCats1D", "allCats1D", RooArgSet(binCategories1D, mappedCategories));
0921 if (allCats1D.numTypes() == 0) {
0922 makeEfficiencyPlot1D(eff, *v1, TString::Format("%s_PLOT", v1->GetName()), "", effName);
0923 } else {
0924 RooDataSet myEff(eff);
0925 myEff.addColumn(allCats1D);
0926 for (auto const& [catNameStr, catValue] : allCats1D) {
0927 TString catName = catNameStr;
0928 if (catName.Contains("NotMapped"))
0929 continue;
0930 catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0931 TString plotName = TString::Format("%s_PLOT_%s", v1->GetName(), catName.Data());
0932 makeEfficiencyPlot1D(myEff, *v1, plotName, catName, effName, "allCats1D", catValue);
0933 }
0934 }
0935 }
0936 }
0937
0938 void TagProbeFitter::makeEfficiencyPlot1D(RooDataSet& eff,
0939 RooRealVar& v,
0940 const TString& plotName,
0941 const TString& plotTitle,
0942 const TString& effName,
0943 const char* catName,
0944 int catIndex) {
0945 TGraphAsymmErrors* p = new TGraphAsymmErrors();
0946 const RooArgSet* entry = eff.get();
0947 const RooRealVar& vi = dynamic_cast<const RooRealVar&>(*entry->find(v.GetName()));
0948 const RooRealVar& ei = dynamic_cast<const RooRealVar&>(*entry->find("efficiency"));
0949 for (unsigned int i = 0, n = eff.numEntries(); i < n; ++i) {
0950 entry = eff.get(i);
0951 if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
0952 continue;
0953 int j = p->GetN();
0954 p->Set(j + 1);
0955 p->SetPoint(j, vi.getVal(), ei.getVal());
0956 p->SetPointError(j, -vi.getAsymErrorLo(), vi.getAsymErrorHi(), -ei.getAsymErrorLo(), ei.getAsymErrorHi());
0957 }
0958 TCanvas canvas(plotName);
0959 TH1F* frame = new TH1F("frame", "Efficiency of " + effName, 1, v.getMin(), v.getMax());
0960 frame->SetDirectory(nullptr);
0961 p->SetNameTitle(Form("hxy_%s", eff.GetName()), "Efficiency of " + effName);
0962 p->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
0963 p->GetYaxis()->SetTitle("Efficiency of " + effName);
0964 frame->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
0965 frame->GetYaxis()->SetTitle("Efficiency of " + effName);
0966 frame->GetYaxis()->SetRangeUser(0, 1);
0967 frame->Draw();
0968 p->SetLineWidth(2);
0969 p->SetMarkerStyle(kFullCircle);
0970 p->SetMarkerSize(1.2);
0971 p->Draw("P SAME");
0972 canvas.Write();
0973 delete frame;
0974 delete p;
0975 }
0976
0977 void TagProbeFitter::makeEfficiencyPlot2D(RooDataSet& eff,
0978 RooRealVar& v1,
0979 RooRealVar& v2,
0980 const TString& plotName,
0981 const TString& plotTitle,
0982 const TString& effName,
0983 const char* catName,
0984 int catIndex) {
0985 TCanvas canvas(plotName);
0986 canvas.SetRightMargin(0.15);
0987 auto h = std::make_unique<TH2F>(plotName,
0988 plotName,
0989 v1.getBinning().numBins(),
0990 v1.getBinning().array(),
0991 v2.getBinning().numBins(),
0992 v2.getBinning().array());
0993 const RooArgSet* set = eff.get();
0994 RooRealVar* e = (RooRealVar*)set->find("efficiency");
0995 RooRealVar* v1_ = (RooRealVar*)set->find(v1.GetName());
0996 RooRealVar* v2_ = (RooRealVar*)set->find(v2.GetName());
0997 h->SetTitle(TString::Format("%s;%s%s;%s%s;Efficiency of %s",
0998 plotTitle.Data(),
0999 v1.GetTitle(),
1000 TString(v1.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v1.getUnit()).Data(),
1001 v2.GetTitle(),
1002 TString(v2.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v2.getUnit()).Data(),
1003 effName.Data()));
1004 h->SetOption("colztexte");
1005 h->GetZaxis()->SetRangeUser(-0.001, 1.001);
1006 h->SetStats(false);
1007 for (int i = 0; i < eff.numEntries(); i++) {
1008 const RooArgSet* entry = eff.get(i);
1009 if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
1010 continue;
1011 h->SetBinContent(h->FindBin(v1_->getVal(), v2_->getVal()), e->getVal());
1012 h->SetBinError(h->FindBin(v1_->getVal(), v2_->getVal()), (e->getErrorHi() - e->getErrorLo()) / 2.);
1013 }
1014 h->Draw();
1015 canvas.Draw();
1016 canvas.Write();
1017 }
1018
1019 void TagProbeFitter::doSBSEfficiency(RooWorkspace* w, RooRealVar& efficiency) {}
1020
1021 void TagProbeFitter::doCntEfficiency(RooWorkspace* w, RooRealVar& efficiency) {
1022 int pass = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
1023 int fail = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
1024 double e = (pass + fail == 0) ? 0 : pass / double(pass + fail);
1025
1026 double alpha = (1.0 - .68540158589942957) / 2;
1027 double lo = (pass == 0) ? 0.0 : ROOT::Math::beta_quantile(alpha, pass, fail + 1);
1028 double hi = (fail == 0) ? 1.0 : ROOT::Math::beta_quantile(1 - alpha, pass + 1, fail);
1029
1030
1031
1032
1033 efficiency.setVal(e);
1034 efficiency.setAsymError(lo - e, hi - e);
1035 }
1036
1037 void TagProbeFitter::varFixer(RooWorkspace* w, bool fix) {
1038 std::vector<std::string>::const_iterator it;
1039 for (it = fixVars.begin(); it < fixVars.end(); it++) {
1040 if (w->var((*it).c_str()))
1041 w->var((*it).c_str())->setAttribute("Constant", fix);
1042 else {
1043 std::cout << "TagProbeFitter: "
1044 << "Can't find a variable to fix: " << *it;
1045 }
1046 }
1047 }
1048
1049 void TagProbeFitter::varSaver(RooWorkspace* w) {
1050 if (!fixVarValues.empty()) {
1051 std::cout << "attempt to save variables more than once!" << std::endl;
1052 return;
1053 }
1054 for (std::string const& it : fixVars) {
1055 fixVarValues.push_back(w->var(it.c_str())->getVal());
1056 }
1057 }
1058
1059 void TagProbeFitter::varRestorer(RooWorkspace* w) {
1060 if (fixVarValues.size() == fixVars.size())
1061 for (unsigned int i = 0; i < fixVars.size(); i++) {
1062 std::cout << "setting variable " << fixVars[i].c_str() << std::endl;
1063 w->var(fixVars[i].c_str())->setVal(fixVarValues[i]);
1064 }
1065 else {
1066 std::cout << "fixVars and fixVarValues are not of the same size!" << std::endl;
1067 }
1068 }