File indexing completed on 2023-01-09 23:32:01
0001 #include "PhysicsTools/TagAndProbe/interface/TagProbeFitter.h"
0002 #include <memory>
0003
0004 #include <stdexcept>
0005
0006 #include "TROOT.h"
0007 #include "TFile.h"
0008 #include "TPad.h"
0009 #include "TText.h"
0010 #include "TCanvas.h"
0011 #include "TGraphAsymmErrors.h"
0012 #include "TH2F.h"
0013 #include "TStyle.h"
0014 #include "Math/QuantFuncMathCore.h"
0015 #include "RooAbsReal.h"
0016 #include "RooBinning.h"
0017 #include "RooBinningCategory.h"
0018 #include "RooCategory.h"
0019 #include "RooDataHist.h"
0020 #include "RooDataSet.h"
0021 #include "RooFitResult.h"
0022 #include "RooFormulaVar.h"
0023 #include "RooGlobalFunc.h"
0024 #include "RooLinkedListIter.h"
0025 #include "RooMappedCategory.h"
0026 #include "RooMinimizer.h"
0027 #include "RooMsgService.h"
0028 #include "RooMultiCategory.h"
0029 #include "RooNumIntConfig.h"
0030 #include "RooPlot.h"
0031 #include "RooProdPdf.h"
0032 #include "RooProfileLL.h"
0033 #include "RooRealVar.h"
0034 #include "RooThresholdCategory.h"
0035 #include "RooTrace.h"
0036 #include "RooWorkspace.h"
0037 #include "RooTreeDataStore.h"
0038
0039 using namespace std;
0040 using namespace RooFit;
0041
0042 TagProbeFitter::TagProbeFitter(const std::vector<std::string>& inputFileNames,
0043 string inputDirectoryName,
0044 string inputTreeName,
0045 string outputFileName,
0046 int numCPU_,
0047 bool saveWorkspace_,
0048 bool floatShapeParameters_,
0049 const std::vector<std::string>& fixVars_) {
0050 inputTree = new TChain((inputDirectoryName + "/" + inputTreeName).c_str());
0051 for (size_t i = 0; i < inputFileNames.size(); i++) {
0052 inputTree->Add(inputFileNames[i].c_str());
0053 }
0054 outputFile = new TFile(outputFileName.c_str(), "recreate");
0055 outputDirectory = outputFile->mkdir(inputDirectoryName.c_str());
0056 numCPU = numCPU_;
0057 saveWorkspace = saveWorkspace_;
0058 massBins = 0;
0059 floatShapeParameters = floatShapeParameters_;
0060 fixVars = fixVars_;
0061 weightVar = "";
0062 if (!floatShapeParameters && fixVars.empty())
0063 std::cout << "TagProbeFitter: "
0064 << "You wnat to fix some variables but do not specify them!";
0065
0066 gROOT->SetStyle("Plain");
0067 gStyle->SetTitleFillColor(0);
0068 gStyle->SetPalette(1);
0069 gStyle->SetOptStat(0);
0070 gStyle->SetPaintTextFormat(".2f");
0071
0072 quiet = false;
0073
0074 binnedFit = false;
0075
0076 doSaveDistributionsPlot = true;
0077
0078
0079 RooAbsReal::defaultIntegratorConfig()->setEpsAbs(1e-13);
0080 RooAbsReal::defaultIntegratorConfig()->setEpsRel(1e-13);
0081
0082 split_mode = 0;
0083 }
0084
0085 TagProbeFitter::~TagProbeFitter() {
0086 if (inputTree)
0087 delete inputTree;
0088 if (outputFile)
0089 outputFile->Close();
0090 }
0091
0092 void TagProbeFitter::setQuiet(bool quiet_) {
0093 quiet = quiet_;
0094 if (quiet) {
0095 RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
0096 } else {
0097 RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING);
0098 }
0099 }
0100
0101 void TagProbeFitter::setSplitMode(unsigned int nevents) { split_mode = nevents; }
0102
0103 bool TagProbeFitter::addVariable(string name, string title, double low, double hi, string units) {
0104 RooRealVar temp(name.c_str(), title.c_str(), low, hi, units.c_str());
0105 temp.setBins(5000, "cache");
0106 variables.addClone(temp);
0107 return true;
0108 }
0109
0110 bool TagProbeFitter::addCategory(string name, string title, string expression) {
0111 RooCategory* c = (RooCategory*)parameterParser.factory(expression.c_str());
0112 if (!c)
0113 return false;
0114
0115 c->SetName(name.c_str());
0116 c->SetTitle(title.c_str());
0117 variables.addClone(*c);
0118 return true;
0119 }
0120
0121 bool TagProbeFitter::addExpression(string expressionName,
0122 string title,
0123 string expression,
0124 const std::vector<string>& arguments) {
0125 expressionVars.push_back(make_pair(make_pair(expressionName, title), make_pair(expression, arguments)));
0126 return true;
0127 }
0128
0129 bool TagProbeFitter::addThresholdCategory(string categoryName, string title, string varName, double cutValue) {
0130 thresholdCategories.push_back(make_pair(make_pair(categoryName, title), make_pair(varName, cutValue)));
0131 return true;
0132 }
0133
0134 void TagProbeFitter::addPdf(string name, vector<string>& pdfCommands) { pdfs[name] = pdfCommands; }
0135
0136 void TagProbeFitter::setBinsForMassPlots(int bins) { massBins = bins; }
0137
0138 void TagProbeFitter::setWeightVar(const std::string& var) { weightVar = var; }
0139
0140 string TagProbeFitter::calculateEfficiency(string dirName,
0141 const std::vector<string>& effCats,
0142 const std::vector<string>& effStates,
0143 vector<string>& unbinnedVariables,
0144 map<string, vector<double> >& binnedReals,
0145 map<string, std::vector<string> >& binnedCategories,
0146 vector<string>& binToPDFmap) {
0147
0148 outputDirectory->cd();
0149
0150 gDirectory->mkdir(dirName.c_str())->cd();
0151
0152 RooArgSet dataVars;
0153
0154
0155 for (vector<string>::iterator v = unbinnedVariables.begin(); v != unbinnedVariables.end(); v++) {
0156 dataVars.addClone(variables[v->c_str()], true);
0157 if (binnedFit && (v == unbinnedVariables.begin())) {
0158 ((RooRealVar&)dataVars[v->c_str()]).setBins(massBins);
0159 }
0160 }
0161
0162 RooArgSet binnedVariables;
0163 RooArgSet binCategories;
0164 for (map<string, vector<double> >::iterator v = binnedReals.begin(); v != binnedReals.end(); v++) {
0165 TString name = v->first;
0166 if (variables.find(name) == nullptr) {
0167 cerr << "Binned variable '" << name << "' not found." << endl;
0168 return "Error";
0169 }
0170 binnedVariables.add(*dataVars.addClone(variables[name]));
0171 ((RooRealVar&)binnedVariables[name]).setBinning(RooBinning(v->second.size() - 1, &v->second[0]));
0172 binCategories.addClone(RooBinningCategory(name + "_bins", name + "_bins", (RooRealVar&)binnedVariables[name]));
0173 }
0174
0175
0176 RooArgSet categories;
0177 RooArgSet mappedCategories;
0178 for (map<string, vector<string> >::iterator v = binnedCategories.begin(); v != binnedCategories.end(); v++) {
0179 TString name = v->first;
0180 if (variables.find(name) == nullptr) {
0181 cerr << "Binned category '" << name << "' not found." << endl;
0182 return "Error";
0183 }
0184 categories.add(*dataVars.addClone(variables[name]));
0185 mappedCategories.addClone(RooMappedCategory(name + "_bins", name + "_bins", (RooCategory&)categories[name]));
0186 for (unsigned int i = 0; i < v->second.size(); i++) {
0187 ((RooMappedCategory&)mappedCategories[name + "_bins"])
0188 .map(v->second[i].c_str(), name + "_" + TString(v->second[i].c_str()).ReplaceAll(",", "_"));
0189 }
0190 }
0191
0192
0193 for (vector<string>::const_iterator effCat = effCats.begin(); effCat != effCats.end(); ++effCat) {
0194 if (variables.find(effCat->c_str()) != nullptr) {
0195 dataVars.addClone(variables[effCat->c_str()], true);
0196 }
0197 }
0198
0199
0200 for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator ev = expressionVars.begin(),
0201 eve = expressionVars.end();
0202 ev != eve;
0203 ++ev) {
0204 for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed; ++it) {
0205
0206 if (variables.find(it->c_str()))
0207 dataVars.addClone(variables[it->c_str()], true);
0208 }
0209 }
0210
0211 for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0212 tce = thresholdCategories.end();
0213 tc != tce;
0214 ++tc) {
0215 if (variables.find(tc->second.first.c_str()))
0216 dataVars.addClone(variables[tc->second.first.c_str()], true);
0217 }
0218
0219
0220 RooDataSet* data(nullptr);
0221 if (not split_mode) {
0222 data = new RooDataSet("data",
0223 "data",
0224 inputTree,
0225 dataVars,
0226 "",
0227 (weightVar.empty() ? nullptr : weightVar.c_str()));
0228
0229
0230 for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0231 ev = expressionVars.begin(),
0232 eve = expressionVars.end();
0233 ev != eve;
0234 ++ev) {
0235 RooArgList args;
0236 for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0237 ++it) {
0238 args.add(dataVars[it->c_str()]);
0239 }
0240 RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0241 RooRealVar* col = (RooRealVar*)data->addColumn(expr);
0242 dataVars.addClone(*col);
0243 }
0244
0245
0246 for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0247 tce = thresholdCategories.end();
0248 tc != tce;
0249 ++tc) {
0250 RooThresholdCategory tmp(tc->first.first.c_str(),
0251 tc->first.second.c_str(),
0252 (RooAbsReal&)dataVars[tc->second.first.c_str()],
0253 "above",
0254 1);
0255 tmp.addThreshold(tc->second.second, "below", 0);
0256 RooCategory* cat = (RooCategory*)data->addColumn(tmp);
0257 dataVars.addClone(*cat);
0258 }
0259 }
0260
0261
0262 RooMultiCategory allCats("allCats", "allCats", RooArgSet(binCategories, mappedCategories));
0263 string effName;
0264
0265 if (not split_mode) {
0266 data->addColumn(allCats);
0267
0268 if (effCats.size() == 1) {
0269 effName = effCats.front() + "::" + effStates.front();
0270 RooMappedCategory efficiencyCategory(
0271 "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0272 efficiencyCategory.map(effStates.front().c_str(), "Passed");
0273 data->addColumn(efficiencyCategory);
0274 } else {
0275 RooArgSet rooEffCats;
0276 string multiState = "{";
0277 for (size_t i = 0; i < effCats.size(); ++i) {
0278 if (i) {
0279 multiState += ";";
0280 effName += " && ";
0281 }
0282 rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0283 multiState += effStates[i];
0284 effName = effCats[i] + "::" + effStates[i];
0285 }
0286 multiState += "}";
0287 RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0288 RooMappedCategory efficiencyCategory(
0289 "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0290 efficiencyCategory.map(multiState.c_str(), "Passed");
0291 data->addColumn(efficiencyCategory);
0292 }
0293 }
0294
0295
0296 RooMappedCategory pdfCategory(
0297 "_pdfCategory_", "_pdfCategory_", allCats, (!binToPDFmap.empty()) ? binToPDFmap[0].c_str() : "all");
0298 for (unsigned int i = 1; i < binToPDFmap.size(); i += 2) {
0299 pdfCategory.map(binToPDFmap[i].c_str(), binToPDFmap[i + 1].c_str());
0300 }
0301 if (not split_mode)
0302 data->addColumn(pdfCategory);
0303
0304
0305 RooRealVar efficiency("efficiency", "Efficiency", 0, 1);
0306
0307 RooDataSet fitEfficiency("fit_eff",
0308 "Efficiency from unbinned ML fit",
0309 RooArgSet(RooArgSet(binnedVariables, categories), efficiency),
0310 StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0311
0312 RooDataSet cntEfficiency("cnt_eff",
0313 "Efficiency from counting",
0314 RooArgSet(RooArgSet(binnedVariables, categories), efficiency),
0315 StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0316
0317 if (not split_mode) {
0318 if (!floatShapeParameters) {
0319
0320 RooWorkspace* w = new RooWorkspace();
0321 w->import(*data);
0322 efficiency.setVal(0);
0323 efficiency.setAsymError(0, 0);
0324 std::cout << "ALL dataset: calling doFitEfficiency with pdf: " << pdfCategory.getLabel() << std::endl;
0325 doFitEfficiency(w, pdfCategory.getLabel(), efficiency);
0326 delete w;
0327 }
0328 } else {
0329
0330 inputTree->SetBranchStatus("*", false);
0331 TIterator* iter = dataVars.createIterator();
0332 TObject* obj(nullptr);
0333 while ((obj = iter->Next()))
0334 inputTree->SetBranchStatus(obj->GetName(), true);
0335 }
0336
0337
0338
0339
0340 std::vector<Int_t> allCatValues;
0341 TIterator* it = allCats.typeIterator();
0342 for (RooCatType* t = (RooCatType*)it->Next(); t != nullptr; t = (RooCatType*)it->Next())
0343 allCatValues.push_back(t->getVal());
0344 for (auto iCat : allCatValues) {
0345 const RooCatType* t = allCats.lookupType(iCat);
0346
0347 TString catName = t->GetName();
0348
0349 if (catName.Contains("NotMapped"))
0350 continue;
0351
0352 RooDataSet* data_bin(nullptr);
0353 RooArgSet tmpVars;
0354
0355 if (not split_mode) {
0356
0357 data_bin = (RooDataSet*)data->reduce(Cut(TString::Format("allCats==%d", iCat)));
0358 } else {
0359 data_bin = new RooDataSet("data", "data", dataVars, (weightVar.empty() ? nullptr : weightVar.c_str()));
0360
0361 TDirectory* tmp = gDirectory;
0362 gROOT->cd();
0363
0364
0365
0366 unsigned int n_entries = inputTree->GetEntries();
0367 printf("Input number of events: %u\n", n_entries);
0368 unsigned int first_entry = 0;
0369 while (first_entry < n_entries) {
0370 TTree* copyTree = inputTree->CopyTree("", "", split_mode, first_entry);
0371 RooTreeDataStore store("reader",
0372 "reader",
0373 dataVars,
0374 *copyTree,
0375 "",
0376 (weightVar.empty() ? nullptr : weightVar.c_str()));
0377 for (unsigned int i = 0; i < store.GetEntries(); ++i) {
0378 store.get(i);
0379 if (allCats.getIndex() == iCat) {
0380 data_bin->add(dataVars, weightVar.empty() ? 1.0 : dataVars.getRealValue(weightVar.c_str()));
0381 }
0382 }
0383 delete copyTree;
0384 first_entry += split_mode;
0385 data_bin->Print("V");
0386 }
0387 data_bin->Print("V");
0388 tmp->cd();
0389
0390
0391 for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0392 ev = expressionVars.begin(),
0393 eve = expressionVars.end();
0394 ev != eve;
0395 ++ev) {
0396 RooArgList args;
0397 for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0398 ++it) {
0399 args.add(dataVars[it->c_str()]);
0400 }
0401 RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0402 RooRealVar* col = (RooRealVar*)data_bin->addColumn(expr);
0403 tmpVars.add(*dataVars.addClone(*col));
0404 }
0405
0406
0407 for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0408 tce = thresholdCategories.end();
0409 tc != tce;
0410 ++tc) {
0411 RooThresholdCategory tmp(tc->first.first.c_str(),
0412 tc->first.second.c_str(),
0413 (RooAbsReal&)dataVars[tc->second.first.c_str()],
0414 "above",
0415 1);
0416 tmp.addThreshold(tc->second.second, "below", 0);
0417 RooCategory* cat = (RooCategory*)data_bin->addColumn(tmp);
0418 tmpVars.add(*dataVars.addClone(*cat));
0419 }
0420
0421
0422 if (effCats.size() == 1) {
0423 effName = effCats.front() + "::" + effStates.front();
0424 RooMappedCategory efficiencyCategory(
0425 "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0426 efficiencyCategory.map(effStates.front().c_str(), "Passed");
0427 data_bin->addColumn(efficiencyCategory);
0428 } else {
0429 RooArgSet rooEffCats;
0430 string multiState = "{";
0431 for (size_t i = 0; i < effCats.size(); ++i) {
0432 if (i) {
0433 multiState += ";";
0434 effName += " && ";
0435 }
0436 rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0437 multiState += effStates[i];
0438 effName = effCats[i] + "::" + effStates[i];
0439 }
0440 multiState += "}";
0441 RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0442 RooMappedCategory efficiencyCategory(
0443 "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0444 efficiencyCategory.map(multiState.c_str(), "Passed");
0445 data_bin->addColumn(efficiencyCategory);
0446 }
0447 data_bin->addColumn(pdfCategory);
0448 }
0449
0450
0451 const RooArgSet* row = data_bin->get();
0452
0453
0454 TString pdfName(((RooCategory*)row->find("_pdfCategory_"))->getLabel());
0455
0456
0457 TString dirName = catName;
0458 dirName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "__");
0459 if (pdfName.Length() > 0) {
0460 dirName.Append("__").Append(pdfName);
0461 }
0462
0463 cout << "Fitting bin: " << dirName << endl;
0464
0465 gDirectory->mkdir(dirName)->cd();
0466
0467
0468 RooWorkspace* w = new RooWorkspace();
0469
0470 w->import(*data_bin);
0471 delete data_bin;
0472 data_bin =
0473 dynamic_cast<RooDataSet*>(w->data("data"));
0474
0475
0476 if (doSaveDistributionsPlot)
0477 saveDistributionsPlot(w);
0478
0479 if (data_bin->numEntries() > 0) {
0480
0481 RooArgSet meanOfVariables;
0482 RooLinkedListIter vit = binnedVariables.iterator();
0483 for (RooRealVar* v = (RooRealVar*)vit.Next(); v != nullptr; v = (RooRealVar*)vit.Next()) {
0484 meanOfVariables.addClone(*v);
0485 double mean = w->data("data")->mean(*v);
0486 RooBinning binning((RooBinning&)v->getBinning());
0487 int ind = binning.binNumber(mean);
0488 RooRealVar& newVar = (RooRealVar&)meanOfVariables[v->GetName()];
0489 newVar.setVal(mean);
0490 newVar.setAsymError(binning.binLow(ind) - mean, binning.binHigh(ind) - mean);
0491 }
0492
0493
0494
0495 meanOfVariables.addClone(*data_bin->get(0), true);
0496
0497 efficiency.setVal(0);
0498 efficiency.setAsymError(0, 0);
0499 doFitEfficiency(w, pdfName.Data(), efficiency);
0500 fitEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0501
0502
0503
0504
0505
0506 efficiency.setVal(0);
0507 doCntEfficiency(w, efficiency);
0508 cntEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0509 }
0510
0511 if (saveWorkspace) {
0512 w->Write("w");
0513 }
0514
0515 delete w;
0516 if (split_mode)
0517 dataVars.remove(tmpVars);
0518
0519 gDirectory->cd("..");
0520 }
0521
0522
0523 fitEfficiency.Write();
0524 gDirectory->mkdir("fit_eff_plots")->cd();
0525 saveEfficiencyPlots(fitEfficiency, effName, binnedVariables, mappedCategories);
0526 gDirectory->cd("..");
0527
0528
0529
0530
0531
0532
0533 cntEfficiency.Write();
0534 gDirectory->mkdir("cnt_eff_plots")->cd();
0535 saveEfficiencyPlots(cntEfficiency, effName, binnedVariables, mappedCategories);
0536 gDirectory->cd("..");
0537
0538 if (not split_mode)
0539 delete data;
0540
0541
0542 return "";
0543 }
0544
0545 void TagProbeFitter::doFitEfficiency(RooWorkspace* w, string pdfName, RooRealVar& efficiency) {
0546
0547 if (pdfName == "all") {
0548 return;
0549 }
0550
0551 createPdf(w, pdfs[pdfName]);
0552
0553 setInitialValues(w);
0554 std::unique_ptr<RooFitResult> res;
0555
0556 RooAbsData* data = w->data("data");
0557 std::unique_ptr<RooDataHist> bdata;
0558 if (binnedFit) {
0559
0560 const RooArgSet* dataObs = data->get(0);
0561
0562 RooArgSet* obs = w->pdf("simPdf")->getObservables(dataObs);
0563 bdata = std::make_unique<RooDataHist>("data_binned", "data_binned", *obs, *data);
0564 w->import(*bdata);
0565 data = w->data("data_binned");
0566 delete obs;
0567 }
0568
0569 double totPassing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0570 double totFailing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0571
0572 RooAbsReal* simNLL = w->pdf("simPdf")->createNLL(*data, Extended(true), NumCPU(numCPU));
0573
0574 RooMinimizer minimizer(*simNLL);
0575 minimizer.setStrategy(1);
0576 minimizer.setProfile(true);
0577 RooProfileLL profileLL("simPdfNLL", "", *simNLL, *w->var("efficiency"));
0578
0579
0580
0581
0582
0583
0584
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594 if (!fixVars.empty()) {
0595
0596 if (!floatShapeParameters && fixVarValues.empty()) {
0597
0598
0599
0600
0601
0602
0603
0604
0605 varFixer(w, true);
0606
0607 minimizer.minimize("Minuit2", "Scan");
0608 minimizer.migrad();
0609 minimizer.hesse();
0610
0611
0612
0613
0614 varFixer(w, false);
0615
0616 minimizer.minimize("Minuit2", "Scan");
0617 minimizer.migrad();
0618 minimizer.hesse();
0619
0620
0621
0622
0623 varSaver(w);
0624
0625 }
0626
0627
0628 if (!floatShapeParameters)
0629 varRestorer(w);
0630
0631
0632 minimizer.minimize("Minuit2", "Scan");
0633 minimizer.migrad();
0634 minimizer.hesse();
0635
0636 profileLL.getVal();
0637 RooMinimizer* profMinuit = profileLL.minimizer();
0638 profMinuit->setProfile(true);
0639 profMinuit->setStrategy(2);
0640 profMinuit->setPrintLevel(1);
0641 profMinuit->minos(*w->var("efficiency"));
0642 res.reset(profMinuit->save());
0643
0644
0645
0646 }
0647
0648
0649 if (floatShapeParameters) {
0650
0651 varFixer(w, false);
0652
0653
0654 minimizer.minimize("Minuit2", "Scan");
0655 minimizer.migrad();
0656 minimizer.hesse();
0657 res.reset(w->pdf("simPdf")->fitTo(*data,
0658 Save(true),
0659 Extended(true),
0660 NumCPU(numCPU),
0661 Strategy(2),
0662 Minos(*w->var("efficiency")),
0663 PrintLevel(quiet ? -1 : 1),
0664 PrintEvalErrors(quiet ? -1 : 1),
0665 Warnings(!quiet)));
0666 }
0667
0668
0669 res->Write("fitresults");
0670 w->saveSnapshot("finalState", w->components());
0671 saveFitPlot(w);
0672
0673 RooRealVar* e = (RooRealVar*)res->floatParsFinal().find("efficiency");
0674
0675
0676 efficiency.setVal(e->getVal());
0677 Double_t errLo = e->getErrorLo(), errHi = e->getErrorHi();
0678 if (errLo == 0 && e->getVal() < 0.5)
0679 errLo = e->getMin() - e->getVal();
0680 if (errHi == 0 && e->getVal() > 0.5)
0681 errHi = e->getMax() - e->getVal();
0682 efficiency.setAsymError(errLo, errHi);
0683
0684 if (totPassing * totFailing == 0) {
0685 RooRealVar* nTot = (RooRealVar*)res->floatParsFinal().find("numTot");
0686 RooRealVar* fSig = (RooRealVar*)res->floatParsFinal().find("fSigAll");
0687 double cerr = ROOT::Math::beta_quantile(1 - (1.0 - .68540158589942957) / 2, 1, nTot->getVal() * fSig->getVal());
0688
0689
0690
0691
0692
0693
0694
0695
0696
0697 if (totPassing == 0) {
0698 efficiency.setVal(0);
0699 efficiency.setAsymError(0, cerr);
0700 } else {
0701 efficiency.setVal(1);
0702 efficiency.setAsymError(-cerr, 0);
0703 }
0704 }
0705
0706 delete simNLL;
0707 }
0708
0709 void TagProbeFitter::createPdf(RooWorkspace* w, vector<string>& pdfCommands) {
0710
0711 for (unsigned int i = 0; i < pdfCommands.size(); i++) {
0712 const std::string& command = pdfCommands[i];
0713 if (command.find("#import ") == 0) {
0714 TDirectory* here = gDirectory;
0715 w->import(command.substr(8).c_str());
0716 here->cd();
0717 } else {
0718 TDirectory* here = gDirectory;
0719 w->factory(command.c_str());
0720 here->cd();
0721 }
0722 }
0723
0724
0725 w->factory("expr::nSignalPass('efficiency*fSigAll*numTot', efficiency, fSigAll[.9,0,1],numTot[1,0,1e10])");
0726 w->factory("expr::nSignalFail('(1-efficiency)*fSigAll*numTot', efficiency, fSigAll,numTot)");
0727 w->factory("expr::nBkgPass('effBkg*(1-fSigAll)*numTot', effBkg[.9,0,1],fSigAll,numTot)");
0728 w->factory("expr::nBkgFail('(1-effBkg)*(1-fSigAll)*numTot', effBkg,fSigAll,numTot)");
0729 TString sPass = "signal", sFail = "signal";
0730 if (w->pdf("signalPass") != nullptr && w->pdf("signalFail") != nullptr) {
0731 if (w->pdf("signal") != nullptr)
0732 throw std::logic_error(
0733 "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0734 sPass = "signalPass";
0735 sFail = "signalFail";
0736 } else if (w->pdf("signal") != nullptr) {
0737 if (w->pdf("signalPass") != nullptr || w->pdf("signalFail") != nullptr) {
0738 throw std::logic_error(
0739 "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0740 }
0741 } else {
0742 throw std::logic_error("You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail')");
0743 }
0744 w->factory("SUM::pdfPass(nSignalPass*" + sPass + ", nBkgPass*backgroundPass)");
0745 w->factory("SUM::pdfFail(nSignalFail*" + sFail + ", nBkgFail*backgroundFail)");
0746
0747 w->factory("SIMUL::simPdf(_efficiencyCategory_, Passed=pdfPass, Failed=pdfFail)");
0748
0749 if (w->pdf("simPdf") == nullptr)
0750 throw std::runtime_error("Could not create simultaneous fit pdf.");
0751 if (w->var("signalFractionInPassing") == nullptr)
0752 w->factory("signalFractionInPassing[0.9]");
0753 }
0754
0755 void TagProbeFitter::setInitialValues(RooWorkspace* w) {
0756
0757 double totPassing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0758 double totFailing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0759
0760
0761
0762 w->var("numTot")->setVal(totPassing + totFailing);
0763 w->var("numTot")->setMax(2.0 * (totPassing + totFailing) + 10);
0764
0765 if (totPassing == 0) {
0766 w->var("efficiency")->setVal(0.0);
0767 w->var("efficiency")->setAsymError(0, 1);
0768 w->var("efficiency")->setConstant(false);
0769 } else if (totFailing == 0) {
0770 w->var("efficiency")->setVal(1.0);
0771 w->var("efficiency")->setAsymError(-1, 0);
0772 w->var("efficiency")->setConstant(false);
0773 } else {
0774 w->var("efficiency")->setConstant(false);
0775 }
0776
0777
0778 w->saveSnapshot("initialState", w->components());
0779 }
0780
0781 void TagProbeFitter::saveFitPlot(RooWorkspace* w) {
0782
0783 RooCategory& efficiencyCategory = *w->cat("_efficiencyCategory_");
0784 RooAbsData* dataAll = (binnedFit ? w->data("data_binned") : w->data("data"));
0785 RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0786 RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0787 RooAbsPdf& pdf = *w->pdf("simPdf");
0788 std::unique_ptr<RooArgSet> obs(pdf.getObservables(*dataAll));
0789 RooRealVar* mass = nullptr;
0790 RooLinkedListIter it = obs->iterator();
0791 for (RooAbsArg* v = (RooAbsArg*)it.Next(); v != nullptr; v = (RooAbsArg*)it.Next()) {
0792 if (!v->InheritsFrom("RooRealVar"))
0793 continue;
0794 mass = (RooRealVar*)v;
0795 break;
0796 }
0797 if (!mass)
0798 return;
0799
0800 TCanvas canvas("fit_canvas");
0801 canvas.Divide(2, 2);
0802 vector<RooPlot*> frames;
0803
0804 canvas.cd(1);
0805 if (massBins == 0) {
0806 frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes")));
0807 frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes")));
0808 frames.push_back(mass->frame(Name("All"), Title("All Probes")));
0809 } else {
0810 frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes"), Bins(massBins)));
0811 frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes"), Bins(massBins)));
0812 frames.push_back(mass->frame(Name("All"), Title("All Probes"), Bins(massBins)));
0813 }
0814 dataPass->plotOn(frames[0]);
0815 pdf.plotOn(frames[0], Slice(efficiencyCategory, "Passed"), ProjWData(*dataPass), LineColor(kGreen));
0816 pdf.plotOn(frames[0],
0817 Slice(efficiencyCategory, "Passed"),
0818 ProjWData(*dataPass),
0819 LineColor(kGreen),
0820 Components("backgroundPass"),
0821 LineStyle(kDashed));
0822 frames[0]->Draw();
0823
0824 canvas.cd(2);
0825 dataFail->plotOn(frames[1]);
0826 pdf.plotOn(frames[1], Slice(efficiencyCategory, "Failed"), ProjWData(*dataFail), LineColor(kRed));
0827 pdf.plotOn(frames[1],
0828 Slice(efficiencyCategory, "Failed"),
0829 ProjWData(*dataFail),
0830 LineColor(kRed),
0831 Components("backgroundFail"),
0832 LineStyle(kDashed));
0833 frames[1]->Draw();
0834
0835 canvas.cd(3);
0836 dataAll->plotOn(frames.back());
0837 pdf.plotOn(frames.back(), ProjWData(*dataAll), LineColor(kBlue));
0838 pdf.plotOn(frames.back(),
0839 ProjWData(*dataAll),
0840 LineColor(kBlue),
0841 Components("backgroundPass,backgroundFail"),
0842 LineStyle(kDashed));
0843 frames.back()->Draw();
0844
0845 canvas.cd(4);
0846 frames.push_back(mass->frame(Name("Fit Results"), Title("Fit Results")));
0847 pdf.paramOn(frames.back(), dataAll, "", 0, "NELU", 0.1, 0.9, 0.9);
0848
0849 frames.back()->findObject(Form("%s_paramBox", pdf.GetName()))->Draw();
0850
0851 canvas.Draw();
0852 canvas.Write();
0853 for (size_t i = 0; i < frames.size(); i++) {
0854 delete frames[i];
0855 }
0856 delete dataPass;
0857 delete dataFail;
0858 }
0859
0860 void TagProbeFitter::saveDistributionsPlot(RooWorkspace* w) {
0861
0862 RooAbsData* dataAll = w->data("data");
0863 RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0864 RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0865
0866 const RooArgSet* vars = dataAll->get();
0867 vector<RooRealVar*> reals;
0868 RooLinkedListIter it = vars->iterator();
0869 for (RooAbsArg* v = (RooAbsArg*)it.Next(); v != nullptr; v = (RooAbsArg*)it.Next()) {
0870 if (!v->InheritsFrom("RooRealVar"))
0871 continue;
0872 reals.push_back((RooRealVar*)v);
0873 }
0874 TCanvas canvas("distributions_canvas");
0875 canvas.Divide(3, reals.size());
0876 vector<RooPlot*> frames;
0877 for (unsigned int i = 0; i < reals.size(); i++) {
0878
0879 canvas.cd(3 * i + 1);
0880 frames.push_back(reals[i]->frame(Name("Passing"), Title("Passing Probes"), Bins(100)));
0881 dataPass->plotOn(frames.back(), LineColor(kGreen));
0882 dataPass->statOn(frames.back());
0883 frames.back()->Draw();
0884
0885 canvas.cd(3 * i + 2);
0886 frames.push_back(reals[i]->frame(Name("Failing"), Title("Failing Probes"), Bins(100)));
0887 dataFail->plotOn(frames.back(), LineColor(kRed));
0888 dataFail->statOn(frames.back());
0889 frames.back()->Draw();
0890
0891 canvas.cd(3 * i + 3);
0892 frames.push_back(reals[i]->frame(Name("All"), Title("All Probes"), Bins(100)));
0893 dataAll->plotOn(frames.back(), LineColor(kBlue));
0894 dataAll->statOn(frames.back());
0895 frames.back()->Draw();
0896 }
0897 canvas.Draw();
0898 canvas.Write();
0899 for (size_t i = 0; i < frames.size(); i++) {
0900 delete frames[i];
0901 }
0902 delete dataPass;
0903 delete dataFail;
0904 }
0905
0906 void TagProbeFitter::saveEfficiencyPlots(RooDataSet& eff,
0907 const TString& effName,
0908 RooArgSet& binnedVariables,
0909 RooArgSet& mappedCategories) {
0910 RooLinkedListIter v1it = binnedVariables.iterator();
0911 bool isOnePoint =
0912 (eff.numEntries() == 1);
0913 for (RooRealVar* v1 = (RooRealVar*)v1it.Next(); v1 != nullptr; v1 = (RooRealVar*)v1it.Next()) {
0914 RooArgSet binCategories1D;
0915 if (v1->numBins() == 1 && !isOnePoint)
0916 continue;
0917 RooLinkedListIter v2it = binnedVariables.iterator();
0918 for (RooRealVar* v2 = (RooRealVar*)v2it.Next(); v2 != nullptr; v2 = (RooRealVar*)v2it.Next()) {
0919 if (v2 == v1)
0920 continue;
0921 if (v2->numBins() == 1 && !isOnePoint)
0922 continue;
0923 binCategories1D.addClone(
0924 RooBinningCategory(TString(v2->GetName()) + "_bins", TString(v2->GetName()) + "_bins", *v2));
0925
0926 RooArgSet binCategories2D;
0927 RooLinkedListIter v3it = binnedVariables.iterator();
0928 for (RooRealVar* v3 = (RooRealVar*)v3it.Next(); v3 != nullptr; v3 = (RooRealVar*)v3it.Next()) {
0929 if (v3 == v1 || v3 == v2)
0930 continue;
0931 binCategories2D.addClone(
0932 RooBinningCategory(TString(v3->GetName()) + "_bins", TString(v3->GetName()) + "_bins", *v3));
0933 }
0934 RooMultiCategory allCats2D("allCats2D", "allCats2D", RooArgSet(binCategories2D, mappedCategories));
0935 if (allCats2D.numTypes() == 0) {
0936 makeEfficiencyPlot2D(eff, *v1, *v2, TString::Format("%s_%s_PLOT", v1->GetName(), v2->GetName()), "", effName);
0937 } else {
0938 RooDataSet myEff(eff);
0939 myEff.addColumn(allCats2D);
0940 std::unique_ptr<TIterator> catIt(allCats2D.typeIterator());
0941 for (RooCatType* t = (RooCatType*)catIt->Next(); t != nullptr; t = (RooCatType*)catIt->Next()) {
0942 TString catName = t->GetName();
0943 if (catName.Contains("NotMapped"))
0944 continue;
0945 catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0946 makeEfficiencyPlot2D(myEff,
0947 *v1,
0948 *v2,
0949 TString::Format("%s_%s_PLOT_%s", v1->GetName(), v2->GetName(), catName.Data()),
0950 catName,
0951 effName,
0952 "allCats2D",
0953 t->getVal());
0954 }
0955 }
0956 }
0957 RooMultiCategory allCats1D("allCats1D", "allCats1D", RooArgSet(binCategories1D, mappedCategories));
0958 if (allCats1D.numTypes() == 0) {
0959 makeEfficiencyPlot1D(eff, *v1, TString::Format("%s_PLOT", v1->GetName()), "", effName);
0960 } else {
0961 RooDataSet myEff(eff);
0962 myEff.addColumn(allCats1D);
0963 std::unique_ptr<TIterator> catIt(allCats1D.typeIterator());
0964 for (RooCatType* t = (RooCatType*)catIt->Next(); t != nullptr; t = (RooCatType*)catIt->Next()) {
0965 TString catName = t->GetName();
0966 if (catName.Contains("NotMapped"))
0967 continue;
0968 catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0969 makeEfficiencyPlot1D(myEff,
0970 *v1,
0971 TString::Format("%s_PLOT_%s", v1->GetName(), catName.Data()),
0972 catName,
0973 effName,
0974 "allCats1D",
0975 t->getVal());
0976 }
0977 }
0978 }
0979 }
0980
0981 void TagProbeFitter::makeEfficiencyPlot1D(RooDataSet& eff,
0982 RooRealVar& v,
0983 const TString& plotName,
0984 const TString& plotTitle,
0985 const TString& effName,
0986 const char* catName,
0987 int catIndex) {
0988 TGraphAsymmErrors* p = new TGraphAsymmErrors();
0989 const RooArgSet* entry = eff.get();
0990 const RooRealVar& vi = dynamic_cast<const RooRealVar&>(*entry->find(v.GetName()));
0991 const RooRealVar& ei = dynamic_cast<const RooRealVar&>(*entry->find("efficiency"));
0992 for (unsigned int i = 0, n = eff.numEntries(); i < n; ++i) {
0993 entry = eff.get(i);
0994 if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
0995 continue;
0996 int j = p->GetN();
0997 p->Set(j + 1);
0998 p->SetPoint(j, vi.getVal(), ei.getVal());
0999 p->SetPointError(j, -vi.getAsymErrorLo(), vi.getAsymErrorHi(), -ei.getAsymErrorLo(), ei.getAsymErrorHi());
1000 }
1001 TCanvas canvas(plotName);
1002 TH1F* frame = new TH1F("frame", "Efficiency of " + effName, 1, v.getMin(), v.getMax());
1003 frame->SetDirectory(nullptr);
1004 p->SetNameTitle(Form("hxy_%s", eff.GetName()), "Efficiency of " + effName);
1005 p->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
1006 p->GetYaxis()->SetTitle("Efficiency of " + effName);
1007 frame->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
1008 frame->GetYaxis()->SetTitle("Efficiency of " + effName);
1009 frame->GetYaxis()->SetRangeUser(0, 1);
1010 frame->Draw();
1011 p->SetLineWidth(2);
1012 p->SetMarkerStyle(kFullCircle);
1013 p->SetMarkerSize(1.2);
1014 p->Draw("P SAME");
1015 canvas.Write();
1016 delete frame;
1017 delete p;
1018 }
1019
1020 void TagProbeFitter::makeEfficiencyPlot2D(RooDataSet& eff,
1021 RooRealVar& v1,
1022 RooRealVar& v2,
1023 const TString& plotName,
1024 const TString& plotTitle,
1025 const TString& effName,
1026 const char* catName,
1027 int catIndex) {
1028 TCanvas canvas(plotName);
1029 canvas.SetRightMargin(0.15);
1030 TH2F* h = new TH2F(plotName,
1031 plotName,
1032 v1.getBinning().numBins(),
1033 v1.getBinning().array(),
1034 v2.getBinning().numBins(),
1035 v2.getBinning().array());
1036 const RooArgSet* set = eff.get();
1037 RooRealVar* e = (RooRealVar*)set->find("efficiency");
1038 RooRealVar* v1_ = (RooRealVar*)set->find(v1.GetName());
1039 RooRealVar* v2_ = (RooRealVar*)set->find(v2.GetName());
1040 h->SetTitle(TString::Format("%s;%s%s;%s%s;Efficiency of %s",
1041 plotTitle.Data(),
1042 v1.GetTitle(),
1043 TString(v1.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v1.getUnit()).Data(),
1044 v2.GetTitle(),
1045 TString(v2.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v2.getUnit()).Data(),
1046 effName.Data()));
1047 h->SetOption("colztexte");
1048 h->GetZaxis()->SetRangeUser(-0.001, 1.001);
1049 h->SetStats(kFALSE);
1050 for (int i = 0; i < eff.numEntries(); i++) {
1051 const RooArgSet* entry = eff.get(i);
1052 if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
1053 continue;
1054 h->SetBinContent(h->FindBin(v1_->getVal(), v2_->getVal()), e->getVal());
1055 h->SetBinError(h->FindBin(v1_->getVal(), v2_->getVal()), (e->getErrorHi() - e->getErrorLo()) / 2.);
1056 }
1057 h->Draw();
1058 canvas.Draw();
1059 canvas.Write();
1060 delete h;
1061 }
1062
1063 void TagProbeFitter::doSBSEfficiency(RooWorkspace* w, RooRealVar& efficiency) {}
1064
1065 void TagProbeFitter::doCntEfficiency(RooWorkspace* w, RooRealVar& efficiency) {
1066 int pass = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
1067 int fail = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
1068 double e = (pass + fail == 0) ? 0 : pass / double(pass + fail);
1069
1070 double alpha = (1.0 - .68540158589942957) / 2;
1071 double lo = (pass == 0) ? 0.0 : ROOT::Math::beta_quantile(alpha, pass, fail + 1);
1072 double hi = (fail == 0) ? 1.0 : ROOT::Math::beta_quantile(1 - alpha, pass + 1, fail);
1073
1074
1075
1076
1077 efficiency.setVal(e);
1078 efficiency.setAsymError(lo - e, hi - e);
1079 }
1080
1081 void TagProbeFitter::varFixer(RooWorkspace* w, bool fix) {
1082 std::vector<std::string>::const_iterator it;
1083 for (it = fixVars.begin(); it < fixVars.end(); it++) {
1084 if (w->var((*it).c_str()))
1085 w->var((*it).c_str())->setAttribute("Constant", fix);
1086 else {
1087 std::cout << "TagProbeFitter: "
1088 << "Can't find a variable to fix: " << *it;
1089 }
1090 }
1091 }
1092
1093 void TagProbeFitter::varSaver(RooWorkspace* w) {
1094 if (!fixVarValues.empty()) {
1095 std::cout << "attempt to save variables more than once!" << std::endl;
1096 return;
1097 }
1098 std::vector<std::string>::const_iterator it;
1099 for (it = fixVars.begin(); it < fixVars.end(); it++) {
1100 fixVarValues.push_back(w->var((*it).c_str())->getVal());
1101 }
1102 }
1103
1104 void TagProbeFitter::varRestorer(RooWorkspace* w) {
1105 if (fixVarValues.size() == fixVars.size())
1106 for (unsigned int i = 0; i < fixVars.size(); i++) {
1107 std::cout << "setting variable " << fixVars[i].c_str() << std::endl;
1108 w->var(fixVars[i].c_str())->setVal(fixVarValues[i]);
1109 }
1110 else {
1111 std::cout << "fixVars and fixVarValues are not of the same size!" << std::endl;
1112 }
1113 }