File indexing completed on 2024-04-06 12:24:12
0001 #include "PhysicsTools/TagAndProbe/interface/TagProbeFitter.h"
0002 #include <memory>
0003
0004 #include <stdexcept>
0005
0006 #include "TROOT.h"
0007 #include "TFile.h"
0008 #include "TPad.h"
0009 #include "TText.h"
0010 #include "TCanvas.h"
0011 #include "TGraphAsymmErrors.h"
0012 #include "TH2F.h"
0013 #include "TStyle.h"
0014 #include "Math/QuantFuncMathCore.h"
0015 #include "RooAbsReal.h"
0016 #include "RooBinning.h"
0017 #include "RooBinningCategory.h"
0018 #include "RooCategory.h"
0019 #include "RooDataHist.h"
0020 #include "RooDataSet.h"
0021 #include "RooFitLegacy/RooCatTypeLegacy.h"
0022 #include "RooFitResult.h"
0023 #include "RooFormulaVar.h"
0024 #include "RooGlobalFunc.h"
0025 #include "RooLinkedListIter.h"
0026 #include "RooMappedCategory.h"
0027 #include "RooMinimizer.h"
0028 #include "RooMsgService.h"
0029 #include "RooMultiCategory.h"
0030 #include "RooNumIntConfig.h"
0031 #include "RooPlot.h"
0032 #include "RooProdPdf.h"
0033 #include "RooProfileLL.h"
0034 #include "RooRealVar.h"
0035 #include "RooThresholdCategory.h"
0036 #include "RooTrace.h"
0037 #include "RooWorkspace.h"
0038 #include "RooTreeDataStore.h"
0039
0040 using namespace std;
0041 using namespace RooFit;
0042
0043 TagProbeFitter::TagProbeFitter(const std::vector<std::string>& inputFileNames,
0044 string inputDirectoryName,
0045 string inputTreeName,
0046 string outputFileName,
0047 int numCPU_,
0048 bool saveWorkspace_,
0049 bool floatShapeParameters_,
0050 const std::vector<std::string>& fixVars_) {
0051 inputTree = new TChain((inputDirectoryName + "/" + inputTreeName).c_str());
0052 for (size_t i = 0; i < inputFileNames.size(); i++) {
0053 inputTree->Add(inputFileNames[i].c_str());
0054 }
0055 outputFile = new TFile(outputFileName.c_str(), "recreate");
0056 outputDirectory = outputFile->mkdir(inputDirectoryName.c_str());
0057 numCPU = numCPU_;
0058 saveWorkspace = saveWorkspace_;
0059 massBins = 0;
0060 floatShapeParameters = floatShapeParameters_;
0061 fixVars = fixVars_;
0062 weightVar = "";
0063 if (!floatShapeParameters && fixVars.empty())
0064 std::cout << "TagProbeFitter: "
0065 << "You wnat to fix some variables but do not specify them!";
0066
0067 gROOT->SetStyle("Plain");
0068 gStyle->SetTitleFillColor(0);
0069 gStyle->SetPalette(1);
0070 gStyle->SetOptStat(0);
0071 gStyle->SetPaintTextFormat(".2f");
0072
0073 quiet = false;
0074
0075 binnedFit = false;
0076
0077 doSaveDistributionsPlot = true;
0078
0079
0080 RooAbsReal::defaultIntegratorConfig()->setEpsAbs(1e-13);
0081 RooAbsReal::defaultIntegratorConfig()->setEpsRel(1e-13);
0082
0083 split_mode = 0;
0084 }
0085
0086 TagProbeFitter::~TagProbeFitter() {
0087 if (inputTree)
0088 delete inputTree;
0089 if (outputFile)
0090 outputFile->Close();
0091 }
0092
0093 void TagProbeFitter::setQuiet(bool quiet_) {
0094 quiet = quiet_;
0095 if (quiet) {
0096 RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
0097 } else {
0098 RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING);
0099 }
0100 }
0101
0102 void TagProbeFitter::setSplitMode(unsigned int nevents) { split_mode = nevents; }
0103
0104 bool TagProbeFitter::addVariable(string name, string title, double low, double hi, string units) {
0105 RooRealVar temp(name.c_str(), title.c_str(), low, hi, units.c_str());
0106 temp.setBins(5000, "cache");
0107 variables.addClone(temp);
0108 return true;
0109 }
0110
0111 bool TagProbeFitter::addCategory(string name, string title, string expression) {
0112 RooCategory* c = (RooCategory*)parameterParser.factory(expression.c_str());
0113 if (!c)
0114 return false;
0115
0116 c->SetName(name.c_str());
0117 c->SetTitle(title.c_str());
0118 variables.addClone(*c);
0119 return true;
0120 }
0121
0122 bool TagProbeFitter::addExpression(string expressionName,
0123 string title,
0124 string expression,
0125 const std::vector<string>& arguments) {
0126 expressionVars.push_back(make_pair(make_pair(expressionName, title), make_pair(expression, arguments)));
0127 return true;
0128 }
0129
0130 bool TagProbeFitter::addThresholdCategory(string categoryName, string title, string varName, double cutValue) {
0131 thresholdCategories.push_back(make_pair(make_pair(categoryName, title), make_pair(varName, cutValue)));
0132 return true;
0133 }
0134
0135 void TagProbeFitter::addPdf(string name, vector<string>& pdfCommands) { pdfs[name] = pdfCommands; }
0136
0137 void TagProbeFitter::setBinsForMassPlots(int bins) { massBins = bins; }
0138
0139 void TagProbeFitter::setWeightVar(const std::string& var) { weightVar = var; }
0140
0141 string TagProbeFitter::calculateEfficiency(string dirName,
0142 const std::vector<string>& effCats,
0143 const std::vector<string>& effStates,
0144 vector<string>& unbinnedVariables,
0145 map<string, vector<double> >& binnedReals,
0146 map<string, std::vector<string> >& binnedCategories,
0147 vector<string>& binToPDFmap) {
0148
0149 outputDirectory->cd();
0150
0151 gDirectory->mkdir(dirName.c_str())->cd();
0152
0153 RooArgSet dataVars;
0154
0155
0156 for (vector<string>::iterator v = unbinnedVariables.begin(); v != unbinnedVariables.end(); v++) {
0157 dataVars.addClone(variables[v->c_str()], true);
0158 if (binnedFit && (v == unbinnedVariables.begin())) {
0159 ((RooRealVar&)dataVars[v->c_str()]).setBins(massBins);
0160 }
0161 }
0162
0163 RooArgSet binnedVariables;
0164 RooArgSet binCategories;
0165 for (map<string, vector<double> >::iterator v = binnedReals.begin(); v != binnedReals.end(); v++) {
0166 TString name = v->first;
0167 if (variables.find(name) == nullptr) {
0168 cerr << "Binned variable '" << name << "' not found." << endl;
0169 return "Error";
0170 }
0171 binnedVariables.add(*dataVars.addClone(variables[name]));
0172 ((RooRealVar&)binnedVariables[name]).setBinning(RooBinning(v->second.size() - 1, &v->second[0]));
0173 binCategories.addClone(RooBinningCategory(name + "_bins", name + "_bins", (RooRealVar&)binnedVariables[name]));
0174 }
0175
0176
0177 RooArgSet categories;
0178 RooArgSet mappedCategories;
0179 for (map<string, vector<string> >::iterator v = binnedCategories.begin(); v != binnedCategories.end(); v++) {
0180 TString name = v->first;
0181 if (variables.find(name) == nullptr) {
0182 cerr << "Binned category '" << name << "' not found." << endl;
0183 return "Error";
0184 }
0185 categories.add(*dataVars.addClone(variables[name]));
0186 mappedCategories.addClone(RooMappedCategory(name + "_bins", name + "_bins", (RooCategory&)categories[name]));
0187 for (unsigned int i = 0; i < v->second.size(); i++) {
0188 ((RooMappedCategory&)mappedCategories[name + "_bins"])
0189 .map(v->second[i].c_str(), name + "_" + TString(v->second[i].c_str()).ReplaceAll(",", "_"));
0190 }
0191 }
0192
0193
0194 for (vector<string>::const_iterator effCat = effCats.begin(); effCat != effCats.end(); ++effCat) {
0195 if (variables.find(effCat->c_str()) != nullptr) {
0196 dataVars.addClone(variables[effCat->c_str()], true);
0197 }
0198 }
0199
0200
0201 for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator ev = expressionVars.begin(),
0202 eve = expressionVars.end();
0203 ev != eve;
0204 ++ev) {
0205 for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed; ++it) {
0206
0207 if (variables.find(it->c_str()))
0208 dataVars.addClone(variables[it->c_str()], true);
0209 }
0210 }
0211
0212 for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0213 tce = thresholdCategories.end();
0214 tc != tce;
0215 ++tc) {
0216 if (variables.find(tc->second.first.c_str()))
0217 dataVars.addClone(variables[tc->second.first.c_str()], true);
0218 }
0219
0220
0221 RooDataSet* data(nullptr);
0222 if (not split_mode) {
0223 data = new RooDataSet("data",
0224 "data",
0225 dataVars,
0226 Import(*inputTree),
0227 Cut(""),
0228 WeightVar(weightVar.empty() ? nullptr : weightVar.c_str()));
0229
0230
0231 for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0232 ev = expressionVars.begin(),
0233 eve = expressionVars.end();
0234 ev != eve;
0235 ++ev) {
0236 RooArgList args;
0237 for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0238 ++it) {
0239 args.add(dataVars[it->c_str()]);
0240 }
0241 RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0242 RooRealVar* col = (RooRealVar*)data->addColumn(expr);
0243 dataVars.addClone(*col);
0244 }
0245
0246
0247 for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0248 tce = thresholdCategories.end();
0249 tc != tce;
0250 ++tc) {
0251 RooThresholdCategory tmp(tc->first.first.c_str(),
0252 tc->first.second.c_str(),
0253 (RooAbsReal&)dataVars[tc->second.first.c_str()],
0254 "above",
0255 1);
0256 tmp.addThreshold(tc->second.second, "below", 0);
0257 RooCategory* cat = (RooCategory*)data->addColumn(tmp);
0258 dataVars.addClone(*cat);
0259 }
0260 }
0261
0262
0263 RooMultiCategory allCats("allCats", "allCats", RooArgSet(binCategories, mappedCategories));
0264 string effName;
0265
0266 if (not split_mode) {
0267 data->addColumn(allCats);
0268
0269 if (effCats.size() == 1) {
0270 effName = effCats.front() + "::" + effStates.front();
0271 RooMappedCategory efficiencyCategory(
0272 "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0273 efficiencyCategory.map(effStates.front().c_str(), "Passed");
0274 data->addColumn(efficiencyCategory);
0275 } else {
0276 RooArgSet rooEffCats;
0277 string multiState = "{";
0278 for (size_t i = 0; i < effCats.size(); ++i) {
0279 if (i) {
0280 multiState += ";";
0281 effName += " && ";
0282 }
0283 rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0284 multiState += effStates[i];
0285 effName = effCats[i] + "::" + effStates[i];
0286 }
0287 multiState += "}";
0288 RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0289 RooMappedCategory efficiencyCategory(
0290 "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0291 efficiencyCategory.map(multiState.c_str(), "Passed");
0292 data->addColumn(efficiencyCategory);
0293 }
0294 }
0295
0296
0297 RooMappedCategory pdfCategory(
0298 "_pdfCategory_", "_pdfCategory_", allCats, (!binToPDFmap.empty()) ? binToPDFmap[0].c_str() : "all");
0299 for (unsigned int i = 1; i < binToPDFmap.size(); i += 2) {
0300 pdfCategory.map(binToPDFmap[i].c_str(), binToPDFmap[i + 1].c_str());
0301 }
0302 if (not split_mode)
0303 data->addColumn(pdfCategory);
0304
0305
0306 RooRealVar efficiency("efficiency", "Efficiency", 0, 1);
0307
0308 RooDataSet fitEfficiency("fit_eff",
0309 "Efficiency from unbinned ML fit",
0310 RooArgSet(RooArgSet(binnedVariables, categories), efficiency),
0311 StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0312
0313 RooDataSet cntEfficiency("cnt_eff",
0314 "Efficiency from counting",
0315 RooArgSet(RooArgSet(binnedVariables, categories), efficiency),
0316 StoreAsymError(RooArgSet(binnedVariables, efficiency)));
0317
0318 if (not split_mode) {
0319 if (!floatShapeParameters) {
0320
0321 RooWorkspace* w = new RooWorkspace();
0322 w->import(*data);
0323 efficiency.setVal(0);
0324 efficiency.setAsymError(0, 0);
0325 std::cout << "ALL dataset: calling doFitEfficiency with pdf: " << pdfCategory.getLabel() << std::endl;
0326 doFitEfficiency(w, pdfCategory.getLabel(), efficiency);
0327 delete w;
0328 }
0329 } else {
0330
0331 inputTree->SetBranchStatus("*", false);
0332 for (TObject* obj : dataVars)
0333 inputTree->SetBranchStatus(obj->GetName(), true);
0334 }
0335
0336
0337
0338
0339 std::vector<Int_t> allCatValues;
0340 TIterator* it = allCats.typeIterator();
0341 for (RooCatType* t = (RooCatType*)it->Next(); t != nullptr; t = (RooCatType*)it->Next())
0342 allCatValues.push_back(t->getVal());
0343 for (auto iCat : allCatValues) {
0344 const RooCatType* t = allCats.lookupType(iCat);
0345
0346 TString catName = t->GetName();
0347
0348 if (catName.Contains("NotMapped"))
0349 continue;
0350
0351 RooDataSet* data_bin(nullptr);
0352 RooArgSet tmpVars;
0353
0354 if (not split_mode) {
0355
0356 data_bin = (RooDataSet*)data->reduce(Cut(TString::Format("allCats==%d", iCat)));
0357 } else {
0358 data_bin = new RooDataSet("data", "data", dataVars, WeightVar(weightVar.empty() ? nullptr : weightVar.c_str()));
0359
0360 TDirectory* tmp = gDirectory;
0361 gROOT->cd();
0362
0363
0364
0365 unsigned int n_entries = inputTree->GetEntries();
0366 printf("Input number of events: %u\n", n_entries);
0367 unsigned int first_entry = 0;
0368 while (first_entry < n_entries) {
0369 TTree* copyTree = inputTree->CopyTree("", "", split_mode, first_entry);
0370 RooTreeDataStore store("reader",
0371 "reader",
0372 dataVars,
0373 *copyTree,
0374 "",
0375 (weightVar.empty() ? nullptr : weightVar.c_str()));
0376 for (unsigned int i = 0; i < store.GetEntries(); ++i) {
0377 store.get(i);
0378 if (allCats.getIndex() == iCat) {
0379 data_bin->add(dataVars, weightVar.empty() ? 1.0 : dataVars.getRealValue(weightVar.c_str()));
0380 }
0381 }
0382 delete copyTree;
0383 first_entry += split_mode;
0384 data_bin->Print("V");
0385 }
0386 data_bin->Print("V");
0387 tmp->cd();
0388
0389
0390 for (vector<pair<pair<string, string>, pair<string, vector<string> > > >::const_iterator
0391 ev = expressionVars.begin(),
0392 eve = expressionVars.end();
0393 ev != eve;
0394 ++ev) {
0395 RooArgList args;
0396 for (vector<string>::const_iterator it = ev->second.second.begin(), ed = ev->second.second.end(); it != ed;
0397 ++it) {
0398 args.add(dataVars[it->c_str()]);
0399 }
0400 RooFormulaVar expr(ev->first.first.c_str(), ev->first.second.c_str(), ev->second.first.c_str(), args);
0401 RooRealVar* col = (RooRealVar*)data_bin->addColumn(expr);
0402 tmpVars.add(*dataVars.addClone(*col));
0403 }
0404
0405
0406 for (vector<pair<pair<string, string>, pair<string, double> > >::const_iterator tc = thresholdCategories.begin(),
0407 tce = thresholdCategories.end();
0408 tc != tce;
0409 ++tc) {
0410 RooThresholdCategory tmp(tc->first.first.c_str(),
0411 tc->first.second.c_str(),
0412 (RooAbsReal&)dataVars[tc->second.first.c_str()],
0413 "above",
0414 1);
0415 tmp.addThreshold(tc->second.second, "below", 0);
0416 RooCategory* cat = (RooCategory*)data_bin->addColumn(tmp);
0417 tmpVars.add(*dataVars.addClone(*cat));
0418 }
0419
0420
0421 if (effCats.size() == 1) {
0422 effName = effCats.front() + "::" + effStates.front();
0423 RooMappedCategory efficiencyCategory(
0424 "_efficiencyCategory_", "_efficiencyCategory_", (RooCategory&)dataVars[effCats.front().c_str()], "Failed");
0425 efficiencyCategory.map(effStates.front().c_str(), "Passed");
0426 data_bin->addColumn(efficiencyCategory);
0427 } else {
0428 RooArgSet rooEffCats;
0429 string multiState = "{";
0430 for (size_t i = 0; i < effCats.size(); ++i) {
0431 if (i) {
0432 multiState += ";";
0433 effName += " && ";
0434 }
0435 rooEffCats.add((RooCategory&)dataVars[effCats[i].c_str()]);
0436 multiState += effStates[i];
0437 effName = effCats[i] + "::" + effStates[i];
0438 }
0439 multiState += "}";
0440 RooMultiCategory efficiencyMultiCategory("_efficiencyMultiCategory_", "_efficiencyMultiCategory_", rooEffCats);
0441 RooMappedCategory efficiencyCategory(
0442 "_efficiencyCategory_", "_efficiencyCategory_", efficiencyMultiCategory, "Failed");
0443 efficiencyCategory.map(multiState.c_str(), "Passed");
0444 data_bin->addColumn(efficiencyCategory);
0445 }
0446 data_bin->addColumn(pdfCategory);
0447 }
0448
0449
0450 const RooArgSet* row = data_bin->get();
0451
0452
0453 TString pdfName(((RooCategory*)row->find("_pdfCategory_"))->getLabel());
0454
0455
0456 TString dirName = catName;
0457 dirName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "__");
0458 if (pdfName.Length() > 0) {
0459 dirName.Append("__").Append(pdfName);
0460 }
0461
0462 cout << "Fitting bin: " << dirName << endl;
0463
0464 gDirectory->mkdir(dirName)->cd();
0465
0466
0467 RooWorkspace* w = new RooWorkspace();
0468
0469 w->import(*data_bin);
0470 delete data_bin;
0471 data_bin =
0472 dynamic_cast<RooDataSet*>(w->data("data"));
0473
0474
0475 if (doSaveDistributionsPlot)
0476 saveDistributionsPlot(w);
0477
0478 if (data_bin->numEntries() > 0) {
0479
0480 RooArgSet meanOfVariables;
0481 for (const RooAbsArg* vv : binnedVariables) {
0482 const RooRealVar* v = dynamic_cast<const RooRealVar*>(vv);
0483 meanOfVariables.addClone(*v);
0484 double mean = w->data("data")->mean(*v);
0485 RooBinning binning((RooBinning&)v->getBinning());
0486 int ind = binning.binNumber(mean);
0487 RooRealVar& newVar = (RooRealVar&)meanOfVariables[v->GetName()];
0488 newVar.setVal(mean);
0489 newVar.setAsymError(binning.binLow(ind) - mean, binning.binHigh(ind) - mean);
0490 }
0491
0492
0493
0494 meanOfVariables.addClone(*data_bin->get(0), true);
0495
0496 efficiency.setVal(0);
0497 efficiency.setAsymError(0, 0);
0498 doFitEfficiency(w, pdfName.Data(), efficiency);
0499 fitEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0500
0501
0502
0503
0504
0505 efficiency.setVal(0);
0506 doCntEfficiency(w, efficiency);
0507 cntEfficiency.add(RooArgSet(meanOfVariables, efficiency));
0508 }
0509
0510 if (saveWorkspace) {
0511 w->Write("w");
0512 }
0513
0514 delete w;
0515 if (split_mode)
0516 dataVars.remove(tmpVars);
0517
0518 gDirectory->cd("..");
0519 }
0520
0521
0522 fitEfficiency.Write();
0523 gDirectory->mkdir("fit_eff_plots")->cd();
0524 saveEfficiencyPlots(fitEfficiency, effName, binnedVariables, mappedCategories);
0525 gDirectory->cd("..");
0526
0527
0528
0529
0530
0531
0532 cntEfficiency.Write();
0533 gDirectory->mkdir("cnt_eff_plots")->cd();
0534 saveEfficiencyPlots(cntEfficiency, effName, binnedVariables, mappedCategories);
0535 gDirectory->cd("..");
0536
0537 if (not split_mode)
0538 delete data;
0539
0540
0541 return "";
0542 }
0543
0544 void TagProbeFitter::doFitEfficiency(RooWorkspace* w, string pdfName, RooRealVar& efficiency) {
0545
0546 if (pdfName == "all") {
0547 return;
0548 }
0549
0550 createPdf(w, pdfs[pdfName]);
0551
0552 setInitialValues(w);
0553 std::unique_ptr<RooFitResult> res;
0554
0555 RooAbsData* data = w->data("data");
0556 if (binnedFit) {
0557
0558 const RooArgSet* dataObs = data->get(0);
0559
0560 std::unique_ptr<RooArgSet> obs{w->pdf("simPdf")->getObservables(dataObs)};
0561 RooDataHist bdata{"data_binned", "data_binned", *obs, *data};
0562 w->import(bdata);
0563 data = w->data("data_binned");
0564 }
0565
0566 double totPassing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0567 double totFailing = data->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0568
0569 std::unique_ptr<RooAbsReal> simNLL{w->pdf("simPdf")->createNLL(*data, Extended(true), NumCPU(numCPU))};
0570
0571 RooMinimizer minimizer(*simNLL);
0572 minimizer.setStrategy(1);
0573 minimizer.setProfile(true);
0574 RooProfileLL profileLL("simPdfNLL", "", *simNLL, *w->var("efficiency"));
0575
0576
0577
0578
0579
0580
0581
0582
0583
0584
0585
0586
0587
0588
0589
0590
0591 if (!fixVars.empty()) {
0592
0593 if (!floatShapeParameters && fixVarValues.empty()) {
0594
0595
0596
0597
0598
0599
0600
0601
0602 varFixer(w, true);
0603
0604 minimizer.minimize("Minuit2", "Scan");
0605 minimizer.migrad();
0606 minimizer.hesse();
0607
0608
0609
0610
0611 varFixer(w, false);
0612
0613 minimizer.minimize("Minuit2", "Scan");
0614 minimizer.migrad();
0615 minimizer.hesse();
0616
0617
0618
0619
0620 varSaver(w);
0621
0622 }
0623
0624
0625 if (!floatShapeParameters)
0626 varRestorer(w);
0627
0628
0629 minimizer.minimize("Minuit2", "Scan");
0630 minimizer.migrad();
0631 minimizer.hesse();
0632
0633 profileLL.getVal();
0634 RooMinimizer* profMinuit = profileLL.minimizer();
0635 profMinuit->setProfile(true);
0636 profMinuit->setStrategy(2);
0637 profMinuit->setPrintLevel(1);
0638 profMinuit->minos(*w->var("efficiency"));
0639 res.reset(profMinuit->save());
0640
0641
0642
0643 }
0644
0645
0646 if (floatShapeParameters) {
0647
0648 varFixer(w, false);
0649
0650
0651 minimizer.minimize("Minuit2", "Scan");
0652 minimizer.migrad();
0653 minimizer.hesse();
0654 res.reset(w->pdf("simPdf")->fitTo(*data,
0655 Save(true),
0656 Extended(true),
0657 NumCPU(numCPU),
0658 Strategy(2),
0659 Minos(*w->var("efficiency")),
0660 PrintLevel(quiet ? -1 : 1),
0661 PrintEvalErrors(quiet ? -1 : 1),
0662 Warnings(!quiet)));
0663 }
0664
0665
0666 res->Write("fitresults");
0667 w->saveSnapshot("finalState", w->components());
0668 saveFitPlot(w);
0669
0670 RooRealVar* e = (RooRealVar*)res->floatParsFinal().find("efficiency");
0671
0672
0673 efficiency.setVal(e->getVal());
0674 Double_t errLo = e->getErrorLo(), errHi = e->getErrorHi();
0675 if (errLo == 0 && e->getVal() < 0.5)
0676 errLo = e->getMin() - e->getVal();
0677 if (errHi == 0 && e->getVal() > 0.5)
0678 errHi = e->getMax() - e->getVal();
0679 efficiency.setAsymError(errLo, errHi);
0680
0681 if (totPassing * totFailing == 0) {
0682 RooRealVar* nTot = (RooRealVar*)res->floatParsFinal().find("numTot");
0683 RooRealVar* fSig = (RooRealVar*)res->floatParsFinal().find("fSigAll");
0684 double cerr = ROOT::Math::beta_quantile(1 - (1.0 - .68540158589942957) / 2, 1, nTot->getVal() * fSig->getVal());
0685
0686
0687
0688
0689
0690
0691
0692
0693
0694 if (totPassing == 0) {
0695 efficiency.setVal(0);
0696 efficiency.setAsymError(0, cerr);
0697 } else {
0698 efficiency.setVal(1);
0699 efficiency.setAsymError(-cerr, 0);
0700 }
0701 }
0702 }
0703
0704 void TagProbeFitter::createPdf(RooWorkspace* w, vector<string>& pdfCommands) {
0705
0706 for (unsigned int i = 0; i < pdfCommands.size(); i++) {
0707 const std::string& command = pdfCommands[i];
0708 if (command.find("#import ") == 0) {
0709 TDirectory* here = gDirectory;
0710 w->import(command.substr(8).c_str());
0711 here->cd();
0712 } else {
0713 TDirectory* here = gDirectory;
0714 w->factory(command.c_str());
0715 here->cd();
0716 }
0717 }
0718
0719
0720 w->factory("expr::nSignalPass('efficiency*fSigAll*numTot', efficiency, fSigAll[.9,0,1],numTot[1,0,1e10])");
0721 w->factory("expr::nSignalFail('(1-efficiency)*fSigAll*numTot', efficiency, fSigAll,numTot)");
0722 w->factory("expr::nBkgPass('effBkg*(1-fSigAll)*numTot', effBkg[.9,0,1],fSigAll,numTot)");
0723 w->factory("expr::nBkgFail('(1-effBkg)*(1-fSigAll)*numTot', effBkg,fSigAll,numTot)");
0724 TString sPass = "signal", sFail = "signal";
0725 if (w->pdf("signalPass") != nullptr && w->pdf("signalFail") != nullptr) {
0726 if (w->pdf("signal") != nullptr)
0727 throw std::logic_error(
0728 "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0729 sPass = "signalPass";
0730 sFail = "signalFail";
0731 } else if (w->pdf("signal") != nullptr) {
0732 if (w->pdf("signalPass") != nullptr || w->pdf("signalFail") != nullptr) {
0733 throw std::logic_error(
0734 "You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail'), not both!");
0735 }
0736 } else {
0737 throw std::logic_error("You must either define one 'signal' PDF or two PDFs ('signalPass', 'signalFail')");
0738 }
0739 w->factory("SUM::pdfPass(nSignalPass*" + sPass + ", nBkgPass*backgroundPass)");
0740 w->factory("SUM::pdfFail(nSignalFail*" + sFail + ", nBkgFail*backgroundFail)");
0741
0742 w->factory("SIMUL::simPdf(_efficiencyCategory_, Passed=pdfPass, Failed=pdfFail)");
0743
0744 if (w->pdf("simPdf") == nullptr)
0745 throw std::runtime_error("Could not create simultaneous fit pdf.");
0746 if (w->var("signalFractionInPassing") == nullptr)
0747 w->factory("signalFractionInPassing[0.9]");
0748 }
0749
0750 void TagProbeFitter::setInitialValues(RooWorkspace* w) {
0751
0752 double totPassing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
0753 double totFailing = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
0754
0755
0756
0757 w->var("numTot")->setVal(totPassing + totFailing);
0758 w->var("numTot")->setMax(2.0 * (totPassing + totFailing) + 10);
0759
0760 if (totPassing == 0) {
0761 w->var("efficiency")->setVal(0.0);
0762 w->var("efficiency")->setAsymError(0, 1);
0763 w->var("efficiency")->setConstant(false);
0764 } else if (totFailing == 0) {
0765 w->var("efficiency")->setVal(1.0);
0766 w->var("efficiency")->setAsymError(-1, 0);
0767 w->var("efficiency")->setConstant(false);
0768 } else {
0769 w->var("efficiency")->setConstant(false);
0770 }
0771
0772
0773 w->saveSnapshot("initialState", w->components());
0774 }
0775
0776 void TagProbeFitter::saveFitPlot(RooWorkspace* w) {
0777
0778 RooCategory& efficiencyCategory = *w->cat("_efficiencyCategory_");
0779 RooAbsData* dataAll = (binnedFit ? w->data("data_binned") : w->data("data"));
0780 RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0781 RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0782 RooAbsPdf& pdf = *w->pdf("simPdf");
0783 std::unique_ptr<RooArgSet> obs(pdf.getObservables(*dataAll));
0784 RooRealVar* mass = nullptr;
0785 for (RooAbsArg* v : *obs) {
0786 if (!v->InheritsFrom("RooRealVar"))
0787 continue;
0788 mass = (RooRealVar*)v;
0789 break;
0790 }
0791 if (!mass)
0792 return;
0793
0794 TCanvas canvas("fit_canvas");
0795 canvas.Divide(2, 2);
0796 vector<RooPlot*> frames;
0797
0798 canvas.cd(1);
0799 if (massBins == 0) {
0800 frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes")));
0801 frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes")));
0802 frames.push_back(mass->frame(Name("All"), Title("All Probes")));
0803 } else {
0804 frames.push_back(mass->frame(Name("Passing"), Title("Passing Probes"), Bins(massBins)));
0805 frames.push_back(mass->frame(Name("Failing"), Title("Failing Probes"), Bins(massBins)));
0806 frames.push_back(mass->frame(Name("All"), Title("All Probes"), Bins(massBins)));
0807 }
0808 dataPass->plotOn(frames[0]);
0809 pdf.plotOn(frames[0], Slice(efficiencyCategory, "Passed"), ProjWData(*dataPass), LineColor(kGreen));
0810 pdf.plotOn(frames[0],
0811 Slice(efficiencyCategory, "Passed"),
0812 ProjWData(*dataPass),
0813 LineColor(kGreen),
0814 Components("backgroundPass"),
0815 LineStyle(kDashed));
0816 frames[0]->Draw();
0817
0818 canvas.cd(2);
0819 dataFail->plotOn(frames[1]);
0820 pdf.plotOn(frames[1], Slice(efficiencyCategory, "Failed"), ProjWData(*dataFail), LineColor(kRed));
0821 pdf.plotOn(frames[1],
0822 Slice(efficiencyCategory, "Failed"),
0823 ProjWData(*dataFail),
0824 LineColor(kRed),
0825 Components("backgroundFail"),
0826 LineStyle(kDashed));
0827 frames[1]->Draw();
0828
0829 canvas.cd(3);
0830 dataAll->plotOn(frames.back());
0831 pdf.plotOn(frames.back(), ProjWData(*dataAll), LineColor(kBlue));
0832 pdf.plotOn(frames.back(),
0833 ProjWData(*dataAll),
0834 LineColor(kBlue),
0835 Components("backgroundPass,backgroundFail"),
0836 LineStyle(kDashed));
0837 frames.back()->Draw();
0838
0839 canvas.cd(4);
0840 frames.push_back(mass->frame(Name("Fit Results"), Title("Fit Results")));
0841 pdf.paramOn(
0842 frames.back(), RooFit::Label(""), RooFit::Format("NELU", AutoPrecision(0)), RooFit::Layout(0.1, 0.9, 0.9));
0843
0844 frames.back()->findObject(Form("%s_paramBox", pdf.GetName()))->Draw();
0845
0846 canvas.Draw();
0847 canvas.Write();
0848 for (size_t i = 0; i < frames.size(); i++) {
0849 delete frames[i];
0850 }
0851 delete dataPass;
0852 delete dataFail;
0853 }
0854
0855 void TagProbeFitter::saveDistributionsPlot(RooWorkspace* w) {
0856
0857 RooAbsData* dataAll = w->data("data");
0858 RooAbsData* dataPass = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Passed"));
0859 RooAbsData* dataFail = dataAll->reduce(Cut("_efficiencyCategory_==_efficiencyCategory_::Failed"));
0860
0861 const RooArgSet* vars = dataAll->get();
0862 vector<RooRealVar*> reals;
0863 for (RooAbsArg* v : *vars) {
0864 if (!v->InheritsFrom("RooRealVar"))
0865 continue;
0866 reals.push_back((RooRealVar*)v);
0867 }
0868 TCanvas canvas("distributions_canvas");
0869 canvas.Divide(3, reals.size());
0870 vector<RooPlot*> frames;
0871 for (unsigned int i = 0; i < reals.size(); i++) {
0872
0873 canvas.cd(3 * i + 1);
0874 frames.push_back(reals[i]->frame(Name("Passing"), Title("Passing Probes"), Bins(100)));
0875 dataPass->plotOn(frames.back(), LineColor(kGreen));
0876 dataPass->statOn(frames.back());
0877 frames.back()->Draw();
0878
0879 canvas.cd(3 * i + 2);
0880 frames.push_back(reals[i]->frame(Name("Failing"), Title("Failing Probes"), Bins(100)));
0881 dataFail->plotOn(frames.back(), LineColor(kRed));
0882 dataFail->statOn(frames.back());
0883 frames.back()->Draw();
0884
0885 canvas.cd(3 * i + 3);
0886 frames.push_back(reals[i]->frame(Name("All"), Title("All Probes"), Bins(100)));
0887 dataAll->plotOn(frames.back(), LineColor(kBlue));
0888 dataAll->statOn(frames.back());
0889 frames.back()->Draw();
0890 }
0891 canvas.Draw();
0892 canvas.Write();
0893 for (size_t i = 0; i < frames.size(); i++) {
0894 delete frames[i];
0895 }
0896 delete dataPass;
0897 delete dataFail;
0898 }
0899
0900 void TagProbeFitter::saveEfficiencyPlots(RooDataSet& eff,
0901 const TString& effName,
0902 RooArgSet& binnedVariables,
0903 RooArgSet& mappedCategories) {
0904 bool isOnePoint =
0905 (eff.numEntries() == 1);
0906 for (auto it1 = binnedVariables.begin(); it1 != binnedVariables.end(); it1++) {
0907 RooRealVar* v1 = dynamic_cast<RooRealVar*>(*it1);
0908 RooArgSet binCategories1D;
0909 if (v1->numBins() == 1 && !isOnePoint)
0910 continue;
0911 for (auto it2 = binnedVariables.begin(); it2 != binnedVariables.end(); it2++) {
0912 RooRealVar* v2 = dynamic_cast<RooRealVar*>(*it2);
0913 if (v2 == v1)
0914 continue;
0915 if (v2->numBins() == 1 && !isOnePoint)
0916 continue;
0917 binCategories1D.addClone(
0918 RooBinningCategory(TString(v2->GetName()) + "_bins", TString(v2->GetName()) + "_bins", *v2));
0919
0920 RooArgSet binCategories2D;
0921 for (auto it3 = binnedVariables.begin(); it3 != binnedVariables.end(); it3++) {
0922 RooRealVar* v3 = dynamic_cast<RooRealVar*>(*it3);
0923 if (v3 == v1 || v3 == v2)
0924 continue;
0925 binCategories2D.addClone(
0926 RooBinningCategory(TString(v3->GetName()) + "_bins", TString(v3->GetName()) + "_bins", *v3));
0927 }
0928 RooMultiCategory allCats2D("allCats2D", "allCats2D", RooArgSet(binCategories2D, mappedCategories));
0929 if (allCats2D.numTypes() == 0) {
0930 makeEfficiencyPlot2D(eff, *v1, *v2, TString::Format("%s_%s_PLOT", v1->GetName(), v2->GetName()), "", effName);
0931 } else {
0932 RooDataSet myEff(eff);
0933 myEff.addColumn(allCats2D);
0934 std::unique_ptr<TIterator> catIt(allCats2D.typeIterator());
0935 for (RooCatType* t = (RooCatType*)catIt->Next(); t != nullptr; t = (RooCatType*)catIt->Next()) {
0936 TString catName = t->GetName();
0937 if (catName.Contains("NotMapped"))
0938 continue;
0939 catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0940 makeEfficiencyPlot2D(myEff,
0941 *v1,
0942 *v2,
0943 TString::Format("%s_%s_PLOT_%s", v1->GetName(), v2->GetName(), catName.Data()),
0944 catName,
0945 effName,
0946 "allCats2D",
0947 t->getVal());
0948 }
0949 }
0950 }
0951 RooMultiCategory allCats1D("allCats1D", "allCats1D", RooArgSet(binCategories1D, mappedCategories));
0952 if (allCats1D.numTypes() == 0) {
0953 makeEfficiencyPlot1D(eff, *v1, TString::Format("%s_PLOT", v1->GetName()), "", effName);
0954 } else {
0955 RooDataSet myEff(eff);
0956 myEff.addColumn(allCats1D);
0957 std::unique_ptr<TIterator> catIt(allCats1D.typeIterator());
0958 for (RooCatType* t = (RooCatType*)catIt->Next(); t != nullptr; t = (RooCatType*)catIt->Next()) {
0959 TString catName = t->GetName();
0960 if (catName.Contains("NotMapped"))
0961 continue;
0962 catName.ReplaceAll("{", "").ReplaceAll("}", "").ReplaceAll(";", "_&_");
0963 makeEfficiencyPlot1D(myEff,
0964 *v1,
0965 TString::Format("%s_PLOT_%s", v1->GetName(), catName.Data()),
0966 catName,
0967 effName,
0968 "allCats1D",
0969 t->getVal());
0970 }
0971 }
0972 }
0973 }
0974
0975 void TagProbeFitter::makeEfficiencyPlot1D(RooDataSet& eff,
0976 RooRealVar& v,
0977 const TString& plotName,
0978 const TString& plotTitle,
0979 const TString& effName,
0980 const char* catName,
0981 int catIndex) {
0982 TGraphAsymmErrors* p = new TGraphAsymmErrors();
0983 const RooArgSet* entry = eff.get();
0984 const RooRealVar& vi = dynamic_cast<const RooRealVar&>(*entry->find(v.GetName()));
0985 const RooRealVar& ei = dynamic_cast<const RooRealVar&>(*entry->find("efficiency"));
0986 for (unsigned int i = 0, n = eff.numEntries(); i < n; ++i) {
0987 entry = eff.get(i);
0988 if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
0989 continue;
0990 int j = p->GetN();
0991 p->Set(j + 1);
0992 p->SetPoint(j, vi.getVal(), ei.getVal());
0993 p->SetPointError(j, -vi.getAsymErrorLo(), vi.getAsymErrorHi(), -ei.getAsymErrorLo(), ei.getAsymErrorHi());
0994 }
0995 TCanvas canvas(plotName);
0996 TH1F* frame = new TH1F("frame", "Efficiency of " + effName, 1, v.getMin(), v.getMax());
0997 frame->SetDirectory(nullptr);
0998 p->SetNameTitle(Form("hxy_%s", eff.GetName()), "Efficiency of " + effName);
0999 p->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
1000 p->GetYaxis()->SetTitle("Efficiency of " + effName);
1001 frame->GetXaxis()->SetTitle(strlen(v.getUnit()) ? Form("%s (%s)", v.GetName(), v.getUnit()) : v.GetName());
1002 frame->GetYaxis()->SetTitle("Efficiency of " + effName);
1003 frame->GetYaxis()->SetRangeUser(0, 1);
1004 frame->Draw();
1005 p->SetLineWidth(2);
1006 p->SetMarkerStyle(kFullCircle);
1007 p->SetMarkerSize(1.2);
1008 p->Draw("P SAME");
1009 canvas.Write();
1010 delete frame;
1011 delete p;
1012 }
1013
1014 void TagProbeFitter::makeEfficiencyPlot2D(RooDataSet& eff,
1015 RooRealVar& v1,
1016 RooRealVar& v2,
1017 const TString& plotName,
1018 const TString& plotTitle,
1019 const TString& effName,
1020 const char* catName,
1021 int catIndex) {
1022 TCanvas canvas(plotName);
1023 canvas.SetRightMargin(0.15);
1024 TH2F* h = new TH2F(plotName,
1025 plotName,
1026 v1.getBinning().numBins(),
1027 v1.getBinning().array(),
1028 v2.getBinning().numBins(),
1029 v2.getBinning().array());
1030 const RooArgSet* set = eff.get();
1031 RooRealVar* e = (RooRealVar*)set->find("efficiency");
1032 RooRealVar* v1_ = (RooRealVar*)set->find(v1.GetName());
1033 RooRealVar* v2_ = (RooRealVar*)set->find(v2.GetName());
1034 h->SetTitle(TString::Format("%s;%s%s;%s%s;Efficiency of %s",
1035 plotTitle.Data(),
1036 v1.GetTitle(),
1037 TString(v1.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v1.getUnit()).Data(),
1038 v2.GetTitle(),
1039 TString(v2.getUnit()).Length() == 0 ? "" : TString::Format(" (%s)", v2.getUnit()).Data(),
1040 effName.Data()));
1041 h->SetOption("colztexte");
1042 h->GetZaxis()->SetRangeUser(-0.001, 1.001);
1043 h->SetStats(false);
1044 for (int i = 0; i < eff.numEntries(); i++) {
1045 const RooArgSet* entry = eff.get(i);
1046 if (catName != nullptr && entry->getCatIndex(catName) != catIndex)
1047 continue;
1048 h->SetBinContent(h->FindBin(v1_->getVal(), v2_->getVal()), e->getVal());
1049 h->SetBinError(h->FindBin(v1_->getVal(), v2_->getVal()), (e->getErrorHi() - e->getErrorLo()) / 2.);
1050 }
1051 h->Draw();
1052 canvas.Draw();
1053 canvas.Write();
1054 delete h;
1055 }
1056
1057 void TagProbeFitter::doSBSEfficiency(RooWorkspace* w, RooRealVar& efficiency) {}
1058
1059 void TagProbeFitter::doCntEfficiency(RooWorkspace* w, RooRealVar& efficiency) {
1060 int pass = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Passed");
1061 int fail = w->data("data")->sumEntries("_efficiencyCategory_==_efficiencyCategory_::Failed");
1062 double e = (pass + fail == 0) ? 0 : pass / double(pass + fail);
1063
1064 double alpha = (1.0 - .68540158589942957) / 2;
1065 double lo = (pass == 0) ? 0.0 : ROOT::Math::beta_quantile(alpha, pass, fail + 1);
1066 double hi = (fail == 0) ? 1.0 : ROOT::Math::beta_quantile(1 - alpha, pass + 1, fail);
1067
1068
1069
1070
1071 efficiency.setVal(e);
1072 efficiency.setAsymError(lo - e, hi - e);
1073 }
1074
1075 void TagProbeFitter::varFixer(RooWorkspace* w, bool fix) {
1076 std::vector<std::string>::const_iterator it;
1077 for (it = fixVars.begin(); it < fixVars.end(); it++) {
1078 if (w->var((*it).c_str()))
1079 w->var((*it).c_str())->setAttribute("Constant", fix);
1080 else {
1081 std::cout << "TagProbeFitter: "
1082 << "Can't find a variable to fix: " << *it;
1083 }
1084 }
1085 }
1086
1087 void TagProbeFitter::varSaver(RooWorkspace* w) {
1088 if (!fixVarValues.empty()) {
1089 std::cout << "attempt to save variables more than once!" << std::endl;
1090 return;
1091 }
1092 std::vector<std::string>::const_iterator it;
1093 for (it = fixVars.begin(); it < fixVars.end(); it++) {
1094 fixVarValues.push_back(w->var((*it).c_str())->getVal());
1095 }
1096 }
1097
1098 void TagProbeFitter::varRestorer(RooWorkspace* w) {
1099 if (fixVarValues.size() == fixVars.size())
1100 for (unsigned int i = 0; i < fixVars.size(); i++) {
1101 std::cout << "setting variable " << fixVars[i].c_str() << std::endl;
1102 w->var(fixVars[i].c_str())->setVal(fixVarValues[i]);
1103 }
1104 else {
1105 std::cout << "fixVars and fixVarValues are not of the same size!" << std::endl;
1106 }
1107 }