File indexing completed on 2024-04-06 12:02:43
0001 #include "BTagCalibrationStandalone.h"
0002 #include <iostream>
0003 #include <exception>
0004 #include <algorithm>
0005 #include <sstream>
0006
0007
0008 BTagEntry::Parameters::Parameters(
0009 OperatingPoint op,
0010 std::string measurement_type,
0011 std::string sys_type,
0012 JetFlavor jf,
0013 float eta_min,
0014 float eta_max,
0015 float pt_min,
0016 float pt_max,
0017 float discr_min,
0018 float discr_max
0019 ):
0020 operatingPoint(op),
0021 measurementType(measurement_type),
0022 sysType(sys_type),
0023 jetFlavor(jf),
0024 etaMin(eta_min),
0025 etaMax(eta_max),
0026 ptMin(pt_min),
0027 ptMax(pt_max),
0028 discrMin(discr_min),
0029 discrMax(discr_max)
0030 {
0031 std::transform(measurementType.begin(), measurementType.end(),
0032 measurementType.begin(), ::tolower);
0033 std::transform(sysType.begin(), sysType.end(),
0034 sysType.begin(), ::tolower);
0035 }
0036
0037 BTagEntry::BTagEntry(const std::string &csvLine)
0038 {
0039
0040 std::stringstream buff(csvLine);
0041 std::vector<std::string> vec;
0042 std::string token;
0043 while (std::getline(buff, token, ","[0])) {
0044 token = BTagEntry::trimStr(token);
0045 if (token.empty()) {
0046 continue;
0047 }
0048 vec.push_back(token);
0049 }
0050 if (vec.size() != 11) {
0051 std::cerr << "ERROR in BTagCalibration: "
0052 << "Invalid csv line; num tokens != 11: "
0053 << csvLine;
0054 throw std::exception();
0055 }
0056
0057
0058 char chars[] = " \"\n";
0059 for (unsigned int i = 0; i < strlen(chars); ++i) {
0060 vec[1].erase(remove(vec[1].begin(),vec[1].end(),chars[i]),vec[1].end());
0061 vec[2].erase(remove(vec[2].begin(),vec[2].end(),chars[i]),vec[2].end());
0062 vec[10].erase(remove(vec[10].begin(),vec[10].end(),chars[i]),vec[10].end());
0063 }
0064
0065
0066 formula = vec[10];
0067 TF1 f1("", formula.c_str());
0068 if (f1.IsZombie()) {
0069 std::cerr << "ERROR in BTagCalibration: "
0070 << "Invalid csv line; formula does not compile: "
0071 << csvLine;
0072 throw std::exception();
0073 }
0074
0075
0076 unsigned op = stoi(vec[0]);
0077 if (op > 3) {
0078 std::cerr << "ERROR in BTagCalibration: "
0079 << "Invalid csv line; OperatingPoint > 3: "
0080 << csvLine;
0081 throw std::exception();
0082 }
0083 unsigned jf = stoi(vec[3]);
0084 if (jf > 2) {
0085 std::cerr << "ERROR in BTagCalibration: "
0086 << "Invalid csv line; JetFlavor > 2: "
0087 << csvLine;
0088 throw std::exception();
0089 }
0090 params = BTagEntry::Parameters(
0091 BTagEntry::OperatingPoint(op),
0092 vec[1],
0093 vec[2],
0094 BTagEntry::JetFlavor(jf),
0095 stof(vec[4]),
0096 stof(vec[5]),
0097 stof(vec[6]),
0098 stof(vec[7]),
0099 stof(vec[8]),
0100 stof(vec[9])
0101 );
0102 }
0103
0104 BTagEntry::BTagEntry(const std::string &func, BTagEntry::Parameters p):
0105 formula(func),
0106 params(p)
0107 {
0108 TF1 f1("", formula.c_str());
0109 if (f1.IsZombie()) {
0110 std::cerr << "ERROR in BTagCalibration: "
0111 << "Invalid func string; formula does not compile: "
0112 << func;
0113 throw std::exception();
0114 }
0115 }
0116
0117 BTagEntry::BTagEntry(const TF1* func, BTagEntry::Parameters p):
0118 formula(std::string(func->GetExpFormula("p").Data())),
0119 params(p)
0120 {
0121 if (func->IsZombie()) {
0122 std::cerr << "ERROR in BTagCalibration: "
0123 << "Invalid TF1 function; function is zombie: "
0124 << func->GetName();
0125 throw std::exception();
0126 }
0127 }
0128
0129
0130
0131
0132 std::string th1ToFormulaLin(const TH1* hist) {
0133 int nbins = hist->GetNbinsX();
0134 TAxis const* axis = hist->GetXaxis();
0135 std::stringstream buff;
0136 buff << "x<" << axis->GetBinLowEdge(1) << " ? 0. : ";
0137 for (int i=1; i<nbins+1; ++i) {
0138 char tmp_buff[50];
0139 sprintf(tmp_buff,
0140 "x<%g ? %g : ",
0141 axis->GetBinUpEdge(i),
0142 hist->GetBinContent(i));
0143 buff << tmp_buff;
0144 }
0145 buff << 0.;
0146 return buff.str();
0147 }
0148
0149
0150
0151
0152 std::string th1ToFormulaBinTree(const TH1* hist, int start=0, int end=-1) {
0153 if (end == -1) {
0154 start = 0.;
0155 end = hist->GetNbinsX()+1;
0156 TH1* h2 = (TH1*) hist->Clone();
0157 h2->SetBinContent(start, 0);
0158 h2->SetBinContent(end, 0);
0159 std::string res = th1ToFormulaBinTree(h2, start, end);
0160 delete h2;
0161 return res;
0162 }
0163 if (start == end) {
0164 char tmp_buff[20];
0165 sprintf(tmp_buff, "%g", hist->GetBinContent(start));
0166 return std::string(tmp_buff);
0167 }
0168 if (start == end - 1) {
0169 char tmp_buff[70];
0170 sprintf(tmp_buff,
0171 "x<%g ? %g:%g",
0172 hist->GetXaxis()->GetBinUpEdge(start),
0173 hist->GetBinContent(start),
0174 hist->GetBinContent(end));
0175 return std::string(tmp_buff);
0176 }
0177
0178
0179 std::stringstream buff;
0180 int mid = (end-start)/2 + start;
0181 char tmp_buff[25];
0182 sprintf(tmp_buff,
0183 "x<%g ? (",
0184 hist->GetXaxis()->GetBinUpEdge(mid));
0185 buff << tmp_buff
0186 << th1ToFormulaBinTree(hist, start, mid)
0187 << ") : ("
0188 << th1ToFormulaBinTree(hist, mid+1, end)
0189 << ")";
0190 return buff.str();
0191 }
0192
0193 BTagEntry::BTagEntry(const TH1* hist, BTagEntry::Parameters p):
0194 params(p)
0195 {
0196 int nbins = hist->GetNbinsX();
0197 TAxis const* axis = hist->GetXaxis();
0198
0199
0200 if (params.operatingPoint == BTagEntry::OP_RESHAPING) {
0201 params.discrMin = axis->GetBinLowEdge(1);
0202 params.discrMax = axis->GetBinUpEdge(nbins);
0203 } else {
0204 params.ptMin = axis->GetBinLowEdge(1);
0205 params.ptMax = axis->GetBinUpEdge(nbins);
0206 }
0207
0208
0209
0210 if (nbins < 15) {
0211 formula = th1ToFormulaLin(hist);
0212 } else {
0213 formula = th1ToFormulaBinTree(hist);
0214 }
0215
0216
0217 TF1 f1("", formula.c_str());
0218 if (f1.IsZombie()) {
0219 std::cerr << "ERROR in BTagCalibration: "
0220 << "Invalid histogram; formula does not compile (>150 bins?): "
0221 << hist->GetName();
0222 throw std::exception();
0223 }
0224 }
0225
0226 std::string BTagEntry::makeCSVHeader()
0227 {
0228 return "OperatingPoint, "
0229 "measurementType, "
0230 "sysType, "
0231 "jetFlavor, "
0232 "etaMin, "
0233 "etaMax, "
0234 "ptMin, "
0235 "ptMax, "
0236 "discrMin, "
0237 "discrMax, "
0238 "formula \n";
0239 }
0240
0241 std::string BTagEntry::makeCSVLine() const
0242 {
0243 std::stringstream buff;
0244 buff << params.operatingPoint
0245 << ", " << params.measurementType
0246 << ", " << params.sysType
0247 << ", " << params.jetFlavor
0248 << ", " << params.etaMin
0249 << ", " << params.etaMax
0250 << ", " << params.ptMin
0251 << ", " << params.ptMax
0252 << ", " << params.discrMin
0253 << ", " << params.discrMax
0254 << ", \"" << formula
0255 << "\" \n";
0256 return buff.str();
0257 }
0258
0259 std::string BTagEntry::trimStr(std::string str) {
0260 size_t s = str.find_first_not_of(" \n\r\t");
0261 size_t e = str.find_last_not_of (" \n\r\t");
0262
0263 if((std::string::npos == s) || (std::string::npos == e))
0264 return "";
0265 else
0266 return str.substr(s, e-s+1);
0267 }
0268
0269
0270 #include <fstream>
0271 #include <sstream>
0272
0273
0274
0275 BTagCalibration::BTagCalibration(const std::string &taggr):
0276 tagger_(taggr)
0277 {}
0278
0279 BTagCalibration::BTagCalibration(const std::string &taggr,
0280 const std::string &filename):
0281 tagger_(taggr)
0282 {
0283 std::ifstream ifs(filename);
0284 if (!ifs.good()) {
0285 std::cerr << "ERROR in BTagCalibration: "
0286 << "input file not available: "
0287 << filename;
0288 throw std::exception();
0289 }
0290 readCSV(ifs);
0291 ifs.close();
0292 }
0293
0294 void BTagCalibration::addEntry(const BTagEntry &entry)
0295 {
0296 data_[token(entry.params)].push_back(entry);
0297 }
0298
0299 const std::vector<BTagEntry>& BTagCalibration::getEntries(
0300 const BTagEntry::Parameters &par) const
0301 {
0302 std::string tok = token(par);
0303 if (!data_.count(tok)) {
0304 std::cerr << "ERROR in BTagCalibration: "
0305 << "(OperatingPoint, measurementType, sysType) not available: "
0306 << tok;
0307 throw std::exception();
0308 }
0309 return data_.at(tok);
0310 }
0311
0312 void BTagCalibration::readCSV(const std::string &s)
0313 {
0314 std::stringstream buff(s);
0315 readCSV(buff);
0316 }
0317
0318 void BTagCalibration::readCSV(std::istream &s)
0319 {
0320 std::string line;
0321
0322
0323 getline(s,line);
0324 if (line.find("OperatingPoint") == std::string::npos) {
0325 addEntry(BTagEntry(line));
0326 }
0327
0328 while (getline(s,line)) {
0329 line = BTagEntry::trimStr(line);
0330 if (line.empty()) {
0331 continue;
0332 }
0333 addEntry(BTagEntry(line));
0334 }
0335 }
0336
0337 void BTagCalibration::makeCSV(std::ostream &s) const
0338 {
0339 s << tagger_ << ";" << BTagEntry::makeCSVHeader();
0340 for (std::map<std::string, std::vector<BTagEntry> >::const_iterator i
0341 = data_.cbegin(); i != data_.cend(); ++i) {
0342 const std::vector<BTagEntry> &vec = i->second;
0343 for (std::vector<BTagEntry>::const_iterator j
0344 = vec.cbegin(); j != vec.cend(); ++j) {
0345 s << j->makeCSVLine();
0346 }
0347 }
0348 }
0349
0350 std::string BTagCalibration::makeCSV() const
0351 {
0352 std::stringstream buff;
0353 makeCSV(buff);
0354 return buff.str();
0355 }
0356
0357 std::string BTagCalibration::token(const BTagEntry::Parameters &par)
0358 {
0359 std::stringstream buff;
0360 buff << par.operatingPoint << ", "
0361 << par.measurementType << ", "
0362 << par.sysType;
0363 return buff.str();
0364 }
0365
0366
0367
0368
0369 class BTagCalibrationReader::BTagCalibrationReaderImpl
0370 {
0371 friend class BTagCalibrationReader;
0372
0373 public:
0374 struct TmpEntry {
0375 float etaMin;
0376 float etaMax;
0377 float ptMin;
0378 float ptMax;
0379 float discrMin;
0380 float discrMax;
0381 TF1 func;
0382 };
0383
0384 private:
0385 BTagCalibrationReaderImpl(BTagEntry::OperatingPoint op,
0386 const std::string & sysType,
0387 const std::vector<std::string> & otherSysTypes={});
0388
0389 void load(const BTagCalibration & c,
0390 BTagEntry::JetFlavor jf,
0391 std::string measurementType);
0392
0393 double eval(BTagEntry::JetFlavor jf,
0394 float eta,
0395 float pt,
0396 float discr) const;
0397
0398 double eval_auto_bounds(const std::string & sys,
0399 BTagEntry::JetFlavor jf,
0400 float eta,
0401 float pt,
0402 float discr) const;
0403
0404 std::pair<float, float> min_max_pt(BTagEntry::JetFlavor jf,
0405 float eta,
0406 float discr) const;
0407
0408 std::pair<float, float> min_max_eta(BTagEntry::JetFlavor jf,
0409 float discr) const;
0410
0411 BTagEntry::OperatingPoint op_;
0412 std::string sysType_;
0413 std::vector<std::vector<TmpEntry> > tmpData_;
0414 std::vector<bool> useAbsEta_;
0415 std::map<std::string, std::shared_ptr<BTagCalibrationReaderImpl>> otherSysTypeReaders_;
0416 };
0417
0418
0419 BTagCalibrationReader::BTagCalibrationReaderImpl::BTagCalibrationReaderImpl(
0420 BTagEntry::OperatingPoint op,
0421 const std::string & sysType,
0422 const std::vector<std::string> & otherSysTypes):
0423 op_(op),
0424 sysType_(sysType),
0425 tmpData_(3),
0426 useAbsEta_(3, true)
0427 {
0428 for (const std::string & ost : otherSysTypes) {
0429 if (otherSysTypeReaders_.count(ost)) {
0430 std::cerr << "ERROR in BTagCalibration: "
0431 << "Every otherSysType should only be given once. Duplicate: "
0432 << ost;
0433 throw std::exception();
0434 }
0435 otherSysTypeReaders_[ost] = std::unique_ptr<BTagCalibrationReaderImpl>(
0436 new BTagCalibrationReaderImpl(op, ost)
0437 );
0438 }
0439 }
0440
0441 void BTagCalibrationReader::BTagCalibrationReaderImpl::load(
0442 const BTagCalibration & c,
0443 BTagEntry::JetFlavor jf,
0444 std::string measurementType)
0445 {
0446 if (!tmpData_[jf].empty()) {
0447 std::cerr << "ERROR in BTagCalibration: "
0448 << "Data for this jet-flavor is already loaded: "
0449 << jf;
0450 throw std::exception();
0451 }
0452
0453 BTagEntry::Parameters params(op_, measurementType, sysType_);
0454 const std::vector<BTagEntry> &entries = c.getEntries(params);
0455
0456 for (const auto &be : entries) {
0457 if (be.params.jetFlavor != jf) {
0458 continue;
0459 }
0460
0461 TmpEntry te;
0462 te.etaMin = be.params.etaMin;
0463 te.etaMax = be.params.etaMax;
0464 te.ptMin = be.params.ptMin;
0465 te.ptMax = be.params.ptMax;
0466 te.discrMin = be.params.discrMin;
0467 te.discrMax = be.params.discrMax;
0468
0469 if (op_ == BTagEntry::OP_RESHAPING) {
0470 te.func = TF1("", be.formula.c_str(),
0471 be.params.discrMin, be.params.discrMax);
0472 } else {
0473 te.func = TF1("", be.formula.c_str(),
0474 be.params.ptMin, be.params.ptMax);
0475 }
0476
0477 tmpData_[be.params.jetFlavor].push_back(te);
0478 if (te.etaMin < 0) {
0479 useAbsEta_[be.params.jetFlavor] = false;
0480 }
0481 }
0482
0483 for (auto & p : otherSysTypeReaders_) {
0484 p.second->load(c, jf, measurementType);
0485 }
0486 }
0487
0488 double BTagCalibrationReader::BTagCalibrationReaderImpl::eval(
0489 BTagEntry::JetFlavor jf,
0490 float eta,
0491 float pt,
0492 float discr) const
0493 {
0494 bool use_discr = (op_ == BTagEntry::OP_RESHAPING);
0495 if (useAbsEta_[jf] && eta < 0) {
0496 eta = -eta;
0497 }
0498
0499
0500
0501 const auto &entries = tmpData_.at(jf);
0502 for (unsigned i=0; i<entries.size(); ++i) {
0503 const auto &e = entries.at(i);
0504 if (
0505 e.etaMin <= eta && eta <= e.etaMax
0506 && e.ptMin < pt && pt <= e.ptMax
0507 ){
0508 if (use_discr) {
0509 if (e.discrMin <= discr && discr < e.discrMax) {
0510 return e.func.Eval(discr);
0511 }
0512 } else {
0513 return e.func.Eval(pt);
0514 }
0515 }
0516 }
0517
0518 return 0.;
0519 }
0520
0521 double BTagCalibrationReader::BTagCalibrationReaderImpl::eval_auto_bounds(
0522 const std::string & sys,
0523 BTagEntry::JetFlavor jf,
0524 float eta,
0525 float pt,
0526 float discr) const
0527 {
0528 auto sf_bounds_eta = min_max_eta(jf, discr);
0529 bool eta_is_out_of_bounds = false;
0530
0531 if (sf_bounds_eta.first < 0) sf_bounds_eta.first = -sf_bounds_eta.second;
0532
0533 if (useAbsEta_[jf] && eta < 0) {
0534 eta = -eta;
0535 }
0536
0537 if (eta <= sf_bounds_eta.first || eta > sf_bounds_eta.second ) {
0538 eta_is_out_of_bounds = true;
0539 }
0540
0541 if (eta_is_out_of_bounds) {
0542 return 1.;
0543 }
0544
0545
0546 auto sf_bounds = min_max_pt(jf, eta, discr);
0547 float pt_for_eval = pt;
0548 bool is_out_of_bounds = false;
0549
0550 if (pt <= sf_bounds.first) {
0551 pt_for_eval = sf_bounds.first + .0001;
0552 is_out_of_bounds = true;
0553 } else if (pt > sf_bounds.second) {
0554 pt_for_eval = sf_bounds.second - .0001;
0555 is_out_of_bounds = true;
0556 }
0557
0558
0559 double sf = eval(jf, eta, pt_for_eval, discr);
0560 if (sys == sysType_) {
0561 return sf;
0562 }
0563
0564
0565 if (!otherSysTypeReaders_.count(sys)) {
0566 std::cerr << "ERROR in BTagCalibration: "
0567 << "sysType not available (maybe not loaded?): "
0568 << sys;
0569 throw std::exception();
0570 }
0571 double sf_err = otherSysTypeReaders_.at(sys)->eval(jf, eta, pt_for_eval, discr);
0572 if (!is_out_of_bounds) {
0573 return sf_err;
0574 }
0575
0576
0577 sf_err = sf + 2*(sf_err - sf);
0578 return sf_err;
0579 }
0580
0581 std::pair<float, float> BTagCalibrationReader::BTagCalibrationReaderImpl::min_max_pt(
0582 BTagEntry::JetFlavor jf,
0583 float eta,
0584 float discr) const
0585 {
0586 bool use_discr = (op_ == BTagEntry::OP_RESHAPING);
0587 if (useAbsEta_[jf] && eta < 0) {
0588 eta = -eta;
0589 }
0590
0591 const auto &entries = tmpData_.at(jf);
0592 float min_pt = -1., max_pt = -1.;
0593 for (const auto & e: entries) {
0594 if (
0595 e.etaMin <= eta && eta <=e.etaMax
0596 ){
0597 if (min_pt < 0.) {
0598 min_pt = e.ptMin;
0599 max_pt = e.ptMax;
0600 continue;
0601 }
0602
0603 if (use_discr) {
0604 if (e.discrMin <= discr && discr < e.discrMax) {
0605 min_pt = min_pt < e.ptMin ? min_pt : e.ptMin;
0606 max_pt = max_pt > e.ptMax ? max_pt : e.ptMax;
0607 }
0608 } else {
0609 min_pt = min_pt < e.ptMin ? min_pt : e.ptMin;
0610 max_pt = max_pt > e.ptMax ? max_pt : e.ptMax;
0611 }
0612 }
0613 }
0614
0615 return std::make_pair(min_pt, max_pt);
0616 }
0617
0618 std::pair<float, float> BTagCalibrationReader::BTagCalibrationReaderImpl::min_max_eta(
0619 BTagEntry::JetFlavor jf,
0620 float discr) const
0621 {
0622 bool use_discr = (op_ == BTagEntry::OP_RESHAPING);
0623
0624 const auto &entries = tmpData_.at(jf);
0625 float min_eta = 0., max_eta = 0.;
0626 for (const auto & e: entries) {
0627
0628 if (use_discr) {
0629 if (e.discrMin <= discr && discr < e.discrMax) {
0630 min_eta = min_eta < e.etaMin ? min_eta : e.etaMin;
0631 max_eta = max_eta > e.etaMax ? max_eta : e.etaMax;
0632 }
0633 } else {
0634 min_eta = min_eta < e.etaMin ? min_eta : e.etaMin;
0635 max_eta = max_eta > e.etaMax ? max_eta : e.etaMax;
0636 }
0637 }
0638
0639
0640 return std::make_pair(min_eta, max_eta);
0641 }
0642
0643
0644 BTagCalibrationReader::BTagCalibrationReader(BTagEntry::OperatingPoint op,
0645 const std::string & sysType,
0646 const std::vector<std::string> & otherSysTypes):
0647 pimpl(new BTagCalibrationReaderImpl(op, sysType, otherSysTypes)) {}
0648
0649 void BTagCalibrationReader::load(const BTagCalibration & c,
0650 BTagEntry::JetFlavor jf,
0651 const std::string & measurementType)
0652 {
0653 pimpl->load(c, jf, measurementType);
0654 }
0655
0656 double BTagCalibrationReader::eval(BTagEntry::JetFlavor jf,
0657 float eta,
0658 float pt,
0659 float discr) const
0660 {
0661 return pimpl->eval(jf, eta, pt, discr);
0662 }
0663
0664 double BTagCalibrationReader::eval_auto_bounds(const std::string & sys,
0665 BTagEntry::JetFlavor jf,
0666 float eta,
0667 float pt,
0668 float discr) const
0669 {
0670 return pimpl->eval_auto_bounds(sys, jf, eta, pt, discr);
0671 }
0672
0673 std::pair<float, float> BTagCalibrationReader::min_max_pt(BTagEntry::JetFlavor jf,
0674 float eta,
0675 float discr) const
0676 {
0677 return pimpl->min_max_pt(jf, eta, discr);
0678 }
0679
0680
0681
0682