Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:01:56

0001 #include <algorithm>
0002 #include <sstream>
0003 
0004 #include "CondFormats/BTauObjects/interface/BTagEntry.h"
0005 #include "FWCore/Utilities/interface/Exception.h"
0006 
0007 BTagEntry::Parameters::Parameters(OperatingPoint op,
0008                                   std::string measurement_type,
0009                                   std::string sys_type,
0010                                   JetFlavor jf,
0011                                   float eta_min,
0012                                   float eta_max,
0013                                   float pt_min,
0014                                   float pt_max,
0015                                   float discr_min,
0016                                   float discr_max)
0017     : operatingPoint(op),
0018       measurementType(measurement_type),
0019       sysType(sys_type),
0020       jetFlavor(jf),
0021       etaMin(eta_min),
0022       etaMax(eta_max),
0023       ptMin(pt_min),
0024       ptMax(pt_max),
0025       discrMin(discr_min),
0026       discrMax(discr_max) {
0027   std::transform(measurementType.begin(), measurementType.end(), measurementType.begin(), ::tolower);
0028   std::transform(sysType.begin(), sysType.end(), sysType.begin(), ::tolower);
0029 }
0030 
0031 BTagEntry::BTagEntry(const std::string& csvLine, bool validate) {
0032   // make tokens
0033   std::stringstream buff(csvLine);
0034   std::vector<std::string> vec;
0035   std::string token;
0036   while (std::getline(buff, token, ","[0])) {
0037     token = BTagEntry::trimStr(token);
0038     if (token.empty()) {
0039       continue;
0040     }
0041     vec.push_back(token);
0042   }
0043   if (vec.size() != 11) {
0044     throw cms::Exception("BTagCalibration") << "Invalid csv line; num tokens != 11: " << csvLine;
0045   }
0046 
0047   // clean string values
0048   char chars[] = " \"\n";
0049   for (unsigned int i = 0; i < strlen(chars); ++i) {
0050     vec[1].erase(remove(vec[1].begin(), vec[1].end(), chars[i]), vec[1].end());
0051     vec[2].erase(remove(vec[2].begin(), vec[2].end(), chars[i]), vec[2].end());
0052     vec[10].erase(remove(vec[10].begin(), vec[10].end(), chars[i]), vec[10].end());
0053   }
0054 
0055   // make formula
0056   formula = vec[10];
0057   if (validate) {
0058     TF1 f1("", formula.c_str());  // compile formula to check validity
0059     if (f1.IsZombie()) {
0060       throw cms::Exception("BTagCalibration") << "Invalid csv line; formula does not compile: " << csvLine;
0061     }
0062   }
0063 
0064   // make parameters
0065   unsigned op = stoi(vec[0]);
0066   if (op > 3) {
0067     throw cms::Exception("BTagCalibration") << "Invalid csv line; OperatingPoint > 3: " << csvLine;
0068   }
0069   unsigned jf = stoi(vec[3]);
0070   if (jf > 2) {
0071     throw cms::Exception("BTagCalibration") << "Invalid csv line; JetFlavor > 2: " << csvLine;
0072   }
0073   params = BTagEntry::Parameters(BTagEntry::OperatingPoint(op),
0074                                  vec[1],
0075                                  vec[2],
0076                                  BTagEntry::JetFlavor(jf),
0077                                  stof(vec[4]),
0078                                  stof(vec[5]),
0079                                  stof(vec[6]),
0080                                  stof(vec[7]),
0081                                  stof(vec[8]),
0082                                  stof(vec[9]));
0083 }
0084 
0085 BTagEntry::BTagEntry(const std::string& func, BTagEntry::Parameters p) : formula(func), params(p) {
0086   TF1 f1("", formula.c_str());  // compile formula to check validity
0087   if (f1.IsZombie()) {
0088     throw cms::Exception("BTagCalibration") << "Invalid func string; formula does not compile: " << func;
0089   }
0090 }
0091 
0092 BTagEntry::BTagEntry(const TF1* func, BTagEntry::Parameters p)
0093     : formula(std::string(func->GetExpFormula("p").Data())), params(p) {
0094   if (func->IsZombie()) {
0095     throw cms::Exception("BTagCalibration") << "Invalid TF1 function; function is zombie: " << func->GetName();
0096   }
0097 }
0098 
0099 // Creates chained step functions like this:
0100 // "<prevous_bin> : x<bin_high_bound ? bin_value : <next_bin>"
0101 // e.g. "x<0 ? 1 : x<1 ? 2 : x<2 ? 3 : 4"
0102 std::string th1ToFormulaLin(const TH1* hist) {
0103   int nbins = hist->GetNbinsX();
0104   TAxis const* axis = hist->GetXaxis();
0105   std::stringstream buff;
0106   buff << "x<" << axis->GetBinLowEdge(1) << " ? 0. : ";  // default value
0107   for (int i = 1; i < nbins + 1; ++i) {
0108     char tmp_buff[50];
0109     sprintf(tmp_buff,
0110             "x<%g ? %g : ",  // %g is the smaller one of %e or %f
0111             axis->GetBinUpEdge(i),
0112             hist->GetBinContent(i));
0113     buff << tmp_buff;
0114   }
0115   buff << 0.;  // default value
0116   return buff.str();
0117 }
0118 
0119 // Creates step functions making a binary search tree:
0120 // "x<mid_bin_bound ? (<left side tree>) : (<right side tree>)"
0121 // e.g. "x<2 ? (x<1 ? (x<0 ? 0:0.1) : (1)) : (x<4 ? (x<3 ? 2:3) : (0))"
0122 std::string th1ToFormulaBinTree(const TH1* hist, int start = 0, int end = -1) {
0123   if (end == -1) {  // initialize
0124     start = 0.;
0125     end = hist->GetNbinsX() + 1;
0126     TH1* h2 = (TH1*)hist->Clone();
0127     h2->SetBinContent(start, 0);  // kill underflow
0128     h2->SetBinContent(end, 0);    // kill overflow
0129     std::string res = th1ToFormulaBinTree(h2, start, end);
0130     delete h2;
0131     return res;
0132   }
0133   if (start == end) {  // leave is reached
0134     char tmp_buff[20];
0135     sprintf(tmp_buff, "%g", hist->GetBinContent(start));
0136     return std::string(tmp_buff);
0137   }
0138   if (start == end - 1) {  // no parenthesis for neighbors
0139     char tmp_buff[70];
0140     sprintf(tmp_buff,
0141             "x<%g ? %g:%g",
0142             hist->GetXaxis()->GetBinUpEdge(start),
0143             hist->GetBinContent(start),
0144             hist->GetBinContent(end));
0145     return std::string(tmp_buff);
0146   }
0147 
0148   // top-down recursion
0149   std::stringstream buff;
0150   int mid = (end - start) / 2 + start;
0151   char tmp_buff[25];
0152   sprintf(tmp_buff, "x<%g ? (", hist->GetXaxis()->GetBinUpEdge(mid));
0153   buff << tmp_buff << th1ToFormulaBinTree(hist, start, mid) << ") : (" << th1ToFormulaBinTree(hist, mid + 1, end)
0154        << ")";
0155   return buff.str();
0156 }
0157 
0158 BTagEntry::BTagEntry(const TH1* hist, BTagEntry::Parameters p) : params(p) {
0159   int nbins = hist->GetNbinsX();
0160   TAxis const* axis = hist->GetXaxis();
0161 
0162   // overwrite bounds with histo values
0163   if (params.operatingPoint == BTagEntry::OP_RESHAPING) {
0164     params.discrMin = axis->GetBinLowEdge(1);
0165     params.discrMax = axis->GetBinUpEdge(nbins);
0166   } else {
0167     params.ptMin = axis->GetBinLowEdge(1);
0168     params.ptMax = axis->GetBinUpEdge(nbins);
0169   }
0170 
0171   // balanced full binary tree height = ceil(log(2*n_leaves)/log(2))
0172   // breakes even around 10, but lower values are more propable in pt-spectrum
0173   if (nbins < 15) {
0174     formula = th1ToFormulaLin(hist);
0175   } else {
0176     formula = th1ToFormulaBinTree(hist);
0177   }
0178 
0179   // compile formula to check validity
0180   TF1 f1("", formula.c_str());
0181   if (f1.IsZombie()) {
0182     throw cms::Exception("BTagCalibration")
0183         << "Invalid histogram; formula does not compile (>150 bins?): " << hist->GetName();
0184   }
0185 }
0186 
0187 std::string BTagEntry::makeCSVHeader() {
0188   return "OperatingPoint, "
0189          "measurementType, "
0190          "sysType, "
0191          "jetFlavor, "
0192          "etaMin, "
0193          "etaMax, "
0194          "ptMin, "
0195          "ptMax, "
0196          "discrMin, "
0197          "discrMax, "
0198          "formula \n";
0199 }
0200 
0201 std::string BTagEntry::makeCSVLine() const {
0202   std::stringstream buff;
0203   buff << params.operatingPoint << ", " << params.measurementType << ", " << params.sysType << ", " << params.jetFlavor
0204        << ", " << params.etaMin << ", " << params.etaMax << ", " << params.ptMin << ", " << params.ptMax << ", "
0205        << params.discrMin << ", " << params.discrMax << ", \"" << formula << "\" \n";
0206   return buff.str();
0207 }
0208 
0209 std::string BTagEntry::trimStr(std::string str) {
0210   size_t s = str.find_first_not_of(" \n\r\t");
0211   size_t e = str.find_last_not_of(" \n\r\t");
0212 
0213   if ((std::string::npos == s) || (std::string::npos == e))
0214     return "";
0215   else
0216     return str.substr(s, e - s + 1);
0217 }