Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 13:32:28

0001 from __future__ import print_function
0002 import ROOT
0003 import numpy as np
0004 
0005 class BTagWeightCalculator:
0006     """
0007     Calculates the jet and event correction factor as a weight based on the b-tagger shape-dependent data/mc 
0008     corrections.
0009 
0010     Currently, the recipe is only described in https://twiki.cern.ch/twiki/bin/viewauth/CMS/TTbarHbbRun2ReferenceAnalysis#Applying_CSV_weights
0011 
0012     In short, jet-by-jet correction factors as a function of pt, eta and CSV have been derived.
0013     This code accesses the flavour of MC jets and gets the correct weight histogram
0014     corresponding to the pt, eta and flavour of the jet.
0015     From there, the per-jet weight is just accessed according to the value of the discriminator.
0016     """
0017     def __init__(self, fn_hf, fn_lf) :
0018         self.pdfs = {}
0019 
0020         #bin edges of the heavy-flavour histograms
0021         #pt>=20 && pt<30 -> bin=0
0022         #pt>=30 && pt<40 -> bin=1
0023         #etc
0024         self.pt_bins_hf = np.array([20, 30, 40, 60, 100])
0025         self.eta_bins_hf = np.array([0, 2.41])
0026 
0027         #bin edges of the light-flavour histograms 
0028         self.pt_bins_lf = np.array([20, 30, 40, 60])
0029         self.eta_bins_lf = np.array([0, 0.8, 1.6, 2.41])
0030 
0031         #name of the default b-tagger
0032         self.btag = "pfCombinedInclusiveSecondaryVertexV2BJetTags"
0033         self.init(fn_hf, fn_lf)
0034 
0035         # systematic uncertainties for different flavour assignments
0036         self.systematics_for_b = ["JESUp", "JESDown", "LFUp", "LFDown",
0037                                   "HFStats1Up", "HFStats1Down", "HFStats2Up", "HFStats2Down"]
0038         self.systematics_for_c = ["cErr1Up", "cErr1Down", "cErr2Up", "cErr2Down"]
0039         self.systematics_for_l = ["JESUp", "JESDown", "HFUp", "HFDown",
0040                                   "LFStats1Up", "LFStats1Down", "LFStats2Up", "LFStats2Down"]
0041 
0042     def getBin(self, bvec, val):
0043         return int(bvec.searchsorted(val, side="right")) - 1
0044 
0045     def init(self, fn_hf, fn_lf):
0046         """
0047         fn_hf (string) - path to the heavy flavour weight file
0048         fn_lf (string) - path to the light flavour weight file
0049         """
0050         print("[BTagWeightCalculator]: Initializing from files", fn_hf, fn_lf)
0051 
0052         #print "hf"
0053         self.pdfs["hf"] = self.getHistosFromFile(fn_hf)
0054         #print "lf"
0055         self.pdfs["lf"] = self.getHistosFromFile(fn_lf)
0056 
0057         return True
0058 
0059     def getHistosFromFile(self, fn):
0060         """
0061         Initialized the lookup table for b-tag weight histograms based on jet
0062         pt, eta and flavour.
0063         The format of the weight file is similar to:
0064          KEY: TH1D     csv_ratio_Pt0_Eta0_final;1
0065          KEY: TH1D     csv_ratio_Pt0_Eta0_final_JESUp;1
0066          KEY: TH1D     csv_ratio_Pt0_Eta0_final_JESDown;1
0067          KEY: TH1D     csv_ratio_Pt0_Eta0_final_LFUp;1
0068          KEY: TH1D     csv_ratio_Pt0_Eta0_final_LFDown;1
0069          KEY: TH1D     csv_ratio_Pt0_Eta0_final_Stats1Up;1
0070          KEY: TH1D     csv_ratio_Pt0_Eta0_final_Stats1Down;1
0071          KEY: TH1D     csv_ratio_Pt0_Eta0_final_Stats2Up;1
0072          KEY: TH1D     csv_ratio_Pt0_Eta0_final_Stats2Down;1
0073          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final;2
0074          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final;1
0075          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final_cErr1Up;1
0076          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final_cErr1Down;1
0077          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final_cErr2Up;1
0078          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final_cErr2Down;1
0079         """
0080         ret = {}
0081         tf = ROOT.TFile(fn)
0082         if not tf or tf.IsZombie():
0083             raise FileNotFoundError("Could not open file {0}".format(fn))
0084         ROOT.gROOT.cd()
0085         for k in tf.GetListOfKeys():
0086             kn = k.GetName()
0087             if not (kn.startswith("csv_ratio") or kn.startswith("c_csv_ratio") ):
0088                 continue
0089             spl = kn.split("_")
0090             is_c = 1 if kn.startswith("c_csv_ratio") else 0
0091 
0092             if spl[2+is_c] == "all":
0093                 ptbin = -1
0094                 etabin = -1
0095                 kind = "all"
0096                 syst = "nominal"
0097             else:
0098                 ptbin = int(spl[2+is_c][2:])
0099                 etabin = int(spl[3+is_c][3:])
0100                 kind = spl[4+is_c]
0101                 if len(spl)==(6+is_c):
0102                     syst = spl[5+is_c]
0103                 else:
0104                     syst = "nominal"
0105             ret[(is_c, ptbin, etabin, kind, syst)] = k.ReadObj().Clone()
0106         return ret
0107 
0108     def calcJetWeight(self, jet, kind, systematic):
0109         """
0110         Calculates the per-jet correction factor.
0111         jet: either an object with the attributes pt, eta, mcFlavour, self.btag
0112              or a Heppy Jet
0113         kind: string specifying the name of the corrections. Usually "final".
0114         systematic: the correction systematic, e.g. "nominal", "JESUp", etc
0115      """
0116         #if jet is a simple class with attributes
0117         if isinstance(getattr(jet, "pt"), float):
0118             pt   = getattr(jet, "pt")
0119             aeta = abs(getattr(jet, "eta"))
0120             fl   = abs(getattr(jet, "hadronFlavour"))
0121             csv  = getattr(jet, self.btag)
0122         #if jet is a heppy Jet object
0123         else:
0124             #print "could not get jet", e
0125             pt   = jet.pt()
0126             aeta = abs(jet.eta())
0127             fl   = abs(jet.hadronFlavour())
0128             csv  = jet.btag(self.btag)
0129         return self.calcJetWeightImpl(pt, aeta, fl, csv, kind, systematic)
0130 
0131     def calcJetWeightImpl(self, pt, aeta, fl, csv, kind, systematic):
0132 
0133         is_b = (fl == 5)
0134         is_c = (fl == 4)
0135         is_l = not (is_b or is_c)
0136 
0137         #if evaluating a weight for systematic uncertainties, make sure the jet is affected. If not, return 'nominal' weight
0138         if systematic != "nominal":
0139             if (is_b and systematic not in self.systematics_for_b) or (is_c and systematic not in self.systematics_for_c) or (is_l and systematic not in self.systematics_for_l):
0140                 systematic = "nominal"
0141 
0142         #needed because the TH1 names for Stats are same for HF and LF
0143         if "Stats" in systematic:
0144             systematic = systematic[2:]
0145 
0146         if is_b or is_c:
0147             ptbin = self.getBin(self.pt_bins_hf, pt)
0148             etabin = self.getBin(self.eta_bins_hf, aeta)
0149         else:
0150             ptbin = self.getBin(self.pt_bins_lf, pt)
0151             etabin = self.getBin(self.eta_bins_lf, aeta)
0152 
0153         if ptbin < 0 or etabin < 0:
0154             #print "pt or eta bin outside range", pt, aeta, ptbin, etabin
0155             return 1.0
0156 
0157         k = (is_c, ptbin, etabin, kind, systematic)
0158         hdict = self.pdfs["lf"]
0159         if is_b or is_c:
0160             hdict = self.pdfs["hf"]
0161         h = hdict.get(k, None)
0162         if not h:
0163             #print "no histogram", k
0164             return 1.0
0165 
0166         if csv > 1:
0167             csv = 1
0168             
0169         csvbin = 1
0170         csvbin = h.FindBin(csv)
0171         #This is to fix csv=-10 not being accounted for in CSV SF input hists
0172         if csvbin <= 0:
0173             csvbin = 1
0174         if csvbin > h.GetNbinsX():
0175             csvbin = h.GetNbinsX()
0176 
0177         w = h.GetBinContent(csvbin)
0178         return w
0179 
0180     def calcEventWeight(self, jets, kind, systematic):
0181         """
0182         The per-event weight is just a product of per-jet weights.
0183         """
0184         weights = np.array(
0185             [self.calcJetWeight(jet, kind, systematic)
0186             for jet in jets]
0187         )
0188 
0189         wtot = np.prod(weights)
0190         return wtot