Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-25 02:29:49

0001 import ROOT
0002 import numpy as np
0003 
0004 class BTagWeightCalculator:
0005     """
0006     Calculates the jet and event correction factor as a weight based on the b-tagger shape-dependent data/mc 
0007     corrections.
0008 
0009     Currently, the recipe is only described in https://twiki.cern.ch/twiki/bin/viewauth/CMS/TTbarHbbRun2ReferenceAnalysis#Applying_CSV_weights
0010 
0011     In short, jet-by-jet correction factors as a function of pt, eta and CSV have been derived.
0012     This code accesses the flavour of MC jets and gets the correct weight histogram
0013     corresponding to the pt, eta and flavour of the jet.
0014     From there, the per-jet weight is just accessed according to the value of the discriminator.
0015     """
0016     def __init__(self, fn_hf, fn_lf) :
0017         self.pdfs = {}
0018 
0019         #bin edges of the heavy-flavour histograms
0020         #pt>=20 && pt<30 -> bin=0
0021         #pt>=30 && pt<40 -> bin=1
0022         #etc
0023         self.pt_bins_hf = np.array([20, 30, 40, 60, 100])
0024         self.eta_bins_hf = np.array([0, 2.41])
0025 
0026         #bin edges of the light-flavour histograms 
0027         self.pt_bins_lf = np.array([20, 30, 40, 60])
0028         self.eta_bins_lf = np.array([0, 0.8, 1.6, 2.41])
0029 
0030         #name of the default b-tagger
0031         self.btag = "pfCombinedInclusiveSecondaryVertexV2BJetTags"
0032         self.init(fn_hf, fn_lf)
0033 
0034         # systematic uncertainties for different flavour assignments
0035         self.systematics_for_b = ["JESUp", "JESDown", "LFUp", "LFDown",
0036                                   "HFStats1Up", "HFStats1Down", "HFStats2Up", "HFStats2Down"]
0037         self.systematics_for_c = ["cErr1Up", "cErr1Down", "cErr2Up", "cErr2Down"]
0038         self.systematics_for_l = ["JESUp", "JESDown", "HFUp", "HFDown",
0039                                   "LFStats1Up", "LFStats1Down", "LFStats2Up", "LFStats2Down"]
0040 
0041     def getBin(self, bvec, val):
0042         return int(bvec.searchsorted(val, side="right")) - 1
0043 
0044     def init(self, fn_hf, fn_lf):
0045         """
0046         fn_hf (string) - path to the heavy flavour weight file
0047         fn_lf (string) - path to the light flavour weight file
0048         """
0049         print("[BTagWeightCalculator]: Initializing from files", fn_hf, fn_lf)
0050 
0051         #print "hf"
0052         self.pdfs["hf"] = self.getHistosFromFile(fn_hf)
0053         #print "lf"
0054         self.pdfs["lf"] = self.getHistosFromFile(fn_lf)
0055 
0056         return True
0057 
0058     def getHistosFromFile(self, fn):
0059         """
0060         Initialized the lookup table for b-tag weight histograms based on jet
0061         pt, eta and flavour.
0062         The format of the weight file is similar to:
0063          KEY: TH1D     csv_ratio_Pt0_Eta0_final;1
0064          KEY: TH1D     csv_ratio_Pt0_Eta0_final_JESUp;1
0065          KEY: TH1D     csv_ratio_Pt0_Eta0_final_JESDown;1
0066          KEY: TH1D     csv_ratio_Pt0_Eta0_final_LFUp;1
0067          KEY: TH1D     csv_ratio_Pt0_Eta0_final_LFDown;1
0068          KEY: TH1D     csv_ratio_Pt0_Eta0_final_Stats1Up;1
0069          KEY: TH1D     csv_ratio_Pt0_Eta0_final_Stats1Down;1
0070          KEY: TH1D     csv_ratio_Pt0_Eta0_final_Stats2Up;1
0071          KEY: TH1D     csv_ratio_Pt0_Eta0_final_Stats2Down;1
0072          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final;2
0073          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final;1
0074          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final_cErr1Up;1
0075          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final_cErr1Down;1
0076          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final_cErr2Up;1
0077          KEY: TH1D     c_csv_ratio_Pt0_Eta0_final_cErr2Down;1
0078         """
0079         ret = {}
0080         tf = ROOT.TFile(fn)
0081         if not tf or tf.IsZombie():
0082             raise FileNotFoundError("Could not open file {0}".format(fn))
0083         ROOT.gROOT.cd()
0084         for k in tf.GetListOfKeys():
0085             kn = k.GetName()
0086             if not (kn.startswith("csv_ratio") or kn.startswith("c_csv_ratio") ):
0087                 continue
0088             spl = kn.split("_")
0089             is_c = 1 if kn.startswith("c_csv_ratio") else 0
0090 
0091             if spl[2+is_c] == "all":
0092                 ptbin = -1
0093                 etabin = -1
0094                 kind = "all"
0095                 syst = "nominal"
0096             else:
0097                 ptbin = int(spl[2+is_c][2:])
0098                 etabin = int(spl[3+is_c][3:])
0099                 kind = spl[4+is_c]
0100                 if len(spl)==(6+is_c):
0101                     syst = spl[5+is_c]
0102                 else:
0103                     syst = "nominal"
0104             ret[(is_c, ptbin, etabin, kind, syst)] = k.ReadObj().Clone()
0105         return ret
0106 
0107     def calcJetWeight(self, jet, kind, systematic):
0108         """
0109         Calculates the per-jet correction factor.
0110         jet: either an object with the attributes pt, eta, mcFlavour, self.btag
0111              or a Heppy Jet
0112         kind: string specifying the name of the corrections. Usually "final".
0113         systematic: the correction systematic, e.g. "nominal", "JESUp", etc
0114      """
0115         #if jet is a simple class with attributes
0116         if isinstance(getattr(jet, "pt"), float):
0117             pt   = getattr(jet, "pt")
0118             aeta = abs(getattr(jet, "eta"))
0119             fl   = abs(getattr(jet, "hadronFlavour"))
0120             csv  = getattr(jet, self.btag)
0121         #if jet is a heppy Jet object
0122         else:
0123             #print "could not get jet", e
0124             pt   = jet.pt()
0125             aeta = abs(jet.eta())
0126             fl   = abs(jet.hadronFlavour())
0127             csv  = jet.btag(self.btag)
0128         return self.calcJetWeightImpl(pt, aeta, fl, csv, kind, systematic)
0129 
0130     def calcJetWeightImpl(self, pt, aeta, fl, csv, kind, systematic):
0131 
0132         is_b = (fl == 5)
0133         is_c = (fl == 4)
0134         is_l = not (is_b or is_c)
0135 
0136         #if evaluating a weight for systematic uncertainties, make sure the jet is affected. If not, return 'nominal' weight
0137         if systematic != "nominal":
0138             if (is_b and systematic not in self.systematics_for_b) or (is_c and systematic not in self.systematics_for_c) or (is_l and systematic not in self.systematics_for_l):
0139                 systematic = "nominal"
0140 
0141         #needed because the TH1 names for Stats are same for HF and LF
0142         if "Stats" in systematic:
0143             systematic = systematic[2:]
0144 
0145         if is_b or is_c:
0146             ptbin = self.getBin(self.pt_bins_hf, pt)
0147             etabin = self.getBin(self.eta_bins_hf, aeta)
0148         else:
0149             ptbin = self.getBin(self.pt_bins_lf, pt)
0150             etabin = self.getBin(self.eta_bins_lf, aeta)
0151 
0152         if ptbin < 0 or etabin < 0:
0153             #print "pt or eta bin outside range", pt, aeta, ptbin, etabin
0154             return 1.0
0155 
0156         k = (is_c, ptbin, etabin, kind, systematic)
0157         hdict = self.pdfs["lf"]
0158         if is_b or is_c:
0159             hdict = self.pdfs["hf"]
0160         h = hdict.get(k, None)
0161         if not h:
0162             #print "no histogram", k
0163             return 1.0
0164 
0165         if csv > 1:
0166             csv = 1
0167             
0168         csvbin = 1
0169         csvbin = h.FindBin(csv)
0170         #This is to fix csv=-10 not being accounted for in CSV SF input hists
0171         if csvbin <= 0:
0172             csvbin = 1
0173         if csvbin > h.GetNbinsX():
0174             csvbin = h.GetNbinsX()
0175 
0176         w = h.GetBinContent(csvbin)
0177         return w
0178 
0179     def calcEventWeight(self, jets, kind, systematic):
0180         """
0181         The per-event weight is just a product of per-jet weights.
0182         """
0183         weights = np.array(
0184             [self.calcJetWeight(jet, kind, systematic)
0185             for jet in jets]
0186         )
0187 
0188         wtot = np.prod(weights)
0189         return wtot