Egamma/test/egammaHltPrintRelvalEfficiencies.py

0001 #!/usr/bin/env python
0002
0003 import sys, os
0004
0005 try:
0006     import ROOT
0007 except ImportError:
0008     print(file=sys.stderr)
0009     print("  Error importing the ROOT python module", file=sys.stderr)
0010     print("  Try e.g. initializing a CMSSW environment", file=sys.stderr)
0011     print("  prior to starting this script", file=sys.stderr)
0012     print(file=sys.stderr)
0013     sys.exit(1)
0014
0015 #----------------------------------------------------------------------
0016
def splitAtCapitalization(text):
    """Insert blanks before capitalized words to make identifiers readable.

    A blank is inserted in front of an upper case letter when it starts a
    new word, i.e. when

      - the preceding character is not an upper case letter, or
      - the following character is a lower case letter (the capital is
        then the first letter of a word following an acronym).

    Runs of capitals such as 'HLT' or 'LW' are therefore kept together
    instead of being split into single letters. No blank is ever inserted
    in front of the first character.

    @param text is the identifier to split

    Returns the text with the blanks inserted, e.g. 'hltL1NonIsoHLTFilter'
    becomes 'hlt L1 Non Iso HLT Filter'.
    """

    pieces = []
    last_pos = len(text) - 1

    for pos, ch in enumerate(text):
        if pos > 0 and ch.isupper():
            starts_after_non_capital = not text[pos - 1].isupper()
            starts_word_after_acronym = pos < last_pos and text[pos + 1].islower()

            if starts_after_non_capital or starts_word_after_acronym:
                pieces.append(' ')

        pieces.append(ch)

    # join once at the end rather than growing a string character by character
    return ''.join(pieces)
0035
0036 #----------------------------------------------------------------------
0037 ## @param fin is the ROOT input file (the TFile, not the file name)
0038 #
def findTopDir(fin):
    """Tries to find the top directory of the DQM histograms.

    A full path looks like:

      "DQMData/Run <run>/HLT/Run summary/HLTEgammaValidation"

    Note that the run number seems to be always 1 for MC but differs
    for data. If there is more than one 'Run <number>' directory, this
    function prints an error message on stderr and exits with status 1
    (maybe this should be made more flexible in the future in order to
    allow DQM histogramming of data of multiple runs).

    @param fin is the ROOT input file (the TFile, not the file name)

    Returns None if there is no 'DQMData' directory or no
    'Run <number>' directory below it. Otherwise the result of looking
    up the rest of the path is returned as is; callers should compare
    it against None with '==' to detect a missing directory.
    """

    import re

    theDir = fin.Get("DQMData")

    # NOTE(review): '== None' (not 'is None') is kept on purpose for
    # objects returned by ROOT; presumably a failed Get() yields a
    # null-pointer proxy which only compares equal to None -- confirm
    # before changing.
    if theDir == None:
        return None

    # now look for directories of the form 'Run <number>'
    runSubdirName = None

    for subdirName in [ key.GetName() for key in theDir.GetListOfKeys() ]:

        # raw string: '\d' is not a valid escape sequence in a plain string
        if not re.match(r"Run \d+$", subdirName):
            continue

        if runSubdirName is not None:
            # more than one run found
            print("more than one run found in the DQM file, this is currently not supported", file=sys.stderr)
            sys.exit(1)

        runSubdirName = subdirName

    # check that we have at least (exactly) one directory
    if runSubdirName is None:
        return None

    # get the rest
    return theDir.Get(runSubdirName + "/HLT/Run summary/HLTEgammaValidation")
0082
0083
0084 #----------------------------------------------------------------------
0085 # main
0086 #----------------------------------------------------------------------
from optparse import OptionParser

# the usage text is shown by --help and whenever the number of
# positional arguments is wrong
parser = OptionParser(usage="""

  usage: %prog [options] root_file

    given the output of the E/gamma HLT validation histogramming module,
    (DQM output) prints some information about path and module efficiencies.

    Useful for determining which paths actually have some meaningful
    results in the file and which ones not.
""")

# only the per-path efficiency line, no module details
parser.add_option(
    "--summary",
    action="store_true",
    dest="summary_mode",
    default=False,
    help="print path efficiencies only, nothing about modules",
)

# may be repeated; an empty list means 'all paths'
parser.add_option(
    "--path",
    action="append",
    dest="selected_paths",
    default=[],
    help="restrict printout to specific path. "
         "This option can be given more than once to select several paths.",
)

parser.add_option(
    "--ignore-empty",
    action="store_true",
    dest="ignore_empty_paths",
    default=False,
    help="Print only information about non-empty paths (i.e. those with at least one entry in the total_eff histogram).",
)

parser.add_option(
    "--ignore-zero-eff",
    action="store_true",
    dest="ignore_zero_efficiency",
    default=False,
    help="Print only information about paths which have at least one entry in the bin of the last module in the overview histogram. Note that this also excludes those paths excluded by --ignore-empty .",
)

# note the inverted logic: giving the option switches splitting OFF
parser.add_option(
    "--no-split-names",
    action="store_false",
    dest="split_names",
    default=True,
    help="Do not split module names.",
)

options, ARGV = parser.parse_args()

# exactly one positional argument (the ROOT file) is required
if len(ARGV) != 1:
    parser.print_help()
    sys.exit(1)
0142
#----------------------------------------
# open the ROOT file
#----------------------------------------

fin = ROOT.TFile.Open(ARGV[0])

# Stop with a clear message if the file could not be opened, instead of
# failing later when the file object is used to look for the histograms.
if not fin or fin.IsZombie():
    print("could not open root file '%s'" % ARGV[0], file=sys.stderr)
    print(file=sys.stderr)
    print("Exiting", file=sys.stderr)
    sys.exit(1)

# directory holding one subdirectory per HLT path
top_dir = findTopDir(fin)

# '== None' is used deliberately for objects coming from ROOT
if top_dir == None:
    print("could not find a top directory inside root file", file=sys.stderr)
    print("A typical top directory for MC is 'DQMData/Run 1/HLT/Run summary/HLTEgammaValidation'", file=sys.stderr)
    print(file=sys.stderr)
    print("Exiting", file=sys.stderr)
    sys.exit(1)
0157
0158
#--------------------
# collect the names of the paths to print and determine the width
# of the path name column (for nice printout)
#--------------------

allPathNames = []

for path_key in top_dir.GetListOfKeys():
    pathName = path_key.GetName()

    # the top directory also contains objects which are not
    # directories; only directories correspond to paths
    path_dir = top_dir.Get(pathName)
    if not isinstance(path_dir, ROOT.TDirectoryFile):
        continue

    # honour --path (an empty selection means: take all paths)
    if options.selected_paths and pathName not in options.selected_paths:
        continue

    # The further checks done when printing are not repeated here,
    # so the column may end up slightly wider than strictly needed
    # (but the code here stays more readable).
    allPathNames.append(pathName)

# the leading 100 acts as a minimum width of the column
maxPathNameLen = max([100] + [len(name) for name in allPathNames])
0187
0188 #--------------------
0189
# Print, for each selected path, the overall efficiency and (unless
# --summary was given) the number of events after each module together
# with the efficiency relative to the previous module.
for path_name in allPathNames:

    path_dir = top_dir.Get(path_name)

    # just select directories (there are also other
    # objects in the top directory)
    if not isinstance(path_dir, ROOT.TDirectoryFile):
        continue

    # find modules in order from total_eff_MC_matched histogram
    total_eff_histo = path_dir.Get("total_eff_MC_matched")

    if total_eff_histo == None:
        # try with data:
        total_eff_histo = path_dir.Get("total_eff_RECO_matched")

    if total_eff_histo == None:
        # neither histogram is there: nothing can be printed for this
        # path, so skip it instead of failing on the missing object
        print("no total efficiency histogram found for path '%s', skipping it" % path_name, file=sys.stderr)
        continue

    # subtract 2 for 'Total' and 'Gen' bins
    num_modules = total_eff_histo.GetNbinsX() - 2

    # content of the bin of the last module (bins are counted from 1),
    # used as the number of events passing the entire path
    total = total_eff_histo.GetBinContent(num_modules)

    # content of the last bin, used as the number of generated events
    num_gen_events = total_eff_histo.GetBinContent(num_modules + 2)

    if num_gen_events == 0 and options.ignore_empty_paths:
        continue

    # check whether at least one event passed all modules
    # ('total' already is the number of entries in the last module)
    if options.ignore_zero_efficiency and total < 1:
        continue

    #--------------------

    if not options.summary_mode:
        print("----------------------------------------")

    print("PATH: %-*s" % (maxPathNameLen, path_name), end=' ')

    if num_gen_events > 0:
        print("(%5.1f%% eff.)" % (100 * total / float(num_gen_events)), end=' ')

    elif options.summary_mode:
        print("(no entries)", end=' ')

    print()

    if options.summary_mode:
        # path efficiencies only, nothing about modules
        continue

    print("----------------------------------------")

    print("  %-80s: %5d events" % ('generated', num_gen_events))

    # each module's efficiency is relative to the output of the one before
    # it; the first module is compared to the number of generated events
    previous_module_output = num_gen_events

    print()

    for bin_index in range(1, num_modules + 1):

        module_name = total_eff_histo.GetXaxis().GetBinLabel(bin_index)

        if options.split_names:
            module_name = splitAtCapitalization(module_name)

        events = total_eff_histo.GetBinContent(bin_index)

        print("  %-90s: %5d events" % (module_name, events), end=' ')

        if previous_module_output > 0:
            eff = 100 * events / float(previous_module_output)
            print("(%5.1f%% eff.)" % (eff), end=' ')

            # more events after a module than before it: flagged as an
            # error unless the module name contains 'Unseeded'
            if eff > 100.:
                if "Unseeded" in module_name:
                    print(">100% Unseeded Filter", end=' ')
                else:
                    print("ERROR", end=' ')

        print()

        previous_module_output = events

    print()
0284
0285
0286
0287
0288