Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-26 02:34:21

0001 #!/usr/bin/env python
0002 
0003 import sys, os
0004 
0005 try:
0006     import ROOT
0007 except ImportError:
0008     print(file=sys.stderr)
0009     print("  Error importing the ROOT python module", file=sys.stderr)
0010     print("  Try e.g. initializing a CMSSW environment", file=sys.stderr)
0011     print("  prior to starting this script", file=sys.stderr)
0012     print(file=sys.stderr)
0013     sys.exit(1)
0014 
0015 #----------------------------------------------------------------------
0016 
def splitAtCapitalization(text, keepAcronyms=False):
    """Split a string before capital letters.

    Useful to make identifiers which consist of capitalized words easier
    to read, e.g. 'SingleElectron' becomes 'Single Electron'.

    text         -- the string to split
    keepAcronyms -- if False (the default, identical to the historical
                    behaviour) a blank is inserted before every capital
                    letter except the first character of the string, so
                    'HLTEgamma' becomes 'H L T Egamma'.
                    If True, runs of capital letters are kept together
                    and are only separated from a following capitalized
                    word, so 'HLTEgamma' becomes 'HLT Egamma' and 'LW'
                    stays 'LW'.

    Returns the string with the blanks inserted.
    """

    # collect the pieces in a list and join once at the end instead of
    # repeatedly concatenating strings
    pieces = []

    for pos, ch in enumerate(text):
        if ch.isupper() and pos > 0:
            if not keepAcronyms:
                pieces.append(' ')
            else:
                prevIsUpper = text[pos - 1].isupper()
                # slicing (instead of indexing) yields '' at the end of
                # the string, for which islower() is False
                nextIsLower = text[pos + 1:pos + 2].islower()

                # split at the start of a capitalized word: either the
                # previous character is not a capital or this capital
                # is followed by a lower case letter (i.e. it is the
                # first letter of a word following an acronym)
                if not prevIsUpper or nextIsLower:
                    pieces.append(' ')

        pieces.append(ch)

    return ''.join(pieces)
0035     
0036 #----------------------------------------------------------------------
## @param fin is the ROOT input file (the TFile, not the file name)
#
def findTopDir(fin):
    """Try to find the top directory of the E/gamma HLT validation
    DQM histograms inside the given ROOT file.

    The directory looked for is
    'DQMData/Run <run>/HLT/Run summary/HLTEgammaValidation'.

    Note that the run number seems to be always 1 for MC but differs
    for data. If there is more than one 'Run <run>' directory, this
    function prints an error message on stderr and exits with status 1
    (maybe this should be made more flexible in the future in order to
    allow DQM histogramming of data of multiple runs).

    Returns None if 'DQMData' or a 'Run <run>' directory below it could
    not be found. Otherwise returns whatever Get() yields for the full
    path (callers should check that result as well).
    """

    import re

    # a path looks like:
    # "DQMData/Run <run>/HLT/Run summary/HLTEgammaValidation"

    theDir = fin.Get("DQMData")

    # NOTE(review): '== None' is kept on purpose for objects returned by
    # ROOT: presumably Get() can return a null pointer proxy which is not
    # the None singleton -- verify before changing this to 'is None'.
    if theDir == None:
        return None

    # now look for directories of the form 'Run %d'
    # (raw string: '\d' is not a valid escape in a normal string literal)
    runPattern = re.compile(r"Run \d+$")

    runSubdirName = None

    for subdirName in [ x.GetName() for x in theDir.GetListOfKeys() ]:

        if not runPattern.match(subdirName):
            continue

        if runSubdirName is not None:
            # more than one run found
            print("more than one run found in the DQM file, this is currently not supported", file=sys.stderr)
            sys.exit(1)

        runSubdirName = subdirName

    # check that we have at least (exactly) one directory
    if runSubdirName is None:
        return None

    # get the rest
    return theDir.Get(runSubdirName + "/HLT/Run summary/HLTEgammaValidation")
0082  
0083 
0084 #----------------------------------------------------------------------
0085 # main
0086 #----------------------------------------------------------------------
from optparse import OptionParser

# command line interface: exactly one positional argument (the ROOT
# file) plus a few switches controlling the amount of printout
parser = OptionParser("""

  usage: %prog [options] root_file

    given the output of the E/gamma HLT validation histogramming module,
    (DQM output) prints some information about path and module efficiencies.

    Useful for determining which paths actually have some meaningful
    results in the file and which ones not.
""")

# only one line per path
parser.add_option(
    "--summary",
    action="store_true",
    dest="summary_mode",
    default=False,
    help="print path efficiencies only, nothing about modules",
)

# can be repeated, the values are collected in a list
parser.add_option(
    "--path",
    action="append",
    dest="selected_paths",
    default=[],
    help="restrict printout to specific path. This option can be given more than once to select several paths.",
)

parser.add_option(
    "--ignore-empty",
    action="store_true",
    dest="ignore_empty_paths",
    default=False,
    help="Print only information about non-empty paths (i.e. those with at least one entry in the total_eff histogram).",
)

parser.add_option(
    "--ignore-zero-eff",
    action="store_true",
    dest="ignore_zero_efficiency",
    default=False,
    help="Print only information about paths which have at least one entry in the bin of the last module in the overview histogram. Note that this also excludes those paths excluded by --ignore-empty .",
)

# splitting of module names is on by default, this switches it off
parser.add_option(
    "--no-split-names",
    action="store_false",
    dest="split_names",
    default=True,
    help="Do not split module names.",
)

(options, ARGV) = parser.parse_args()

# the name of the ROOT file is the only positional argument
if len(ARGV) != 1:
    parser.print_help()
    sys.exit(1)
0142 
0143 #----------------------------------------
0144 # open the ROOT file
0145 #----------------------------------------
0146 
try:
    fin = ROOT.TFile.Open(ARGV[0])
except OSError:
    # NOTE(review): recent PyROOT versions are understood to raise
    # OSError when the file can not be opened (instead of returning a
    # null pointer) -- treat both cases the same way
    fin = None

# a null pointer returned by ROOT evaluates to False; a zombie file is
# one which ROOT could not read properly
if not fin or fin.IsZombie():
    print("could not open root file '%s'" % ARGV[0], file=sys.stderr)
    print(file=sys.stderr)
    print("Exiting", file=sys.stderr)
    sys.exit(1)

top_dir = findTopDir(fin)

# NOTE(review): '== None' is kept on purpose for objects returned by
# ROOT: presumably a null pointer proxy can be returned which is not the
# None singleton -- verify before changing this to 'is None'.
if top_dir == None:
    print("could not find a top directory inside root file", file=sys.stderr)
    print("A typical top directory for MC is 'DQMData/Run 1/HLT/Run summary/HLTEgammaValidation'", file=sys.stderr)
    print(file=sys.stderr)
    print("Exiting", file=sys.stderr)
    sys.exit(1)
0157 
0158 
0159 #--------------------
0160 # determine the length of the longest path name (for nice printout)
0161 #--------------------
0162 
# names of all path directories to be printed
allPathNames = []

for path_key in top_dir.GetListOfKeys():
    pathName = path_key.GetName()

    # the top directory also contains objects which are
    # not directories: skip those
    path_dir = top_dir.Get(pathName)
    if not isinstance(path_dir, ROOT.TDirectoryFile):
        continue

    # honour the --path option (if given at least once)
    if options.selected_paths and pathName not in options.selected_paths:
        continue

    # the additional checks performed when printing the paths
    # are not repeated here, i.e. the width calculated below
    # may be slightly larger than necessary (but this keeps
    # the code here simple)
    allPathNames.append(pathName)

# width of the path name column in the printout: the length of the
# longest path name but never less than 100 characters
maxPathNameLen = max([100] + [len(name) for name in allPathNames])
0187 
0188 #--------------------
0189 
# print the efficiencies of each selected path (and, unless in summary
# mode, of each module of the path)
for path_name in allPathNames:

    path_dir = top_dir.Get(path_name)

    # just select directories (there are also other
    # objects in the top directory)
    if not isinstance(path_dir, ROOT.TDirectoryFile):
        continue

    # find modules in order from total_eff_MC_matched histogram
    total_eff_histo = path_dir.Get("total_eff_MC_matched")

    # NOTE(review): '== None' is kept on purpose for objects returned by
    # ROOT: presumably a null pointer proxy can be returned which is not
    # the None singleton -- verify before changing this to 'is None'.
    if total_eff_histo == None:
        # try with data:
        total_eff_histo = path_dir.Get("total_eff_RECO_matched")

    if total_eff_histo == None:
        # neither of the two histograms exists: nothing can be said
        # about this path (and calling methods on the missing object
        # would fail)
        print("no total_eff_MC_matched or total_eff_RECO_matched histogram found in path %s, skipping it" % path_name, file=sys.stderr)
        continue

    # subtract 2 for 'Total' and 'Gen' bins
    num_modules = total_eff_histo.GetNbinsX() - 2

    # ROOT bin numbers start at one: bin num_modules is the one of the
    # last module, bin num_modules + 2 is the last bin of the histogram
    total = total_eff_histo.GetBinContent(num_modules)
    num_gen_events = total_eff_histo.GetBinContent(num_modules + 2)

    if num_gen_events == 0 and options.ignore_empty_paths:
        continue

    # check whether at least one event passed all modules
    if options.ignore_zero_efficiency:
        # get number of entries in last module

        last_module_index = num_modules - 1

        last_module_accepted_events = total_eff_histo.GetBinContent(last_module_index + 1)

        if last_module_accepted_events < 1:
            continue

    #--------------------

    if not options.summary_mode:
        print("----------------------------------------")

    print(("PATH: %-" + str(maxPathNameLen) + "s") % path_name, end=' ')

    if num_gen_events > 0:
        print("(%5.1f%% eff.)" % (100 * total / float(num_gen_events)), end=' ')

    elif options.summary_mode:
        print("(no entries)", end=' ')

    print()

    if not options.summary_mode:
        print("----------------------------------------")

        print("  %-80s: %5d events" % ('generated', num_gen_events))

    if options.summary_mode:
        continue

    # the efficiency of each module is calculated with respect to the
    # number of events in the bin of the previous module (with respect
    # to the number of generated events for the first module)
    previous_module_output = num_gen_events

    print()

    for i in range(num_modules):

        module_name = total_eff_histo.GetXaxis().GetBinLabel(i + 1)

        if options.split_names:
            module_name = splitAtCapitalization(module_name)

        events = total_eff_histo.GetBinContent(i + 1)

        print("  %-90s: %5d events" % (module_name, events), end=' ')

        if previous_module_output > 0:
            eff = 100 * events / float(previous_module_output)
            print("(%5.1f%% eff.)" % (eff), end=' ')

            # more events than in the previous module: this is only
            # reported as an error if the module name does not contain
            # 'Unseeded'
            if eff > 100.:
                if "Unseeded" in module_name:
                    print(">100% Unseeded Filter", end=' ')
                else:
                    print("ERROR", end=' ')

        print()

        previous_module_output = events

    print()
0284     
0285 
0286     
0287 
0288