Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:18:51

0001 #!/usr/bin/env python
0002 
0003 from __future__ import print_function
0004 import sys, os
0005 
0006 try:
0007     import ROOT
0008 except ImportError:
0009     print(file=sys.stderr)
0010     print("  Error importing the ROOT python module", file=sys.stderr)
0011     print("  Try e.g. initializing a CMSSW environment", file=sys.stderr)
0012     print("  prior to starting this script", file=sys.stderr)
0013     print(file=sys.stderr)
0014     sys.exit(1)
0015 
0016 #----------------------------------------------------------------------
0017 
def splitAtCapitalization(text):
    """Insert a blank in front of every capital letter of `text`.

    This makes identifiers built from capitalized words easier to read.
    A capital letter at the very beginning of the string does not get
    a leading blank.

    Known limitation: runs of capitals such as HLT or LW are split into
    single letters as well; a smarter algorithm would be needed to keep
    them together.
    """

    pieces = []

    for index, letter in enumerate(text):
        # only positions after the first one can receive a separator
        needs_blank = index > 0 and letter.isupper()
        pieces.append(' ' + letter if needs_blank else letter)

    return ''.join(pieces)
0036     
0037 #----------------------------------------------------------------------
## @param fin is the ROOT input file (the TFile, not the file name)
#
def findTopDir(fin):
    """Try to find the top directory of the E/gamma HLT DQM histograms.

    The path looked for has the form
    "DQMData/Run <run>/HLT/Run summary/HLTEgammaValidation".
    Note that the run number seems to be always 1 for MC but differs
    for data.

    If there is more than one 'Run <run>' entry below "DQMData", an error
    message is printed on stderr and the script exits with status 1
    (maybe this should be made more flexible in the future in order to
    allow DQM histogramming of data of multiple runs).

    Returns None if "DQMData" or the run directory is missing. Otherwise
    returns whatever the lookup of the full path yields; the caller is
    expected to compare that result with '== None' as well.
    """

    import re

    theDir = fin.Get("DQMData")

    # NOTE(review): '== None' (instead of 'is None') is kept on purpose for
    # objects handed back by ROOT: a missing object is presumably returned
    # as a null object which compares equal to None without being the None
    # singleton -- confirm against the ROOT version in use before changing.
    if theDir == None:
        return None

    # look for entries of the form 'Run %d'; a raw string is used so that
    # '\d' is not treated as an (invalid) string escape sequence
    runDirPattern = re.compile(r"Run \d+$")

    runSubdirNames = [key.GetName()
                      for key in theDir.GetListOfKeys()
                      if runDirPattern.match(key.GetName())]

    if len(runSubdirNames) > 1:
        # more than one run found
        print("more than one run found in the DQM file, this is currently not supported", file=sys.stderr)
        sys.exit(1)

    # check that we have at least (exactly) one directory
    if not runSubdirNames:
        return None

    # get the rest
    return theDir.Get(runSubdirNames[0] + "/HLT/Run summary/HLTEgammaValidation")
0083  
0084 
0085 #----------------------------------------------------------------------
0086 # main
0087 #----------------------------------------------------------------------
from optparse import OptionParser

# The usage text is shown by --help and when the number of positional
# arguments is wrong.
parser = OptionParser("""

  usage: %prog [options] root_file

    given the output of the E/gamma HLT validation histogramming module,
    (DQM output) prints some information about path and module efficiencies.

    Useful for determining which paths actually have some meaningful
    results in the file and which ones not.
""")

# --summary: one line per path, no per-module table
parser.add_option(
    "--summary",
    action="store_true",
    dest="summary_mode",
    default=False,
    help="print path efficiencies only, nothing about modules",
)

# --path: may be repeated, the values are collected in a list
parser.add_option(
    "--path",
    action="append",
    dest="selected_paths",
    default=[],
    help="restrict printout to specific path. "
         "This option can be given more than once to select several paths.",
)

# --ignore-empty: drop paths without any entry
parser.add_option(
    "--ignore-empty",
    action="store_true",
    dest="ignore_empty_paths",
    default=False,
    help="Print only information about non-empty paths (i.e. those with at least one entry in the total_eff histogram).",
)

# --ignore-zero-eff: drop paths where nothing survived the last module
parser.add_option(
    "--ignore-zero-eff",
    action="store_true",
    dest="ignore_zero_efficiency",
    default=False,
    help="Print only information about paths which have at least one entry in the bin of the last module in the overview histogram. Note that this also excludes those paths excluded by --ignore-empty .",
)

# --no-split-names: switches OFF the (default) splitting of module names
parser.add_option(
    "--no-split-names",
    action="store_false",
    dest="split_names",
    default=True,
    help="Do not split module names.",
)


(options, ARGV) = parser.parse_args()

# exactly one positional argument (the ROOT file) is required
if len(ARGV) != 1:
    parser.print_help()
    sys.exit(1)
0143 
#----------------------------------------
# open the ROOT file
#----------------------------------------

fin = ROOT.TFile.Open(ARGV[0])

# Stop with a clear message if the file could not be opened instead of
# failing later inside findTopDir().
# NOTE(review): a failed open is presumably reported as a null object or as
# a 'zombie' file, depending on the ROOT version -- confirm; '== None' is
# used for the null check like everywhere else in this script.
if fin == None or fin.IsZombie():
    print("could not open root file '%s'" % ARGV[0], file=sys.stderr)
    print(file=sys.stderr)
    print("Exiting", file=sys.stderr)
    sys.exit(1)

# directory containing one subdirectory per HLT path
top_dir = findTopDir(fin)

if top_dir == None:
    print("could not find a top directory inside root file", file=sys.stderr)
    print("A typical top directory for MC is 'DQMData/Run 1/HLT/Run summary/HLTEgammaValidation'", file=sys.stderr)
    print(file=sys.stderr)
    print("Exiting", file=sys.stderr)
    sys.exit(1)
0158 
0159 
#--------------------
# collect the names of the paths to print and determine the width
# of the path name column (for nice printout)
#--------------------

allPathNames = []

for path_key in top_dir.GetListOfKeys():
    key_name = path_key.GetName()

    # only directories are of interest (the top directory
    # contains other kinds of objects, too)
    path_dir = top_dir.Get(key_name)
    if not isinstance(path_dir, ROOT.TDirectoryFile):
        continue

    # honour --path: when given, keep only the requested paths
    if options.selected_paths and key_name not in options.selected_paths:
        continue

    # The additional checks of the printing loop further down are
    # deliberately not duplicated here, so the column width may come
    # out slightly larger than strictly needed (but this part stays
    # readable).
    allPathNames.append(key_name)

# the column is at least 100 characters wide and grows with longer names
maxPathNameLen = max([100] + list(map(len, allPathNames)))
0188 
#--------------------
# print the efficiencies of each selected path (and, unless --summary
# was given, of each of its modules)
#--------------------

for path_name in allPathNames:

    path_dir = top_dir.Get(path_name)

    # just select directories (there are also other
    # objects in the top directory)
    if not isinstance(path_dir, ROOT.TDirectoryFile):
        continue

    # find modules in order from total_eff_MC_matched histogram
    total_eff_histo = path_dir.Get("total_eff_MC_matched")

    if total_eff_histo == None:
        # try with data:
        total_eff_histo = path_dir.Get("total_eff_RECO_matched")

    if total_eff_histo == None:
        # neither histogram exists: nothing can be printed for this path,
        # so skip it instead of failing on the calls below
        print("no total efficiency histogram found for path '%s', skipping it" % path_name, file=sys.stderr)
        continue

    # subtract 2 for 'Total' and 'Gen' bins
    num_modules = total_eff_histo.GetNbinsX() - 2

    # The module bins are read as bins 1 .. num_modules in the loop below,
    # so 'total' is the content of the bin of the last module.
    # NOTE(review): this is not bin num_modules + 1 -- confirm that the
    # last module bin (rather than a separate 'Total' bin) is intended.
    total = total_eff_histo.GetBinContent(num_modules)
    num_gen_events = total_eff_histo.GetBinContent(num_modules + 2)

    if num_gen_events == 0 and options.ignore_empty_paths:
        continue

    # check whether at least one event passed all modules
    # ('total' is the number of entries in the bin of the last module)
    if options.ignore_zero_efficiency and total < 1:
        continue

    #--------------------
    # path header
    #--------------------

    if not options.summary_mode:
        print("----------------------------------------")

    print(("PATH: %-" + str(maxPathNameLen) + "s") % path_name, end=' ')

    if num_gen_events > 0:
        print("(%5.1f%% eff.)" % (100 * total / float(num_gen_events)), end=' ')

    elif options.summary_mode:
        print("(no entries)", end=' ')

    print()

    # in summary mode the path line above is all that is printed
    if options.summary_mode:
        continue

    print("----------------------------------------")

    print("  %-80s: %5d events" % ('generated', num_gen_events))

    #--------------------
    # one line per module; the efficiency of a module is calculated
    # with respect to the output of the previous line
    #--------------------

    previous_module_output = num_gen_events

    print()

    x_axis = total_eff_histo.GetXaxis()

    # histogram bins are addressed starting from 1
    for bin_index in range(1, num_modules + 1):

        module_name = x_axis.GetBinLabel(bin_index)

        if options.split_names:
            module_name = splitAtCapitalization(module_name)

        events = total_eff_histo.GetBinContent(bin_index)

        print("  %-90s: %5d events" % (module_name, events), end=' ')

        # no efficiency can be quoted if nothing entered this module
        if previous_module_output > 0:
            eff = 100 * events / float(previous_module_output)
            print("(%5.1f%% eff.)" % (eff), end=' ')

            # more events coming out than going in: flagged as an error
            # unless the module name contains 'Unseeded'
            if eff > 100.:
                if "Unseeded" in module_name:
                    print(">100% Unseeded Filter", end=' ')
                else:
                    print("ERROR", end=' ')

        print()

        previous_module_output = events

    print()
0285     
0286 
0287     
0288 
0289