Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /HLTrigger/Configuration/scripts/hltFindDuplicates is written in an unsupported language. File is not indexed.

0001 #!/usr/bin/env python3
0002 """hltFindDuplicates: script to find duplicate modules of an HLT configuration.
0003 
0004 Input.
0005  Path to a local cmsRun configuration file, or stdin.
0006 
0007 Output.
0008  A directory containing
0009  (1) the input cmsRun configuration, and
0010  (2) text files listing the groups of duplicate modules.
0011 
0012 Examples.
0013 
0014  # input: local configuration file
0015  hltFindDuplicates tmp.py -o output_dir
0016 
0017  # input: stdin
0018  hltConfigFromDB --configName /dev/CMSSW_X_Y_0/GRun/Vn | hltFindDuplicates -o output_dir
0019  hltGetConfiguration /dev/CMSSW_X_Y_0/GRun/Vn | hltFindDuplicates -o output_dir -x realData=0 globalTag=@
0020 """
0021 import os
0022 import sys
0023 import argparse
0024 import re
0025 import itertools
0026 import shutil
0027 
0028 import FWCore.ParameterSet.Config as cms
0029 
0030 from HLTrigger.Configuration.Tools.frozendict import frozendict
0031 
# Module types that are excluded from the duplicate search.
whitelist_types = ['HLTPrescaler', 'HLTBool']

# Module labels that are excluded from the duplicate search.
whitelist_labels = [
  'hltPreExpressSmart',
  'hltPreEventDisplaySmart',
  'hltPreHLTDQMOutputSmart',
  'hltPreHLTMONOutputSmart',
]
0043 
def whitelist(module):
  """Tell whether `module` is exempt from the duplicate search.

  A module is exempt when its `label` appears in `whitelist_labels`
  or its `type` appears in `whitelist_types`.
  """
  if module.label in whitelist_labels:
    return True
  return module.type in whitelist_types
0046 
def iterate(arg):
  """Tell whether `arg` should be traversed element by element.

  Strings are reported as non-iterable (they are handled as scalars);
  any other object qualifies if it exposes an `__iter__` attribute.
  """
  if isinstance(arg, str):
    return False
  return '__iter__' in dir(arg)
0049 
def freeze(arg):
  """Recursively convert `arg` into immutable containers.

  A plain dict becomes a frozendict, any other non-string iterable
  becomes a tuple, and everything else is returned unchanged.
  """
  # exact type match on purpose: dict subclasses are not treated as dicts here
  if type(arg) is dict:
    return frozendict((key, freeze(value)) for key, value in arg.items())
  if iterate(arg):
    return tuple(map(freeze, arg))
  return arg
0057 
def unfreeze(arg):
  """Recursively convert `arg` into mutable containers.

  A frozendict becomes a dict, any other non-string iterable becomes
  a list, and everything else is returned unchanged.
  """
  # exact type match on purpose: only frozendict itself is turned into a dict
  if type(arg) is frozendict:
    return {key: unfreeze(value) for key, value in arg.items()}
  if iterate(arg):
    return [unfreeze(item) for item in arg]
  return arg
0065 
def pythonize(arg):
  """Recursively reduce a configuration object to immutable Python containers.

  An object exposing `parameters_` is replaced by `arg.parameters_()`;
  otherwise an object exposing `value` is replaced by `arg.value()`.
  The result is then converted: a plain dict becomes a frozendict, any other
  non-string iterable becomes a tuple, anything else is returned as is.
  """
  attributes = dir(arg)
  if 'parameters_' in attributes:
    arg = arg.parameters_()
  elif 'value' in attributes:
    arg = arg.value()

  if type(arg) is dict:
    return frozendict((name, pythonize(value)) for name, value in arg.items())
  if iterate(arg):
    return tuple(pythonize(item) for item in arg)
  return arg
0079 
def mkdirp(dirpath):
  """Create `dirpath` and any missing parent directories (like `mkdir -p`).

  Nothing happens if the directory already exists.

  Raises:
    OSError: if the directory cannot be created, e.g. because `dirpath`
      already exists and is not a directory.
  """
  os.makedirs(dirpath, exist_ok=True)
0086 
class Module(object):
  """Summary of one configured module: label, type, parameters and a hash.

  The hash combines the module type with the (pythonized) parameters and
  is the value returned by `key()`.
  """
  type = ''
  label = ''
  params = frozendict()
  hash = 0

  def __init__(self, module):
    """Read label, type and parameters from `module` and compute the hash."""
    self.label = module.label_()
    self.type = module.type_()
    self.params = pythonize(module.parameters_())
    self.__rehash(self.params)

  def __str__(self):
    return f'{self.label} (type: {self.type}): {self.params}'

  def key(self):
    """Return the current hash of this module."""
    return self.hash

  def __rehash(self, params):
    """Recompute the hash from the module type and the given `params`."""
    type_part = hash(self.type) << 4
    self.hash = type_part + hash(params)

  def __check(self, value, check):
    """Tell whether any string nested in `value` is matched by regex `check`.

    Lists and dict values are searched recursively; non-string leaves never match.
    """
    if isinstance(value, str):
      return bool(check.match(value))
    if isinstance(value, dict):
      value = value.values()
    elif not isinstance(value, list):
      return False
    return any(self.__check(item, check) for item in value)

  def __sub(self, value, group, label):
    """Return a copy of `value` with every nested string rewritten by regex `group`.

    The first capture group of a match is replaced by `label`,
    while the second capture group is kept.
    """
    if isinstance(value, str):
      return group.sub(r'%s\2' % label, value)
    if isinstance(value, list):
      return [self.__sub(item, group, label) for item in value]
    if isinstance(value, dict):
      return {name: self.__sub(item, group, label) for name, item in value.items()}
    return value

  def apply_rename(self, groups, verbosity_level):
    """Recompute the hash after applying the renaming described by `groups`.

    `groups` maps a replacement label to a pair whose second element is the
    compiled regex to apply. The renaming is done on a mutable copy of the
    parameters: `self.params` itself is left untouched, only the hash changes.
    Details are printed if `verbosity_level` is larger than 2.
    """
    verbose = verbosity_level > 2
    renamed = unfreeze(self.params)
    changed = False

    if verbose:
      print('')
      print(f'             {self.label} ({self.type})')
      print(f'             parameters before: {renamed}')

    for label, (_, pattern) in groups.items():
      # only existing keys are reassigned, so the dict can be updated while looping
      for name in renamed:
        value = renamed[name]
        if self.__check(value, pattern):
          renamed[name] = self.__sub(value, pattern, label)
          changed = True

    if verbose:
      print(f'             parameters after:  {renamed}')
      print(f'             modified = {changed}')

    if changed:
      self.__rehash(frozendict(renamed))
0147 
class ModuleList(object):
  """Collection of Module objects with helpers to group them by hash.

  Attributes:
    modules: list of the (non-whitelisted) Module objects.
    hashToLabelDict: maps the hash of a group of duplicates to the
      human-readable label assigned to that group.
  """

  def __init__(self, *args):
    """Fill the list from `args`: each one is a module or an iterable of modules."""
    # Containers are created per instance: as class-level attributes they
    # would be shared (and mutated) by every ModuleList object.
    self.modules = []
    self.hashToLabelDict = {}
    for arg in args:
      if iterate(arg):
        self.extend(arg)
      else:
        self.append(arg)

  def append(self, module):
    """Wrap `module` in a Module and store it, unless it is whitelisted."""
    m = Module(module)
    if not whitelist(m):
      self.modules.append(m)

  def extend(self, modules):
    """Append every element of `modules`."""
    for module in modules:
      self.append(module)

  def hash_label(self, hash_value):
    """Return the label recorded for `hash_value`, or None if there is none."""
    return self.hashToLabelDict.get(hash_value, None)

  def sort(self):
    """Sort the modules in place by their hash."""
    self.modules.sort(key = Module.key)

  def group(self):
    """Return the groups of modules sharing the same hash.

    The result maps the common hash to a pair (labels, regex), where `labels`
    is the sorted list of the module labels in the group, and `regex` matches
    any of those labels at the start of a string when followed by the end of
    the string or by ':'. Hashes owned by a single module are skipped.
    """
    groups = dict()
    # itertools.groupby only merges adjacent items, hence the sort
    self.sort()
    for v, g in itertools.groupby(self.modules, Module.key):
      group = list(g)
      if len(group) > 1:
        g = [ m.label for m in group ]
        g.sort()
        # hash identifying the group (it is the same for every module in the group)
        g_key = group[0].key()
        if g_key not in self.hashToLabelDict:
          # label identifying this group of modules
          # (set only once so it cannot change from step to step)
          self.hashToLabelDict[g_key] = f'{group[0].type} ({g[0]})'
        r = re.compile(r'^(%s)($|:)' % r'|'.join(g))
        groups[g_key] = (g, r)
    return groups

  def apply_rename(self, groups, verbosity_level):
    """Forward `apply_rename(groups, verbosity_level)` to every module."""
    for module in self.modules:
      module.apply_rename(groups, verbosity_level)

  def dump(self, indent=0):
    """Print label, type and parameters of every module, indented by `indent` spaces."""
    for m in self.modules:
      print(' '*indent + "%s = (%s) {" % (m.label, m.type))
      for k, v in iter(m.params.items()):
        print(' '*indent + "  %s = %s" % (k, v))
      print(' '*indent + '}\n')
0202 
def _count_duplicates(groups):
  """Return the number of modules in `groups` minus one per group."""
  return sum(len(labels) for labels, _ in groups.values()) - len(groups)

def _write_groups(output_dir, basename, groups, modules, sed_flags):
  """Write `<basename>.sed` and `<basename>.txt` in `output_dir` for `groups`.

  Groups are written in alphabetical order of their label (as returned by
  `modules.hash_label`). `sed_flags` is appended to every sed substitution.
  """
  label_to_hash = {modules.hash_label(group_hash): group_hash for group_hash in groups}
  ordered = [(label, groups[label_to_hash[label]][0]) for label in sorted(label_to_hash)]

  with open(os.path.join(output_dir, f'{basename}.sed'), 'w') as sed_file:
    for label, members in ordered:
      sed_file.write('s#\\<\\(%s\\)\\>#%s#%s\n' % ('\\|'.join(members), label, sed_flags))

  with open(os.path.join(output_dir, f'{basename}.txt'), 'w') as txt_file:
    # one entry per group, entries separated by an empty line
    txt_file.write('\n'.join('# %s\n%s\n' % (label, '\n'.join(members)) for label, members in ordered))

def findDuplicates(process, output_dir, verbosity_level):
  """Find the groups of duplicate modules of `process` and write them to `output_dir`.

  Analyzers, producers and filters of `process` are grouped by hash. The
  groups are then used to rename the references inside the module parameters,
  and the grouping is repeated until the number of duplicates stops changing.
  Every iteration writes `step<N>.sed` and `step<N>.txt`; the final result is
  written to `groups.sed` and `groups.txt`.

  Args:
    process: object providing `analyzers_()`, `producers_()` and `filters_()`.
    output_dir: path to the output directory (created if missing).
    verbosity_level: printouts are enabled above 0, debug printouts above 2.
  """
  mkdirp(output_dir)

  modules = ModuleList(
    iter(process.analyzers_().values()),
    iter(process.producers_().values()),
    iter(process.filters_().values())
  )

  oldups = 0
  groups = modules.group()
  dups = _count_duplicates(groups)

  index = 1
  while dups != oldups:
    _write_groups(output_dir, f'step{index}', groups, modules, sed_flags='g')

    if verbosity_level > 0:
      print(f"[step {index:>2d}] found {dups:>3d} duplicates in {len(groups):>3d} groups")

    if verbosity_level > 2:
      print(f'[step {index:>2d}]   groups={groups}')
      print(f'[step {index:>2d}]   ---------------')
      print(f'[step {index:>2d}]   apply_rename ..')

    oldups = dups
    modules.apply_rename(groups, verbosity_level)

    if verbosity_level > 2:
      print()
      print('            ------------------------')
      print('            modules (after renaming)')
      print('            ------------------------')
      modules.dump(indent=14)

    groups = modules.group()
    dups = _count_duplicates(groups)
    index += 1

  # NOTE(review): unlike the per-step files, groups.sed has no 'g' flag;
  # kept as in the original implementation - confirm this is intended
  _write_groups(output_dir, 'groups', groups, modules, sed_flags='')
0271 
0272 ##
0273 ## main
0274 ##
if __name__ == '__main__':

    ### args
    parser = argparse.ArgumentParser(
        prog = './'+os.path.basename(__file__),
        formatter_class = argparse.RawDescriptionHelpFormatter,
        description = __doc__,
        argument_default = argparse.SUPPRESS,
    )

    # menu: local cmsRun cfg file, or stdin
    parser.add_argument('menu',
                        nargs = '?',
                        metavar = 'MENU',
                        default = None,
                        help = 'Path to cmsRun configuration file (if not specified, stdin is used)')

    # output-dir: path to directory containing output files
    parser.add_argument('-o', '--output-dir',
                        metavar = 'OUTPUT_DIR',
                        default = 'hltFindDuplicates_output',
                        help = 'Path to directory containing output files')

    # menu arguments: list of arguments to be applied to the cmsRun configuration file
    # (via argparse, VarParsing, or similar)
    parser.add_argument('-x', '--menu-args',
                        nargs = '+',
                        metavar = 'MENU_ARGS',
                        default = [],
                        help = 'List of arguments (each without whitespaces) to be applied to the cmsRun configuration file')

    # verbosity level: level of verbosity of stdout/stderr printouts
    parser.add_argument('-v', '--verbosity-level',
                        metavar = 'VERBOSITY_LEVEL',
                        type = int,
                        default = 1,
                        help = 'Verbosity level')

    # parse command line arguments and options
    opts = parser.parse_args()

    print('-'*25)
    print('hltFindDuplicates')
    print('-'*25)

    # create new output directory (an existing path is never reused)
    if os.path.exists(opts.output_dir):
        log_msg = 'Failed to create output directory (a directory or file already exists under that path)'
        raise RuntimeError(f'{log_msg}: {opts.output_dir}')

    mkdirp(opts.output_dir)
    output_config_filepath = os.path.join(opts.output_dir, 'config.py')

    print(f'output directory: {opts.output_dir}')
    print('-'*25)

    # parse the HLT configuration from a local cfg file, or from standard input
    hlt = {'process': None, 'fragment': None}

    # keep a copy of the input configuration in the output directory
    if opts.menu is not None:
        if not os.path.isfile(opts.menu):
            raise RuntimeError(f'Invalid path to input file (file does not exist): {opts.menu}')
        shutil.copyfile(opts.menu, output_config_filepath)
    else:
        with open(output_config_filepath, 'w') as config_file:
            config_file.write(sys.stdin.read())

    # the configuration is executed with the menu arguments as its command line
    sys.argv = [sys.argv[0], output_config_filepath] + opts.menu_args

    # NOTE: exec runs the configuration as arbitrary Python code,
    # so this script must only be used on trusted configurations
    with open(output_config_filepath) as config_file:
        config_source = config_file.read()
    exec(config_source, globals(), hlt)

    # find cms.Process object ('fragment' takes precedence over 'process')
    process = None
    if hlt['process'] is not None:
        process = hlt['process']
    if hlt['fragment'] is not None:
        process = hlt['fragment']

    if process is None or not isinstance(process, cms.Process):
        raise RuntimeError('Failed to find object of type cms.Process !')

    findDuplicates(process, output_dir=opts.output_dir, verbosity_level=opts.verbosity_level)