Warning, /HLTrigger/Configuration/scripts/hltFindDuplicates is written in an unsupported language. File is not indexed.
0001 #!/usr/bin/env python3
0002 """hltFindDuplicates: script to find duplicate modules of an HLT configuration.
0003
0004 Input.
0005 Path to a local cmsRun configuration file, or stdin.
0006
0007 Output.
0008 A directory containing
0009 (1) the input cmsRun configuration, and
0010 (2) text files listing the groups of duplicate modules.
0011
0012 Examples.
0013
0014 # input: local configuration file
0015 hltFindDuplicates tmp.py -o output_dir
0016
0017 # input: stdin
0018 hltConfigFromDB --configName /dev/CMSSW_X_Y_0/GRun/Vn | hltFindDuplicates -o output_dir
0019 hltGetConfiguration /dev/CMSSW_X_Y_0/GRun/Vn | hltFindDuplicates -o output_dir -x realData=0 globalTag=@
0020 """
0021 import os
0022 import sys
0023 import argparse
0024 import re
0025 import itertools
0026 import shutil
0027
0028 import FWCore.ParameterSet.Config as cms
0029
0030 from HLTrigger.Configuration.Tools.frozendict import frozendict
0031
# Module types that whitelist() exempts from the duplicate search.
whitelist_types = ['HLTPrescaler', 'HLTBool']

# Module labels that whitelist() exempts from the duplicate search.
whitelist_labels = [
    'hltPreExpressSmart', 'hltPreEventDisplaySmart',
    'hltPreHLTDQMOutputSmart', 'hltPreHLTMONOutputSmart',
]
0043
def whitelist(module):
    """Tell whether `module` is exempt from the duplicate search.

    Returns True when the module's label appears in `whitelist_labels`
    or its type appears in `whitelist_types`, False otherwise.
    """
    if module.label in whitelist_labels:
        return True
    return module.type in whitelist_types
0046
def iterate(arg):
    """Return True if `arg` exposes `__iter__` and is not a string.

    Strings are excluded so that callers treat them as scalar values
    instead of walking them character by character.
    """
    if isinstance(arg, str):
        return False
    return '__iter__' in dir(arg)
0049
def freeze(arg):
    """Recursively convert `arg` into an immutable structure.

    Plain dicts become `frozendict`s, any other non-string iterable
    becomes a tuple, and every other value is returned unchanged.
    """
    # Exact type match on purpose: only plain dicts take this branch.
    if type(arg) is dict:
        return frozendict((key, freeze(val)) for key, val in arg.items())
    if iterate(arg):
        return tuple(map(freeze, arg))
    return arg
0057
def unfreeze(arg):
    """Recursively convert `arg` into a mutable structure.

    `frozendict`s become plain dicts, any other non-string iterable
    becomes a list, and every other value is returned unchanged.
    """
    # Exact type match on purpose: only frozendict instances take this branch.
    if type(arg) is frozendict:
        return {key: unfreeze(val) for key, val in arg.items()}
    if iterate(arg):
        return [unfreeze(item) for item in arg]
    return arg
0065
def pythonize(arg):
    """Recursively convert a configuration object into hashable Python data.

    An object exposing `parameters_` is replaced by `arg.parameters_()`;
    otherwise an object exposing `value` is replaced by `arg.value()`.
    The result is then converted recursively: plain dicts become
    `frozendict`s, other non-string iterables become tuples, and any
    other value is returned unchanged.
    """
    attributes = dir(arg)
    if 'parameters_' in attributes:
        arg = arg.parameters_()
    elif 'value' in attributes:
        arg = arg.value()

    # Exact type match on purpose: only plain dicts take this branch.
    if type(arg) is dict:
        return frozendict((key, pythonize(val)) for key, val in arg.items())
    if iterate(arg):
        return tuple(map(pythonize, arg))
    return arg
0079
def mkdirp(dirpath):
    """Create `dirpath` and any missing parent directories ("mkdir -p").

    Nothing happens if the directory already exists.

    Raises:
        OSError: if the directory cannot be created, e.g. because a
            non-directory already exists under that path.
    """
    # exist_ok=True only tolerates an already-existing directory; any
    # other failure still propagates to the caller.
    os.makedirs(dirpath, exist_ok=True)
0086
class Module(object):
    """Hashable summary of one module of the configuration.

    Attributes:
        type:   value returned by the wrapped module's `type_()`.
        label:  value returned by the wrapped module's `label_()`.
        params: the module's parameters, converted by `pythonize()`.
        hash:   integer computed from the type and the parameters;
                it is returned by `key()`.
    """
    type = ''
    label = ''
    params = frozendict()
    hash = 0

    def __init__(self, module):
        """Record label, type and parameters of `module`, then hash them."""
        self.label = module.label_()
        self.type = module.type_()
        self.params = pythonize(module.parameters_())
        self.__rehash(self.params)

    def __str__(self):
        return f'{self.label} (type: {self.type}): {self.params}'

    def key(self):
        """Return the current hash of this module."""
        return self.hash

    def __rehash(self, params):
        """Recompute `self.hash` from the module type and `params`."""
        type_part = hash(self.type) << 4
        self.hash = type_part + hash(params)

    def __check(self, value, check):
        """Return True if `check` matches any string nested inside `value`.

        Lists and dicts (values only) are searched recursively; `check`
        is a compiled regular expression applied with `match()`.
        """
        if isinstance(value, list):
            return any(self.__check(item, check) for item in value)
        if isinstance(value, dict):
            return any(self.__check(item, check) for item in value.values())
        if not isinstance(value, str):
            return False
        return bool(check.match(value))

    def __sub(self, value, group, label):
        """Return a copy of `value` with matches of `group` replaced.

        Each match of the compiled pattern `group` in a string is replaced
        by `label` followed by the pattern's second capture group. Lists
        and dicts are rebuilt recursively; other values pass through.
        """
        if isinstance(value, list):
            return [self.__sub(item, group, label) for item in value]
        if isinstance(value, dict):
            return {name: self.__sub(item, group, label) for name, item in value.items()}
        if isinstance(value, str):
            return group.sub(r'%s\2' % label, value)
        return value

    def apply_rename(self, groups, verbosity_level):
        """Re-hash this module after renaming references found in `groups`.

        `groups` maps a label to a `(group, check)` pair, `check` being a
        compiled pattern. Every top-level parameter in which `check`
        matches is rewritten via `__sub` using that label. The hash is
        recomputed only if something changed; `self.params` itself is
        left untouched.
        """
        verbose = verbosity_level > 2
        modified = False
        newparams = unfreeze(self.params)

        if verbose:
            print('')
            print(f' {self.label} ({self.type})')
            print(f' parameters before: {newparams}')

        for label, (_, check) in groups.items():
            # Only existing keys are reassigned, so the dict size is stable
            # while iterating.
            for name, value in newparams.items():
                if not self.__check(value, check):
                    continue
                newparams[name] = self.__sub(value, check, label)
                modified = True

        if verbose:
            print(f' parameters after: {newparams}')
            print(f' modified = {modified}')

        if modified:
            self.__rehash(frozendict(newparams))
0147
class ModuleList(object):
    """Collection of `Module` objects with grouping of identical ones.

    Attributes:
        modules:         list of the `Module` objects held by this list
                         (whitelisted modules are never added).
        hashToLabelDict: maps a group hash to the human-readable label
                         assigned to that group the first time it is seen.
    """

    def __init__(self, *args):
        """Build the list from modules and/or iterables of modules."""
        # Per-instance containers: as class attributes these would be
        # shared (and mutated) by every ModuleList instance.
        self.modules = []
        self.hashToLabelDict = {}
        for arg in args:
            if iterate(arg):
                self.extend(arg)
            else:
                self.append(arg)

    def append(self, module):
        """Wrap `module` in a `Module` and store it unless whitelisted."""
        m = Module(module)
        if not whitelist(m):
            self.modules.append(m)

    def extend(self, modules):
        """Append every module of the iterable `modules`."""
        for module in modules:
            self.append(module)

    def hash_label(self, hash_value):
        """Return the label recorded for `hash_value`, or None if unknown."""
        return self.hashToLabelDict.get(hash_value, None)

    def sort(self):
        """Sort the modules in place by their hash."""
        self.modules.sort(key=Module.key)

    def group(self):
        """Group modules sharing the same hash.

        Returns:
            dict mapping the hash of each group with more than one module
            to a `(labels, regexp)` pair: `labels` is the sorted list of
            module labels in the group, and `regexp` is a compiled pattern
            matching any of those labels at the start of a string when
            followed by the end of the string or by ':'.
        """
        groups = {}
        # itertools.groupby only merges adjacent items, hence the sort.
        self.sort()
        for group_hash, members in itertools.groupby(self.modules, Module.key):
            members = list(members)
            if len(members) < 2:
                continue
            labels = sorted(m.label for m in members)
            if group_hash not in self.hashToLabelDict:
                # label identifying this group of modules
                # (set only once so it cannot change from step to step)
                self.hashToLabelDict[group_hash] = f'{members[0].type} ({labels[0]})'
            # Escape the labels so they are always matched literally.
            alternatives = '|'.join(re.escape(label) for label in labels)
            groups[group_hash] = (labels, re.compile(r'^(%s)($|:)' % alternatives))
        return groups

    def apply_rename(self, groups, verbosity_level):
        """Call `Module.apply_rename` on every module of the list."""
        for module in self.modules:
            module.apply_rename(groups, verbosity_level)

    def dump(self, indent=0):
        """Print every module with its parameters, indented by `indent` spaces."""
        prefix = ' '*indent
        for m in self.modules:
            print(prefix + "%s = (%s) {" % (m.label, m.type))
            for k, v in m.params.items():
                print(prefix + " %s = %s" % (k, v))
            print(prefix + '}\n')
0202
def _count_duplicates(groups):
    """Return the number of modules in `groups` minus the number of groups."""
    return sum(len(g[0]) for g in groups.values()) - len(groups)


def _write_group_files(modules, groups, output_dir, basename, sed_flags=''):
    """Write `<basename>.sed` and `<basename>.txt` describing `groups`.

    Groups are written in alphabetical order of their label (as returned
    by `modules.hash_label`). `sed_flags` is appended to every sed
    substitution command.
    """
    label_to_hash = {modules.hash_label(group_hash): group_hash for group_hash in groups}
    sorted_labels = sorted(label_to_hash)

    # Context managers guarantee the files are closed even on error.
    with open(os.path.join(output_dir, f'{basename}.sed'), 'w') as sed_file:
        for group_label in sorted_labels:
            group = groups[label_to_hash[group_label]][0]
            sed_file.write('s#\\<\\(%s\\)\\>#%s#%s\n' % ('\\|'.join(group), group_label, sed_flags))

    with open(os.path.join(output_dir, f'{basename}.txt'), 'w') as txt_file:
        entries = []
        for group_label in sorted_labels:
            group = groups[label_to_hash[group_label]][0]
            entries.append('# %s\n%s\n' % (group_label, '\n'.join(group)))
        # Entries are separated by one empty line.
        txt_file.write('\n'.join(entries))


def findDuplicates(process, output_dir, verbosity_level):
    """Find groups of duplicate modules in `process` and write them out.

    The analyzers, producers and filters of `process` are collected in a
    `ModuleList` and grouped. At each step the current groups are written
    to `step<N>.sed` and `step<N>.txt` in `output_dir`, then
    `ModuleList.apply_rename` is called with those groups and the modules
    are grouped again. The loop stops when the number of duplicates no
    longer changes; the final groups are written to `groups.sed` and
    `groups.txt`.

    Args:
        process: object providing `analyzers_()`, `producers_()` and
            `filters_()`, each returning a dict of modules.
        output_dir: directory receiving the output files (created if
            missing).
        verbosity_level: above 0 a summary is printed at each step;
            above 2 the groups and renamed modules are printed too.
    """
    mkdirp(output_dir)

    modules = ModuleList(
        iter(process.analyzers_().values()),
        iter(process.producers_().values()),
        iter(process.filters_().values())
    )

    oldups = 0
    groups = modules.group()
    dups = _count_duplicates(groups)

    index = 1
    while dups != oldups:
        # The per-step sed commands carry the 'g' flag, the final ones do not.
        _write_group_files(modules, groups, output_dir, f'step{index}', sed_flags='g')

        if verbosity_level > 0:
            print(f"[step {index:>2d}] found {dups:>3d} duplicates in {len(groups):>3d} groups")

        if verbosity_level > 2:
            print(f'[step {index:>2d}] groups={groups}')
            print(f'[step {index:>2d}] ---------------')
            print(f'[step {index:>2d}] apply_rename ..')

        oldups = dups
        modules.apply_rename(groups, verbosity_level)

        if verbosity_level > 2:
            print()
            print(f' ------------------------')
            print(f' modules (after renaming)')
            print(f' ------------------------')
            modules.dump(indent=14)

        groups = modules.group()
        dups = _count_duplicates(groups)
        index += 1

    _write_group_files(modules, groups, output_dir, 'groups')
0271
0272 ##
0273 ## main
0274 ##
if __name__ == '__main__':

    ### args
    parser = argparse.ArgumentParser(
        prog = './'+os.path.basename(__file__),
        formatter_class = argparse.RawDescriptionHelpFormatter,
        description = __doc__,
        argument_default = argparse.SUPPRESS,
    )

    # menu: name of ConfDB config, or local cmsRun cfg file, or stdin
    parser.add_argument('menu',
                        nargs = '?',
                        metavar = 'MENU',
                        default = None,
                        help = 'Path to cmsRun configuration file (if not specified, stdin is used)')

    # output-dir: path to directory containing output files
    parser.add_argument('-o', '--output-dir',
                        metavar = 'OUTPUT_DIR',
                        default = 'hltFindDuplicates_output',
                        help = 'Path to directory containing output files')

    # menu arguments: list of arguments to be applied to the cmsRun configuration file
    # (via argparse, VarParsing, or similar)
    parser.add_argument('-x', '--menu-args',
                        nargs = '+',
                        metavar = 'MENU_ARGS',
                        default = [],
                        help = 'List of arguments (each without whitespaces) to be applied to the cmsRun configuration file')

    # verbosity level: level of verbosity of stdout/stderr printouts
    parser.add_argument('-v', '--verbosity-level',
                        metavar = 'VERBOSITY_LEVEL',
                        type = int,
                        default = 1,
                        help = 'Verbosity level')

    # parse command line arguments and options
    opts = parser.parse_args()

    print('-'*25)
    print('hltFindDuplicates')
    print('-'*25)

    # create new output directory (refuse to reuse an existing path)
    if os.path.exists(opts.output_dir):
        log_msg = 'Failed to create output directory (a directory or file already exists under that path)'
        raise RuntimeError(f'{log_msg}: {opts.output_dir}')

    mkdirp(opts.output_dir)
    output_config_filepath = os.path.join(opts.output_dir, 'config.py')

    print(f'output directory: {opts.output_dir}')
    print('-'*25)

    # parse the HLT configuration from a local cfg file, or from standard input
    hlt = {'process': None, 'fragment': None}

    # a copy of the input configuration is always stored in the output directory
    if opts.menu is not None:
        if not os.path.isfile(opts.menu):
            raise RuntimeError(f'Invalid path to input file (file does not exist): {opts.menu}')
        shutil.copyfile(opts.menu, output_config_filepath)
    else:
        with open(output_config_filepath, 'w') as config_file:
            config_file.write(sys.stdin.read())

    # the configuration sees the menu arguments as its own command line
    sys.argv = [sys.argv[0], output_config_filepath] + opts.menu_args

    # NOTE: exec runs the configuration as arbitrary Python code;
    # only use this script on configurations from a trusted source.
    with open(output_config_filepath) as config_file:
        config_source = config_file.read()
    exec(config_source, globals(), hlt)

    # find cms.Process object ('fragment' takes precedence over 'process')
    process = None
    if hlt['process'] is not None:
        process = hlt['process']
    if hlt['fragment'] is not None:
        process = hlt['fragment']

    # isinstance() is False for None, so this also covers "nothing found"
    if not isinstance(process, cms.Process):
        raise RuntimeError('Failed to find object of type cms.Process !')

    findDuplicates(process, output_dir=opts.output_dir, verbosity_level=opts.verbosity_level)
0355 findDuplicates(process, output_dir=opts.output_dir, verbosity_level=opts.verbosity_level)