File indexing completed on 2023-03-17 10:59:04
0001
0002
0003 from __future__ import print_function
0004 import ROOT
0005 ROOT.PyConfig.IgnoreCommandLineOptions = True
0006 import os
0007 import sys
0008 import argparse
0009 import numpy as np
0010 from DQMServices.FileIO.blacklist import get_blacklist
0011 import multiprocessing
0012
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, num_processes, output_dir_path):
    """Compare a baseline and a PR DQM ROOT file and write out the differences.

    Both files are flattened into dictionaries keyed by path tuple. Objects
    found under the same path in both files are compared; objects found in
    only one file are always kept (unless they compare equal to None). The
    selected objects are written to ``<output_dir_path>/base/<name>`` and
    ``<output_dir_path>/pr/<name>``; both output names are derived from
    ``pr_file_path``. A summary (the two output file names and the number of
    changed, removed and added elements) is printed to stdout; status
    messages go to stderr.

    Args:
        base_file_path: path of the baseline ROOT file.
        pr_file_path: path of the PR ROOT file; also the source of the run
            number and of the output file names.
        pr_number: PR number embedded in the output file names.
        test_number: test number embedded in the output file names.
        cmssw_version: release string embedded in the output file names.
        num_processes: when greater than 1 the shared paths are compared in
            worker processes, otherwise in this process.
        output_dir_path: directory under which ``base`` and ``pr`` outputs
            are written.

    Returns:
        None. Returns early, after closing whatever was opened, when either
        input file cannot be opened.
    """
    base_file = ROOT.TFile(base_file_path, 'read')
    # NOTE(review): the files are detached from gROOT's list of files,
    # presumably so that ROOT does not manage them itself - confirm.
    ROOT.gROOT.GetListOfFiles().Remove(base_file)

    pr_file = ROOT.TFile(pr_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(pr_file)

    if base_file.IsOpen():
        print('Baseline file successfully opened', file=sys.stderr)
    else:
        print('Unable to open base file', file=sys.stderr)
        # The PR file was opened before this check; do not leak its handle.
        if pr_file.IsOpen():
            pr_file.Close()
        return

    if pr_file.IsOpen():
        print('PR file successfully opened', file=sys.stderr)
    else:
        print('Unable to open PR file', file=sys.stderr)
        # The baseline file is known to be open at this point.
        base_file.Close()
        return

    try:
        run_nr = get_run_nr(pr_file_path)

        # Flat views of both files: {path tuple: object}.
        base_flat_dict = flatten_file(base_file, run_nr)
        pr_flat_dict = flatten_file(pr_file, run_nr)

        base_keys = set(base_flat_dict)
        pr_keys = set(pr_flat_dict)

        # Paths present in both files, only in the PR file, only in the base file.
        shared_paths = list(pr_keys & base_keys)
        only_pr_paths = list(pr_keys - base_keys)
        only_base_paths = list(base_keys - pr_keys)

        paths_to_save_in_base = []
        paths_to_save_in_pr = []

        if num_processes > 1:
            print("starting comparison using %d process(es)" % num_processes)
            manager = multiprocessing.Manager()
            return_dict = manager.dict()

            # num_processes equally sized blocks plus one extra worker that
            # takes the remainder left over by the integer division.
            block = len(shared_paths) // num_processes
            chunks = [shared_paths[i * block:(i + 1) * block] for i in range(num_processes)]
            chunks.append(shared_paths[num_processes * block:])

            workers = []
            for worker_id, chunk in enumerate(chunks):
                worker = multiprocessing.Process(
                    target=compareMP,
                    args=(chunk, pr_flat_dict, base_flat_dict, worker_id, return_dict))
                workers.append(worker)
                worker.start()

            for worker_id, worker in enumerate(workers):
                worker.join()
                # compareMP stores its result only as its last statement, so a
                # worker that died early surfaces here as a KeyError.
                paths_to_save_in_pr.extend(return_dict[worker_id]['pr'])
                paths_to_save_in_base.extend(return_dict[worker_id]['base'])

            paths_to_save_in_pr.sort()
            paths_to_save_in_base.sort()
            print("Done")
        else:
            compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

        # Elements that exist only in the baseline file ("removed").
        # NOTE(review): `== None` is kept on purpose instead of `is None`;
        # presumably a ROOT null-object proxy compares equal to None without
        # being the None singleton - confirm before changing.
        for path in only_base_paths:
            if base_flat_dict[path] == None:
                continue
            paths_to_save_in_base.append(path)

        # Elements that exist only in the PR file ("added").
        for path in only_pr_paths:
            if pr_flat_dict[path] == None:
                continue
            paths_to_save_in_pr.append(path)

        base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
        pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

        save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))
        save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))
    finally:
        # Release the input files even when comparison or saving failed.
        pr_file.Close()
        base_file.Close()

    # A path saved on both sides is a changed element; the rest of each list
    # exists on one side only.
    nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
    nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
    nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

    print('Base output file. PR output file. Changed elements, removed elements, added elements:')
    print(base_output_filename)
    print(pr_output_filename)
    print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
0122
def compareMP(shared_paths, pr_flat_dict, base_flat_dict, iProc, return_dict):
    """Worker entry point: compare a slice of shared paths and report the result.

    The comparison itself is delegated to ``compare`` so that the
    single-process and multi-process code paths cannot drift apart.

    Args:
        shared_paths: the paths this worker is responsible for.
        pr_flat_dict: {path: object} for the PR file.
        base_flat_dict: {path: object} for the baseline file.
        iProc: key under which the result is stored in ``return_dict``.
        return_dict: mapping shared with the parent process; receives
            ``{'pr': [...], 'base': [...]}`` holding the differing paths.
    """
    comparisons = {'pr': [], 'base': []}
    compare(shared_paths, pr_flat_dict, base_flat_dict, comparisons['pr'], comparisons['base'])
    # Stored once, as the last statement, after the whole slice is processed.
    return_dict[iProc] = comparisons
0161
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
    """Append every shared path whose PR and baseline objects differ to both lists.

    Dispatch on the type of the two objects:
      * both TProfile2D or both TProfile -> ``compare_TProfile``;
      * both TH1 -> converted to numpy arrays and compared by shape and by
        ``np.allclose`` (NaNs count as equal);
      * anything else -> plain ``!=``.
    Paths where either object compares equal to None are skipped.

    Args:
        shared_paths: paths present in both dictionaries.
        pr_flat_dict: {path: object} for the PR file.
        base_flat_dict: {path: object} for the baseline file.
        paths_to_save_in_pr: output list, extended in place.
        paths_to_save_in_base: output list, extended in place.
    """
    for path in shared_paths:
        pr_obj = pr_flat_dict[path]
        base_obj = base_flat_dict[path]

        # `== None` on purpose (not `is None`), as in the rest of the file.
        if pr_obj == None or base_obj == None:
            continue

        # TProfile2D is tested before TProfile, then TH1, then the fallback.
        if any(pr_obj.InheritsFrom(cls) and base_obj.InheritsFrom(cls)
               for cls in ('TProfile2D', 'TProfile')):
            changed = not compare_TProfile(pr_obj, base_obj)
        elif pr_obj.InheritsFrom('TH1') and base_obj.InheritsFrom('TH1'):
            pr_bins = np.array(pr_obj)
            base_bins = np.array(base_obj)
            changed = (pr_bins.shape != base_bins.shape
                       or not np.allclose(pr_bins, base_bins, equal_nan=True))
        else:
            changed = pr_obj != base_obj

        if changed:
            paths_to_save_in_pr.append(path)
            paths_to_save_in_base.append(path)
0196
0197
def compare_TProfile(pr_item, base_item):
    """Return True when two profile objects agree cell by cell.

    The objects must report the same ``GetSize()``; then, for every index
    below that size, bin content, bin entries and bin error must each be
    close according to ``np.isclose`` (NaNs count as equal).

    Args:
        pr_item: profile object from the PR file.
        base_item: profile object from the baseline file.

    Returns:
        True if all cells match, False at the first mismatch or when the
        sizes differ.
    """
    n_cells = pr_item.GetSize()
    if n_cells != base_item.GetSize():
        return False

    for cell in range(n_cells):
        pr_values = (pr_item.GetBinContent(cell),
                     pr_item.GetBinEntries(cell),
                     pr_item.GetBinError(cell))
        base_values = (base_item.GetBinContent(cell),
                       base_item.GetBinEntries(cell),
                       base_item.GetBinError(cell))

        # Content, entries and error are checked in that order.
        for pr_value, base_value in zip(pr_values, base_values):
            if not np.isclose(pr_value, base_value, equal_nan=True):
                return False

    return True
0222
def flatten_file(file, run_nr):
    """Flatten a ROOT file into a ``{path tuple: object}`` dictionary.

    Every top-level key of ``file`` is read and walked with
    ``traverse_till_end``, which fills the dictionary. A top-level key that
    fails to read or traverse is skipped (best effort) and reported on
    stderr; entries already collected from it are kept.

    Args:
        file: object providing ``GetListOfKeys()`` (the opened ROOT file).
        run_nr: run number forwarded to the blacklist check.

    Returns:
        dict mapping path tuples to the objects found in the file.
    """
    result = {}
    for key in file.GetListOfKeys():
        try:
            traverse_till_end(key.ReadObj(), [], result, run_nr)
        except Exception as error:
            # Keep going, but say why this subtree is missing. A bare
            # `except:` would also swallow KeyboardInterrupt/SystemExit.
            print('Skipping top-level key %s: %r' % (key.GetName(), error), file=sys.stderr)

    return result
0232
def traverse_till_end(node, dirs_list, result, run_nr):
    """Recursively walk ``node`` and record every leaf under its path tuple.

    A node exposing ``GetListOfKeys`` is treated as a directory and each of
    its keys is read and visited; any other node is a leaf and is stored in
    ``result`` unless its path is blacklisted for ``run_nr``.

    Args:
        node: current object.
        dirs_list: list of names leading to (but excluding) ``node``.
        result: dictionary filled in place with ``{path tuple: leaf}``.
        run_nr: run number forwarded to ``is_blacklisted``.
    """
    here = dirs_list + [get_node_name(node)]

    if not hasattr(node, 'GetListOfKeys'):
        # Leaf: keep it unless the blacklist excludes this path.
        if not is_blacklisted(here, run_nr):
            result[tuple(here)] = node
        return

    for key in node.GetListOfKeys():
        traverse_till_end(key.ReadObj(), here, result, run_nr)
0242
def get_node_name(node):
    """Return the name under which ``node`` is stored in the flattened path.

    For anything but a TObjString this is simply ``node.GetName()``. For a
    TObjString the text before the first '>' is taken, its first character
    is dropped, and the string-element suffix is appended.
    (presumably these names look like ``<name>...`` - verify against the
    files being compared.)
    """
    if not node.InheritsFrom('TObjString'):
        return node.GetName()

    opening_tag = node.GetName().partition('>')[0]
    return opening_tag[1:] + get_string_suffix()
0250
def get_string_suffix():
    """Return the marker appended to the names of TObjString nodes."""
    suffix = '_string_monitor_element'
    return suffix
0253
def is_blacklisted(dirs_list, run_nr):
    """Tell whether the path in ``dirs_list`` is blacklisted for ``run_nr``.

    The string-element suffix added by ``get_node_name`` is removed from the
    last path component before the lookup. Only the trailing suffix is
    removed; a name that merely contains the same text elsewhere is left
    intact. ``dirs_list`` itself is not modified.

    Args:
        dirs_list: non-empty list of path components.
        run_nr: run number passed to ``get_blacklist``.

    Returns:
        True if the path tuple is contained in ``get_blacklist(run_nr)``.
    """
    suffix = get_string_suffix()

    leaf = dirs_list[-1]
    if leaf.endswith(suffix):
        # Slice rather than str.replace(): replace() would also strip
        # occurrences of the suffix text in the middle of the name.
        leaf = leaf[:-len(suffix)]

    return (tuple(dirs_list[:-1]) + (leaf,)) in get_blacklist(run_nr)
0262
def save_paths(flat_dict, paths, result_file_path):
    """Write the objects stored under ``paths`` into a new ROOT file.

    Nothing is written (and no directory is created) when ``paths`` is
    empty. Otherwise the parent directory of ``result_file_path`` is created
    if needed, the file is (re)created and every path is stored through
    ``save_to_file``. Progress and failures are reported on stderr.

    Args:
        flat_dict: {path tuple: object} as produced by ``flatten_file``.
        paths: the path tuples to write.
        result_file_path: destination ROOT file.

    Raises:
        OSError: if the output directory cannot be created.
    """
    if not paths:
        print('No differences were observed - output will not be written', file=sys.stderr)
        return

    result_dir = os.path.dirname(result_file_path)
    # An empty dirname means "current directory": nothing to create, and
    # os.makedirs('') would raise.
    if result_dir:
        try:
            os.makedirs(result_dir)
        except OSError:
            # Already existing (possibly created concurrently by another
            # job) is fine; anything else is a real error.
            if not os.path.isdir(result_dir):
                raise

    result_file = ROOT.TFile(result_file_path, 'recreate')
    ROOT.gROOT.GetListOfFiles().Remove(result_file)

    if not result_file.IsOpen():
        print('Unable to open %s output file' % result_file_path, file=sys.stderr)
        return

    try:
        for path in paths:
            save_to_file(flat_dict, path, result_file)
    finally:
        # Close the output even if writing one of the objects failed.
        result_file.Close()

    print('Output written to %s file' % result_file_path, file=sys.stderr)
0285
0286
def save_to_file(flat_dict, path, output_file):
    """Write the object stored under ``path`` into ``output_file``.

    All components of ``path`` except the last one are taken as nested
    directory names; they are created on demand through ``create_dir``. The
    innermost directory is made current and the object is written there.

    Args:
        flat_dict: {path tuple: object}.
        path: path tuple of the object to write.
        output_file: opened output file (root of the directory tree).
    """
    obj = flat_dict[path]

    # Descend from the file root, one directory per path component.
    target_dir = output_file
    for dir_name in path[:-1]:
        target_dir = create_dir(target_dir, dir_name)

    target_dir.cd()
    obj.Write()
0297 histogram.Write()
0298
0299
def create_dir(parent_dir, name):
    """Return the entry ``name`` of ``parent_dir``, creating it if missing.

    Args:
        parent_dir: directory-like object providing ``Get`` and ``mkdir``.
        name: name of the sub-directory.

    Returns:
        What ``parent_dir.Get(name)`` returned when that is truthy,
        otherwise the result of ``parent_dir.mkdir(name)``.
    """
    existing = parent_dir.Get(name)
    return existing if existing else parent_dir.mkdir(name)
0305
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
    """Build the name of a comparison output file.

    Pieces taken from ``input_file_path``:
      * run: third '_'-separated field of the file name;
      * workflow: text before the first '_' in the parent directory name,
        with '.' replaced by '_' ('Unknown' when that is empty).
    A 'RelVal_' prefix is added when the run field is 'R000000001'.

    Args:
        input_file_path: path of the input ROOT file.
        pr_number: PR number to embed.
        test_number: test number to embed.
        cmssw_version: release string to embed.
        isPr: True for the PR-side file name, False for the baseline one.

    Returns:
        The output file name (no directory part).
    """
    file_name = os.path.basename(input_file_path)
    run = file_name.split('_')[2]

    parent_dir_name = os.path.basename(os.path.dirname(input_file_path))
    workflow = parent_dir_name.split('_')[0].replace('.', '_') or 'Unknown'

    relval_prefix = 'RelVal_' if run == 'R000000001' else ''
    side = 'pr' if isPr else 'base'

    return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % (
        run, relval_prefix, workflow, side, cmssw_version, pr_number, test_number)
0328
def get_run_nr(file_path):
    """Extract the run number, as a string, from a DQM file path.

    Takes the third '_'-separated field of the file name and strips leading
    'R' characters and then leading zeros, e.g. 'R000320822' -> '320822'.
    An all-zero run field yields an empty string.
    """
    file_name = os.path.basename(file_path)
    run_field = file_name.split('_')[2]
    without_marker = run_field.lstrip('R')
    return without_marker.lstrip('0')
0331
if __name__ == '__main__':
    # Command-line entry point: parse the options and run one comparison.
    parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements found in base-file with the ones found in pr-file."
        "Comparison is done bin by bin and output is written to a root file containing only the changes.")
    parser.add_argument('-b', '--base-file', help='Baseline IB DQM root file', required=True)
    parser.add_argument('-p', '--pr-file', help='PR DQM root file', required=True)
    parser.add_argument('-n', '--pr-number', help='PR number under test', default='00001')
    parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
    # os.environ.get(): indexing os.environ directly raised KeyError while the
    # parser was being built whenever CMSSW_VERSION was unset, even if -r
    # was given on the command line.
    parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000', default=os.environ.get('CMSSW_VERSION'))
    parser.add_argument('-j', '--num-processes', help='Number of processes forked to parallel process the comparison', default=1, type=int)
    parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
    args = parser.parse_args()

    if args.release_format is None:
        parser.error('no release format: pass -r/--release-format or set the CMSSW_VERSION environment variable')

    # Keep only the first four '_'-separated fields of the release string.
    cmssw_version = '_'.join(args.release_format.split('_')[:4])

    create_dif(args.base_file, args.pr_file, args.pr_number, args.test_number, cmssw_version, args.num_processes, args.output_dir)
0346 create_dif(args.base_file, args.pr_file, args.pr_number, args.test_number, cmssw_version, args.num_processes, args.output_dir)