File indexing completed on 2024-11-26 02:34:12
0001
0002
0003 import ROOT
0004 ROOT.PyConfig.IgnoreCommandLineOptions = True
0005 import os
0006 import sys
0007 import argparse
0008 import numpy as np
0009 from DQMServices.FileIO.blacklist import get_blacklist
0010 import multiprocessing
0011
def _compare_in_parallel(shared_paths, pr_flat_dict, base_flat_dict, num_processes):
    """Compare the shared paths using exactly ``num_processes`` workers.

    Each worker runs ``compareMP`` on its own slice of ``shared_paths`` and
    stores its result in a manager dict under its worker index.

    Returns:
        A ``(pr_paths, base_paths)`` tuple of sorted lists with the paths the
        workers reported as different.

    Raises:
        RuntimeError: if a worker finished without storing a result.
    """
    manager = multiprocessing.Manager()
    return_dict = manager.dict()

    workers = []
    for worker_id in range(num_processes):
        # Strided slices give every worker a near-equal share and need no
        # extra "remainder" process on top of the requested number.
        chunk = shared_paths[worker_id::num_processes]
        worker = multiprocessing.Process(
            target=compareMP,
            args=(chunk, pr_flat_dict, base_flat_dict, worker_id, return_dict))
        workers.append(worker)
        worker.start()

    pr_paths = []
    base_paths = []
    for worker_id, worker in enumerate(workers):
        worker.join()
        if worker_id not in return_dict:
            # Silently skipping a chunk would hide real differences.
            raise RuntimeError('comparison worker %d exited without returning a result (exit code %s)'
                               % (worker_id, worker.exitcode))
        pr_paths.extend(return_dict[worker_id]['pr'])
        base_paths.extend(return_dict[worker_id]['base'])

    # Results arrive grouped per worker; sort them into a stable order.
    pr_paths.sort()
    base_paths.sort()
    return pr_paths, base_paths


def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, num_processes, output_dir_path):
    """Compare two DQM ROOT files and write the differing elements to disk.

    Both files are flattened into ``{path tuple: object}`` dicts. Objects
    present in both files are compared; objects that differ, plus objects
    present in only one of the files, are written to
    ``<output_dir_path>/base/<name>`` and ``<output_dir_path>/pr/<name>``.
    Status messages go to stderr. The two output file names and the
    changed / removed / added counts are printed to stdout.

    Args:
        base_file_path: path of the baseline ROOT file.
        pr_file_path: path of the PR ROOT file; the run number and the output
            file names are derived from it.
        pr_number: PR number embedded in the output file names.
        test_number: test number embedded in the output file names.
        cmssw_version: release string embedded in the output file names.
        num_processes: number of worker processes; values <= 1 compare
            in-process.
        output_dir_path: directory under which ``base/`` and ``pr/`` outputs
            are created.

    Returns:
        None. Returns early, after reporting on stderr, if either input file
        cannot be opened.
    """
    base_file = ROOT.TFile(base_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(base_file)

    pr_file = ROOT.TFile(pr_file_path, 'read')
    ROOT.gROOT.GetListOfFiles().Remove(pr_file)

    try:
        if base_file.IsOpen():
            print('Baseline file successfully opened', file=sys.stderr)
        else:
            print('Unable to open base file', file=sys.stderr)
            return

        if pr_file.IsOpen():
            print('PR file successfully opened', file=sys.stderr)
        else:
            print('Unable to open PR file', file=sys.stderr)
            return

        run_nr = get_run_nr(pr_file_path)

        # Flatten both files into dicts keyed by the full path tuple.
        base_flat_dict = flatten_file(base_file, run_nr)
        pr_flat_dict = flatten_file(pr_file, run_nr)

        pr_paths = set(pr_flat_dict)
        base_paths = set(base_flat_dict)
        shared_paths = list(pr_paths & base_paths)
        only_pr_paths = list(pr_paths - base_paths)
        only_base_paths = list(base_paths - pr_paths)

        if num_processes > 1:
            print("starting comparison using %d process(es)" % num_processes)
            paths_to_save_in_pr, paths_to_save_in_base = _compare_in_parallel(
                shared_paths, pr_flat_dict, base_flat_dict, num_processes)
            print("Done")
        else:
            paths_to_save_in_pr = []
            paths_to_save_in_base = []
            compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

        # NOTE(review): `== None` is kept deliberately - PyROOT null-pointer
        # proxies presumably compare equal to None without being the None
        # singleton; confirm before switching to `is None`.
        for path in only_base_paths:
            if base_flat_dict[path] == None:
                continue
            paths_to_save_in_base.append(path)

        for path in only_pr_paths:
            if pr_flat_dict[path] == None:
                continue
            paths_to_save_in_pr.append(path)

        base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
        pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

        save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))
        save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))
    finally:
        # Release the input files on every exit path, including the early
        # returns above and any exception raised while comparing or saving.
        for root_file in (pr_file, base_file):
            if root_file.IsOpen():
                root_file.Close()

    # A path saved on both sides is a changed element; the rest were removed
    # (base only) or added (PR only).
    nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
    nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
    nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

    print('Base output file. PR output file. Changed elements, removed elements, added elements:')
    print(base_output_filename)
    print(pr_output_filename)
    print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
0121
def compareMP(shared_paths, pr_flat_dict, base_flat_dict, iProc, return_dict):
    """Worker entry point: compare one slice of paths and publish the result.

    The comparison itself is delegated to ``compare`` so that the single- and
    multi-process code paths always apply the same rules.

    Args:
        shared_paths: paths (keys of both dicts) this worker should compare.
        pr_flat_dict: flattened PR file, ``{path: object}``.
        base_flat_dict: flattened baseline file, ``{path: object}``.
        iProc: key under which the result is stored in ``return_dict``.
        return_dict: shared mapping that receives
            ``{'pr': [...], 'base': [...]}`` with the differing paths.
    """
    comparisons = {'pr': [], 'base': []}
    compare(shared_paths, pr_flat_dict, base_flat_dict, comparisons['pr'], comparisons['base'])
    return_dict[iProc] = comparisons
0160
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
    """Append every path whose PR and baseline objects differ to both lists.

    Profiles are compared through ``compare_TProfile``, other histograms by
    converting them to numpy arrays, and everything else with ``!=``. Paths
    where either object compares equal to None are skipped.

    Args:
        shared_paths: paths present in both dicts.
        pr_flat_dict: flattened PR file, ``{path: object}``.
        base_flat_dict: flattened baseline file, ``{path: object}``.
        paths_to_save_in_pr: output list, extended in place.
        paths_to_save_in_base: output list, extended in place.
    """
    profile_classes = ('TProfile2D', 'TProfile')

    for path in shared_paths:
        new_obj = pr_flat_dict[path]
        ref_obj = base_flat_dict[path]

        # NOTE(review): equality (not identity) test on purpose - PyROOT
        # null-pointer proxies presumably compare equal to None; confirm
        # before changing to `is None`.
        if new_obj == None or ref_obj == None:
            continue

        both_profiles = any(
            new_obj.InheritsFrom(cls) and ref_obj.InheritsFrom(cls)
            for cls in profile_classes)

        if both_profiles:
            changed = not compare_TProfile(new_obj, ref_obj)
        elif new_obj.InheritsFrom('TH1') and ref_obj.InheritsFrom('TH1'):
            new_bins = np.array(new_obj)
            ref_bins = np.array(ref_obj)
            changed = (new_bins.shape != ref_bins.shape
                       or not np.allclose(new_bins, ref_bins, equal_nan=True))
        else:
            # Anything that is not a histogram is compared directly.
            changed = new_obj != ref_obj

        if changed:
            paths_to_save_in_pr.append(path)
            paths_to_save_in_base.append(path)
0195
0196
def compare_TProfile(pr_item, base_item):
    """Return True when two profiles agree in every cell, False otherwise.

    The profiles must report the same ``GetSize()``. For each index below
    that size the bin content, bin entries and bin error are compared with
    ``np.isclose`` (NaN is treated as equal to NaN).
    """
    n_cells = pr_item.GetSize()
    if n_cells != base_item.GetSize():
        return False

    for cell in range(n_cells):
        value_pairs = (
            (pr_item.GetBinContent(cell), base_item.GetBinContent(cell)),
            (pr_item.GetBinEntries(cell), base_item.GetBinEntries(cell)),
            (pr_item.GetBinError(cell), base_item.GetBinError(cell)),
        )
        for pr_value, base_value in value_pairs:
            if not np.isclose(pr_value, base_value, equal_nan=True):
                return False

    return True
0221
def flatten_file(file, run_nr):
    """Flatten a ROOT file into a ``{path tuple: object}`` dict.

    Every top-level key is read and walked with ``traverse_till_end``. A key
    that fails to read or traverse is skipped (best effort) and reported on
    stderr; interpreter-level signals such as KeyboardInterrupt are no longer
    swallowed.

    Args:
        file: object exposing ``GetListOfKeys()``.
        run_nr: run number forwarded to the traversal.

    Returns:
        Dict mapping path tuples to the objects found.
    """
    result = {}
    for key in file.GetListOfKeys():
        try:
            node = key.ReadObj()
            traverse_till_end(node, [], result, run_nr)
        except Exception as err:
            # Keep going with the remaining keys, but leave a trace of what
            # was dropped instead of hiding the failure completely.
            print('Skipping key %s while flattening file: %s' % (key.GetName(), err), file=sys.stderr)

    return result
0231
def traverse_till_end(node, dirs_list, result, run_nr):
    """Recursively walk ``node`` and record every non-blacklisted leaf.

    Args:
        node: current object; anything exposing ``GetListOfKeys`` is treated
            as a directory and descended into.
        dirs_list: names of the ancestors of ``node`` (not modified).
        result: dict that receives ``{path tuple: leaf object}`` entries.
        run_nr: run number used for the blacklist lookup.
    """
    path_parts = dirs_list + [get_node_name(node)]

    if not hasattr(node, 'GetListOfKeys'):
        # Leaf object: store it unless its path is blacklisted.
        if not is_blacklisted(path_parts, run_nr):
            result[tuple(path_parts)] = node
        return

    for child_key in node.GetListOfKeys():
        traverse_till_end(child_key.ReadObj(), path_parts, result, run_nr)
0241
def get_node_name(node):
    """Return the path component to use for ``node``.

    For a TObjString the name is the text between the first character and
    the first '>' of ``GetName()`` (presumably a ``<name>...`` tag - confirm
    against the file format), followed by the string-element suffix. Any
    other object simply reports ``GetName()``.
    """
    if not node.InheritsFrom('TObjString'):
        return node.GetName()

    # Keep what precedes the first '>' and drop the leading character.
    tag_name = node.GetName().partition('>')[0][1:]
    return tag_name + get_string_suffix()
0249
def get_string_suffix():
    """Return the suffix appended to the names of TObjString elements."""
    suffix = '_string_monitor_element'
    return suffix
0252
def is_blacklisted(dirs_list, run_nr):
    """Tell whether the path ``dirs_list`` is in the blacklist for ``run_nr``.

    The string-element suffix is removed from the last component before the
    lookup. The caller's list is left untouched.
    """
    suffix = get_string_suffix()

    leaf = dirs_list[-1]
    if leaf.endswith(suffix):
        leaf = leaf.replace(suffix, '')

    # Rebuild the path as a tuple with the (possibly cleaned) last component.
    lookup_key = tuple(dirs_list[:-1]) + (leaf,)
    return lookup_key in get_blacklist(run_nr)
0261
def save_paths(flat_dict, paths, result_file_path):
    """Write the objects at ``paths`` into a new ROOT file.

    Nothing is written when ``paths`` is empty. The parent directory of
    ``result_file_path`` is created if needed; a path without a directory
    part is written to the current working directory.

    Args:
        flat_dict: ``{path tuple: object}`` mapping holding the objects.
        paths: path tuples to write.
        result_file_path: location of the ROOT file to (re)create.
    """
    if not paths:
        print('No differences were observed - output will not be written', file=sys.stderr)
        return

    # Make sure that the directory exists. dirname is '' for a bare file
    # name, and os.makedirs('') would raise; exist_ok avoids the race between
    # checking for the directory and creating it.
    result_dir = os.path.dirname(result_file_path)
    if result_dir:
        os.makedirs(result_dir, exist_ok=True)

    result_file = ROOT.TFile(result_file_path, 'recreate')
    ROOT.gROOT.GetListOfFiles().Remove(result_file)

    if not result_file.IsOpen():
        print('Unable to open %s output file' % result_file_path, file=sys.stderr)
        return

    for path in paths:
        save_to_file(flat_dict, path, result_file)

    result_file.Close()
    print('Output written to %s file' % result_file_path, file=sys.stderr)
0284
0285
def save_to_file(flat_dict, path, output_file):
    """Write ``flat_dict[path]`` into ``output_file`` under its directories.

    All components of ``path`` except the last one are treated as nested
    directory names and are created on demand. The object is then written
    with the innermost directory made current.

    Args:
        flat_dict: ``{path tuple: object}`` mapping holding the object.
        path: tuple of directory names followed by the object's own name.
        output_file: open output file (or directory) to write into.
    """
    histogram = flat_dict[path]

    current = output_file

    # Last item is the object name; no directory is created for it.
    for directory in path[:-1]:
        current = create_dir(current, directory)

    # Select the target directory exactly once, after the walk. This also
    # covers one-element paths, which would otherwise be written into
    # whatever directory a previous call left current.
    current.cd()

    histogram.Write()
0297
0298
def create_dir(parent_dir, name):
    """Return the child ``name`` of ``parent_dir``, creating it if absent.

    The lookup result is tested for truthiness (not compared with None), so
    a falsy lookup result counts as "not found".
    """
    existing = parent_dir.Get(name)
    return existing if existing else parent_dir.mkdir(name)
0304
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
    """Build the name of the comparison output file.

    The run is the third '_'-separated token of the input file name. The
    workflow is the part of the parent directory name before its first '_',
    with dots replaced by underscores ('Unknown' when empty). Run
    'R000000001' gets a 'RelVal_' prefix in front of the workflow.

    Args:
        input_file_path: path of the input DQM file.
        pr_number: PR number to embed.
        test_number: test number to embed.
        cmssw_version: release string to embed.
        isPr: True for the PR-side file name, False for the baseline side.

    Returns:
        The output file name (no directory part).
    """
    parent_name = os.path.basename(os.path.dirname(input_file_path))
    run = os.path.basename(input_file_path).split('_')[2]

    workflow = parent_name.split('_')[0].replace('.', '_') or 'Unknown'
    relval_prefix = 'RelVal_' if run == 'R000000001' else ''
    baseOrPr = 'pr' if isPr else 'base'

    template = 'DQM_V0001_{}__{}wf{}_{}__{}-PR{}-{}__DQMIO.root'
    return template.format(run, relval_prefix, workflow, baseOrPr, cmssw_version, pr_number, test_number)
0327
def get_run_nr(file_path):
    """Extract the run number from a DQM file path, as a string.

    The third '_'-separated token of the file name is taken, then leading
    'R' characters and leading zeros are stripped from it.
    """
    run_token = os.path.basename(file_path).split('_')[2]
    without_prefix = run_token.lstrip('R')
    return without_prefix.lstrip('0')
0330
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the comparison.
    parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements found in base-file with the ones found in pr-file. "
        "Comparison is done bin by bin and output is written to a root file containing only the changes.")
    parser.add_argument('-b', '--base-file', help='Baseline IB DQM root file', required=True)
    parser.add_argument('-p', '--pr-file', help='PR DQM root file', required=True)
    parser.add_argument('-n', '--pr-number', help='PR number under test', default='00001')
    parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
    # Use .get() so that a missing CMSSW_VERSION does not abort with a
    # KeyError before the options are parsed, even when -r is supplied.
    parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000', default=os.environ.get('CMSSW_VERSION'))
    parser.add_argument('-j', '--num-processes', help='Number of processes forked to parallel process the comparison', default=1, type=int)
    parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
    args = parser.parse_args()

    if args.release_format is None:
        parser.error('no release given: pass -r/--release-format or set the CMSSW_VERSION environment variable')

    # Get the release name without the date and time: CMSSW_10_5_X
    cmssw_version = '_'.join(args.release_format.split('_')[:4])

    create_dif(args.base_file, args.pr_file, args.pr_number, args.test_number, cmssw_version, args.num_processes, args.output_dir)
0345 create_dif(args.base_file, args.pr_file, args.pr_number, args.test_number, cmssw_version, args.num_processes, args.output_dir)