Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:10:13

0001 #!/bin/env python3
0002 
0003 from __future__ import print_function
0004 import ROOT
0005 ROOT.PyConfig.IgnoreCommandLineOptions = True
0006 import os
0007 import sys
0008 import argparse
0009 import numpy as np
0010 from DQMServices.FileIO.blacklist import get_blacklist
0011 import multiprocessing
0012 
def create_dif(base_file_path, pr_file_path, pr_number, test_number, cmssw_version, num_processes, output_dir_path):
   """Compare two DQM ROOT files and write out the elements that differ.

   Both files are flattened into {path tuple: object} dictionaries. Paths
   present in both files are compared; paths present in only one file are
   kept as-is. The selected objects are written to
   <output_dir_path>/base/... and <output_dir_path>/pr/..., and a short
   summary is printed to stdout.

   Args:
      base_file_path: path of the baseline ROOT file.
      pr_file_path: path of the PR ROOT file; the run number and the output
         file names are derived from it.
      pr_number: PR identifier used in the output file names.
      test_number: test identifier used in the output file names.
      cmssw_version: release string used in the output file names.
      num_processes: when greater than 1, the shared paths are compared in
         that many worker processes; otherwise in this process.
      output_dir_path: directory receiving the 'base' and 'pr' outputs.

   Returns:
      None. Returns early (after a message on stderr) if either input file
      cannot be opened.

   Raises:
      RuntimeError: if a comparison worker ends without storing a result.
   """
   base_file = ROOT.TFile(base_file_path, 'read')
   ROOT.gROOT.GetListOfFiles().Remove(base_file)

   pr_file = ROOT.TFile(pr_file_path, 'read')
   ROOT.gROOT.GetListOfFiles().Remove(pr_file)

   if base_file.IsOpen():
      print('Baseline file successfully opened', file=sys.stderr)
   else:
      print('Unable to open base file', file=sys.stderr)
      # Do not leave the other input open when bailing out.
      if pr_file.IsOpen():
         pr_file.Close()
      return

   if pr_file.IsOpen():
      print('PR file successfully opened', file=sys.stderr)
   else:
      print('Unable to open PR file', file=sys.stderr)
      base_file.Close()
      return

   try:
      run_nr = get_run_nr(pr_file_path)

      # Get list of paths (lists of directories)
      base_flat_dict = flatten_file(base_file, run_nr)
      pr_flat_dict = flatten_file(pr_file, run_nr)

      # Paths that appear in both baseline and PR data. (Intersection)
      shared_paths = list(set(pr_flat_dict).intersection(set(base_flat_dict)))

      # Paths that appear only in PR data. (Except)
      only_pr_paths = list(set(pr_flat_dict).difference(set(base_flat_dict)))

      # Paths that appear only in baseline data. (Except)
      only_base_paths = list(set(base_flat_dict).difference(set(pr_flat_dict)))

      # Histograms pointed to by these paths will be written to baseline output
      paths_to_save_in_base = []

      # Histograms pointed to by these paths will be written to pr output
      paths_to_save_in_pr = []

      # Make comparison
      if num_processes > 1:
         print("starting comparison using %d process(es)" % num_processes)
         manager = multiprocessing.Manager()
         return_dict = manager.dict()
         workers = []

         # Split the shared paths into exactly num_processes contiguous
         # chunks; the first `remainder` chunks get one extra path.
         chunk_size, remainder = divmod(len(shared_paths), num_processes)
         start = 0
         for worker_id in range(num_processes):
            stop = start + chunk_size + (1 if worker_id < remainder else 0)
            worker = multiprocessing.Process(
               target=compareMP,
               args=(shared_paths[start:stop], pr_flat_dict, base_flat_dict, worker_id, return_dict))
            workers.append(worker)
            worker.start()
            start = stop

         for worker_id, worker in enumerate(workers):
            worker.join()
            if worker_id not in return_dict:
               raise RuntimeError('Comparison worker %d finished without returning a result' % worker_id)
            worker_result = return_dict[worker_id]
            paths_to_save_in_pr.extend(worker_result['pr'])
            paths_to_save_in_base.extend(worker_result['base'])

         paths_to_save_in_pr.sort()
         paths_to_save_in_base.sort()
         print("Done")
      else:
         compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base)

      # NOTE(review): the checks below intentionally stay '== None' rather
      # than 'is None' - PyROOT null-pointer proxies are believed to compare
      # equal to None without being the None singleton. Confirm before
      # changing.

      # Collect paths that have to be written to baseline output file
      for path in only_base_paths:
         if base_flat_dict[path] == None:
            continue
         paths_to_save_in_base.append(path)

      # Collect paths that have to be written to PR output file
      for path in only_pr_paths:
         if pr_flat_dict[path] == None:
            continue
         paths_to_save_in_pr.append(path)

      base_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, False)
      pr_output_filename = get_output_filename(pr_file_path, pr_number, test_number, cmssw_version, True)

      # Write baseline output
      save_paths(base_flat_dict, paths_to_save_in_base, os.path.join(output_dir_path, 'base', base_output_filename))

      # Write PR output
      save_paths(pr_flat_dict, paths_to_save_in_pr, os.path.join(output_dir_path, 'pr', pr_output_filename))
   finally:
      # Close the inputs even if flattening, comparing or writing failed.
      pr_file.Close()
      base_file.Close()

   # Info about changed, added and removed elements
   nr_of_changed_elements = len(set(paths_to_save_in_base).intersection(set(paths_to_save_in_pr)))
   nr_of_removed_elements = len(paths_to_save_in_base) - nr_of_changed_elements
   nr_of_added_elements = len(paths_to_save_in_pr) - nr_of_changed_elements

   print('Base output file. PR output file. Changed elements, removed elements, added elements:')
   print(base_output_filename)
   print(pr_output_filename)
   print('%s %s %s' % (nr_of_changed_elements, nr_of_removed_elements, nr_of_added_elements))
0122 
def compareMP(shared_paths, pr_flat_dict, base_flat_dict, iProc, return_dict):
   """Worker entry point: compare a slice of shared paths and publish the result.

   The comparison itself is delegated to compare() so that the single-process
   and multi-process code paths cannot diverge.

   Args:
      shared_paths: paths (keys of both dictionaries) this worker handles.
      pr_flat_dict: {path: object} mapping for the PR file.
      base_flat_dict: {path: object} mapping for the baseline file.
      iProc: key under which the result is stored in return_dict.
      return_dict: mapping shared with the parent process; receives
         {'pr': [...], 'base': [...]} holding the paths found to differ.
   """
   # Prepare output dictionary
   comparisons = {'pr': [], 'base': []}

   # compare() appends every differing path to both lists.
   compare(shared_paths, pr_flat_dict, base_flat_dict, comparisons['pr'], comparisons['base'])

   return_dict[iProc] = comparisons
0161 
def compare(shared_paths, pr_flat_dict, base_flat_dict, paths_to_save_in_pr, paths_to_save_in_base):
   """Append every shared path whose PR and baseline objects differ to both output lists.

   Profiles (TProfile2D / TProfile) are compared with compare_TProfile(),
   other TH1-derived objects by converting them to numpy arrays, and
   everything else with '!='. Paths where either object compares equal to
   None are skipped.
   """
   def both_inherit(first, second, class_name):
      # True only when both objects report inheriting from class_name.
      return first.InheritsFrom(class_name) and second.InheritsFrom(class_name)

   for key in shared_paths:
      new_obj = pr_flat_dict[key]
      ref_obj = base_flat_dict[key]

      # Equality (not identity) test against None is kept on purpose.
      if new_obj == None or ref_obj == None:
         continue

      if both_inherit(new_obj, ref_obj, 'TProfile2D') or both_inherit(new_obj, ref_obj, 'TProfile'):
         # Profiles: content, entries and errors
         changed = not compare_TProfile(new_obj, ref_obj)
      elif both_inherit(new_obj, ref_obj, 'TH1'):
         # Histograms: bin by bin
         new_bins = np.array(new_obj)
         ref_bins = np.array(ref_obj)
         changed = new_bins.shape != ref_bins.shape or not np.allclose(new_bins, ref_bins, equal_nan=True)
      else:
         # Non histograms
         changed = new_obj != ref_obj

      if changed:
         paths_to_save_in_pr.append(key)
         paths_to_save_in_base.append(key)
0196 
0197 # Returns False if different, True otherwise
def compare_TProfile(pr_item, base_item):
   """Return True when both profiles agree cell by cell, False otherwise.

   The objects must report the same GetSize(); then, for every index below
   that size, GetBinContent, GetBinEntries and GetBinError are compared with
   np.isclose (NaN is treated as equal to NaN).
   """
   if pr_item.GetSize() != base_item.GetSize():
      return False

   accessors = ('GetBinContent', 'GetBinEntries', 'GetBinError')
   for cell in range(pr_item.GetSize()):
      for accessor in accessors:
         pr_value = getattr(pr_item, accessor)(cell)
         base_value = getattr(base_item, accessor)(cell)
         if not np.isclose(pr_value, base_value, equal_nan=True):
            return False

   return True
0222 
0223 def flatten_file(file, run_nr):
0224    result = {}
0225    for key in file.GetListOfKeys():
0226       try:
0227          traverse_till_end(key.ReadObj(), [], result, run_nr)
0228       except:
0229          pass
0230 
0231    return result
0232 
def traverse_till_end(node, dirs_list, result, run_nr):
   """Recursively walk `node`, storing each non-blacklisted leaf in `result`.

   A node exposing GetListOfKeys() is treated as a container and each of its
   keys is read and visited; any other node is a leaf and is stored under the
   tuple of names leading to it.
   """
   path_parts = dirs_list + [get_node_name(node)]

   if not hasattr(node, 'GetListOfKeys'):
      # Leaf: keep it unless the blacklist excludes this path.
      if not is_blacklisted(path_parts, run_nr):
         result[tuple(path_parts)] = node
      return

   for child_key in node.GetListOfKeys():
      traverse_till_end(child_key.ReadObj(), path_parts, result, run_nr)
0242 
def get_node_name(node):
   """Return the name used for `node` in a flattened path.

   For a TObjString the name is the tag taken from '<name>value</name>' with
   the string-monitor-element suffix appended; for anything else it is
   node.GetName() unchanged.
   """
   if not node.InheritsFrom('TObjString'):
      return node.GetName()

   # Text before the first '>' is '<name'; drop the leading '<'.
   opening_tag = node.GetName().partition('>')[0]
   return opening_tag[1:] + get_string_suffix()
0250 
def get_string_suffix():
   """Return the marker appended to the names of string monitor elements."""
   suffix = '_string_monitor_element'
   return suffix
0253 
def is_blacklisted(dirs_list, run_nr):
   """Return True if the path given by `dirs_list` is in the blacklist for `run_nr`.

   The string-monitor-element suffix, if present at the end of the last
   component, is removed before the lookup. `dirs_list` is not modified.

   Args:
      dirs_list: non-empty list of path components.
      run_nr: run number passed to get_blacklist().
   """
   suffix = get_string_suffix()
   leaf = dirs_list[-1]

   # Strip only the trailing suffix; str.replace would also delete any
   # earlier occurrence inside the name.
   if leaf.endswith(suffix):
      leaf = leaf[:-len(suffix)]

   lookup_path = tuple(dirs_list[:-1]) + (leaf,)
   return lookup_path in get_blacklist(run_nr)
0262 
def save_paths(flat_dict, paths, result_file_path):
   """Write the objects selected by `paths` into a new ROOT file.

   Nothing is written when `paths` is empty. Otherwise the output directory
   is created if needed, the file is (re)created, and every path is written
   through save_to_file(). Progress and failures are reported on stderr.

   Args:
      flat_dict: {path tuple: object} mapping the paths refer to.
      paths: path tuples to write.
      result_file_path: location of the ROOT file to create.
   """
   if not paths:
      print('No differences were observed - output will not be written', file=sys.stderr)
      return

   # Make sure output dir exists. A bare file name has no directory part,
   # and os.makedirs('') would fail, so only create when there is one.
   result_dir = os.path.dirname(result_file_path)
   if result_dir:
      os.makedirs(result_dir, exist_ok=True)

   result_file = ROOT.TFile(result_file_path, 'recreate')
   ROOT.gROOT.GetListOfFiles().Remove(result_file)

   if not result_file.IsOpen():
      print('Unable to open %s output file' % result_file_path, file=sys.stderr)
      return

   try:
      for path in paths:
         save_to_file(flat_dict, path, result_file)
   finally:
      # Close the file even if writing one of the objects failed.
      result_file.Close()

   print('Output written to %s file' % result_file_path, file=sys.stderr)
0285 
0286 # Saves file from flat_dict in the same dir of currently open file for writing
def save_to_file(flat_dict, path, output_file):
   """Write flat_dict[path] into `output_file` under the directories named in `path`.

   All components of `path` except the last are directory names, created on
   demand with create_dir(); the last one names the object itself.

   Args:
      flat_dict: {path tuple: object} mapping.
      path: tuple of directory names followed by the object name.
      output_file: ROOT file opened for writing.
   """
   histogram = flat_dict[path]

   current = output_file
   # Start from the file's top directory. Without this a path with no
   # directory part would be written into whatever directory an earlier call
   # left current, since Write() is called without an explicit target.
   current.cd()

   # Last item is filename. No need to create dir for it
   for directory in path[:-1]:
      current = create_dir(current, directory)
      current.cd()

   histogram.Write()
0298 
0299 # Create dir in root file if it doesn't exist
def create_dir(parent_dir, name):
   """Return the entry called `name` inside `parent_dir`, creating it with mkdir() if absent."""
   # A falsy result from Get() means there is nothing usable under that name.
   return parent_dir.Get(name) or parent_dir.mkdir(name)
0305 
def get_output_filename(input_file_path, pr_number, test_number, cmssw_version, isPr):
   """Build the name of a comparison output file.

   Samples of correct output file format:
   DQM_V0001_R000320822__wf136_892_pr__CMSSW_10_4_0_pre3-PR25518-1234__DQMIO.root
   When the run field is R000000001 the RelVal naming pattern is used, i.e.
   'RelVal_' is inserted in front of the workflow part.

   The run field is the third '_'-separated token of the input file name.
   The workflow is the part of the parent directory name before its first
   '_', with dots turned into underscores ('Unknown' when that is empty).
   """
   run = os.path.basename(input_file_path).split('_')[2]

   parent_dir_name = os.path.basename(os.path.dirname(input_file_path))
   workflow = parent_dir_name.split('_')[0].replace('.', '_') or 'Unknown'

   relval_prefix = 'RelVal_' if run == 'R000000001' else ''
   baseOrPr = 'pr' if isPr else 'base'

   return 'DQM_V0001_%s__%swf%s_%s__%s-PR%s-%s__DQMIO.root' % (run, relval_prefix, workflow, baseOrPr, cmssw_version, pr_number, test_number)
0328 
def get_run_nr(file_path):
   """Return the run number from a DQM file name, without the 'R' prefix and leading zeros.

   The run field is the third '_'-separated token of the file's base name.
   """
   run_field = os.path.basename(file_path).split('_')[2]
   without_prefix = run_field.lstrip('R')
   return without_prefix.lstrip('0')
0331 
# Command-line entry point: parse the options and run the comparison.
if __name__ == '__main__':
   parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements found in base-file with the ones found in pr-file. "
      "Comparison is done bin by bin and output is written to a root file containing only the changes.")
   parser.add_argument('-b', '--base-file', help='Baseline IB DQM root file', required=True)
   parser.add_argument('-p', '--pr-file', help='PR DQM root file', required=True)
   parser.add_argument('-n', '--pr-number', help='PR number under test', default='00001')
   parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
   # Use .get() so a missing CMSSW_VERSION does not abort the script while
   # the parser is being built, before -r has even been looked at.
   parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000', default=os.environ.get('CMSSW_VERSION'))
   parser.add_argument('-j', '--num-processes', help='Number of processes forked to parallel process the comparison', default=1, type=int)
   parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
   args = parser.parse_args()

   if not args.release_format:
      parser.error('no release format given: pass -r/--release-format or set CMSSW_VERSION')

   # Keep the first four '_'-separated fields, e.g. CMSSW_10_5_X.
   cmssw_version = '_'.join(args.release_format.split('_')[:4])

   create_dif(args.base_file, args.pr_file, args.pr_number, args.test_number, cmssw_version, args.num_processes, args.output_dir)