FileIO/scripts/compareDQMOutput.py

0001 #!/bin/env python3
0002
0003 import os
0004 import sys
0005 import glob
0006 import argparse
0007 import subprocess
0008 from threading import Thread
0009
0010 COMPARISON_RESULTS = []
0011
def collect_and_compare_files(base_dir, pr_dir, output_dir, num_procs, pr_number, test_number, release_format):
    """Compare every DQM file found in both directories using worker threads.

    The list returned by get_file_pairs() is shared between ``num_procs``
    threads that each run compare(); every worker pops file names from that
    list until it is empty. After all workers have finished, the module-level
    COMPARISON_RESULTS list is sorted in place by the numeric value of each
    entry's 'workflow' field.
    """
    pending_files = get_file_pairs(base_dir, pr_dir)
    worker_args = (base_dir, pr_dir, output_dir, pending_files, pr_number, test_number, release_format)

    workers = []
    for _ in range(num_procs):
        worker = Thread(target=compare, args=worker_args)
        worker.start()
        workers.append(worker)

    # Wait for every worker before touching the collected results.
    for worker in workers:
        worker.join()

    def workflow_number(result):
        return float(result['workflow'])

    COMPARISON_RESULTS.sort(key=workflow_number)
0024
def compare(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format):
    """Worker loop: compare DQM root file pairs until ``files`` is empty.

    ``files`` is a list of paths relative to both ``base_dir`` and ``pr_dir``
    that may be shared with other worker threads. For every popped entry
    ``compareHistograms.py`` is executed and its output is parsed into a
    dictionary that is appended to the module-level COMPARISON_RESULTS list.

    A failure while comparing or parsing one file is printed and does not stop
    the loop; the remaining files are still processed.
    """
    while files:
        try:
            file_name = files.pop()
        except IndexError:
            # Another worker took the last entry between the emptiness check
            # and the pop. There is nothing left to do, and this is not a
            # comparison failure, so leave quietly.
            break

        try:
            command = ['compareHistograms.py', '-b', os.path.join(base_dir, file_name),
                       '-p', os.path.join(pr_dir, file_name), '-o', output_dir,
                       '-n', pr_number, '-t', test_number, '-r', release_format]
            print('Running comparison:')
            print(' '.join(command))

            output = subprocess.check_output(command).decode()

            result = _parse_comparison_output(output, base_dir, file_name, release_format)
            COMPARISON_RESULTS.append(result)
        except Exception as ex:
            # Best effort: report the problem and carry on with the next file.
            print('Exception comparing two root files: %s' % ex)


def _strip_root_suffix(file_name):
    """Return ``file_name`` without a trailing '.root' extension, if present."""
    # str.rstrip('.root') would strip any trailing run of the characters
    # '.', 'r', 'o' and 't', which can eat into the name itself.
    suffix = '.root'
    if file_name.endswith(suffix):
        return file_name[:-len(suffix)]
    return file_name


def _dataset_from_filename(output_filename):
    """Build the '/A/B/C' dataset string from the '__'-separated file name parts."""
    return '/' + '/'.join(_strip_root_suffix(output_filename).split('__')[1:])


def _parse_comparison_output(output, base_dir, file_name, release_format):
    """Turn the text printed by compareHistograms.py into a result dictionary.

    The first output line is ignored. The following lines are read as: the
    base output file name, the PR output file name, and three space-separated
    integers that are stored as the changed, removed and added element counts.
    """
    output_elements = output.split('\n')[1:]
    base_output_filename = output_elements[0]
    pr_output_filename = output_elements[1]
    # Third '_'-separated field of the file name, without the leading 'R' and zeros.
    run_nr = base_output_filename.split('_')[2].lstrip('R').lstrip('0')
    output_numbers = output_elements[2].split(' ')

    # The workflow is the part of the containing directory name before the first '_'.
    workflow = os.path.basename(os.path.dirname(os.path.join(base_dir, file_name))).split('_')[0]

    # First four '_'-separated fields of the release, with the last character
    # replaced by a lowercase 'x' (e.g. CMSSW_10_5_X... -> CMSSW_10_5_x).
    cmssw_version = '_'.join(release_format.split('_')[:4])
    cmssw_version = cmssw_version[:-1] + 'x'
    root_file_dir_in_gui = 'ROOT/RelValData/%s/' % cmssw_version
    if 'R000000001__RelVal' in base_output_filename:
        root_file_dir_in_gui = 'ROOT/RelVal/%s/' % cmssw_version

    return {
        'workflow': workflow,
        'base_dataset': _dataset_from_filename(base_output_filename),
        'pr_dataset': _dataset_from_filename(pr_output_filename),
        'run_nr': run_nr,
        'changed_elements': int(output_numbers[0]),
        'removed_elements': int(output_numbers[1]),
        'added_elements': int(output_numbers[2]),
        'base_file_path_in_gui': root_file_dir_in_gui + base_output_filename,
        'pr_file_path_in_gui': root_file_dir_in_gui + pr_output_filename,
    }
0060
def get_file_pairs(base_dir, pr_dir):
    """Return the relative paths of DQM root files present in both directories.

    Both directories are searched with the pattern '*.*_*/DQM_*.root'. The
    matches are made relative to their own root so they can be compared, and
    only paths found under both roots are returned, in the order in which they
    were found under ``base_dir``.
    """
    pattern = '*.*_*/DQM_*.root'

    # Remove the root directories and keep only the parts of the paths that
    # are expected to be the same on both sides.
    base_files = [os.path.relpath(path, base_dir)
                  for path in glob.glob(os.path.join(base_dir, pattern))]
    # A set makes each membership test below O(1) instead of a list scan.
    pr_files = {os.path.relpath(path, pr_dir)
                for path in glob.glob(os.path.join(pr_dir, pattern))}

    # Intersection, preserving the order of the base files.
    return [name for name in base_files if name in pr_files]
0072
def upload_to_gui(output_dir, num_procs, dqmgui_url):
    """Start worker threads that upload the comparison root files.

    Collects '*.root' files from the 'base' and 'pr' subdirectories of
    ``output_dir`` (base files first), prints the list and starts at most
    ``num_procs`` threads running upload() on the shared list. The threads are
    not joined here, so this function may return while uploads are still
    running.
    """
    pending_files = []
    for subdir_pattern in ('base/*.root', 'pr/*.root'):
        pending_files.extend(glob.glob(os.path.join(output_dir, subdir_pattern)))

    print('Files to be uploaded:')
    print(pending_files)

    # Never start more workers than there are files to upload.
    worker_count = min(num_procs, len(pending_files))
    for _ in range(worker_count):
        Thread(target=upload, args=(pending_files, dqmgui_url)).start()
0085
def upload(files, dqmgui_url):
    """Worker loop: upload files with visDQMUpload.py until ``files`` is empty.

    ``files`` is a list of file paths that may be shared with other worker
    threads. Each popped path is passed, together with ``dqmgui_url``, to
    ``visDQMUpload.py``. A failure for one file (an exception while launching
    the command or a non-zero exit code) is printed and the loop continues
    with the next file.
    """
    while files:
        try:
            file_path = files.pop()
        except IndexError:
            # Another worker popped the last file name right after this one
            # passed the emptiness check. Nothing is left to upload.
            break

        try:
            command = ['visDQMUpload.py', dqmgui_url, file_path]
            print('Uploading output:')
            print(' '.join(command))

            return_code = subprocess.call(command)
            if return_code != 0:
                # subprocess.call() does not raise on failure, so report it here.
                print('Upload of %s failed with exit code %s' % (file_path, return_code))
            print('')
        except Exception as ex:
            # Best effort: report the problem and carry on with the next file.
            print('Exception uploading a file: %s' % ex)
0100
def generate_summary_html(output_dir, pr_list, summary_dir, dqmgui_url):
    """Write 'dqm-histo-comparison-summary.html' into ``summary_dir``.

    The HTML template is read, its '$PR_LIST$', '$TOTAL_CHANGES$',
    '$NUMBER_OF_WORKFLOWS$' and '$PER_WORKFLOW_LIST$' placeholders are
    replaced using ``pr_list`` and the module-level COMPARISON_RESULTS list,
    and the result is written out. ``summary_dir`` is created if it does not
    exist; an empty ``summary_dir`` writes into the current directory.

    ``output_dir`` is accepted for interface compatibility and is not used.

    Raises:
        FileNotFoundError: if the template cannot be found under
            $CMSSW_BASE or $CMSSW_RELEASE_BASE.
    """
    with open(_find_summary_template(), 'r') as template_file:
        result = template_file.read()

    result = result.replace('$PR_LIST$', pr_list)

    table_rows = []
    total_changes = 0
    for comp in COMPARISON_RESULTS:
        total_changes += comp['removed_elements'] + comp['added_elements'] + comp['changed_elements']
        table_rows.append(_render_summary_row(comp, dqmgui_url))

    result = result.replace('$TOTAL_CHANGES$', str(total_changes))
    result = result.replace('$NUMBER_OF_WORKFLOWS$', str(len(COMPARISON_RESULTS)))
    result = result.replace('$PER_WORKFLOW_LIST$', ''.join(table_rows))

    # Write output
    result_file_path = os.path.join(summary_dir, 'dqm-histo-comparison-summary.html')
    result_dir = os.path.dirname(result_file_path)
    if result_dir:
        # exist_ok avoids the race between checking for and creating the directory.
        os.makedirs(result_dir, exist_ok=True)
    with open(result_file_path, 'w') as summary_file:
        summary_file.write(result)


def _find_summary_template():
    """Return the path of the summary template, preferring $CMSSW_BASE over $CMSSW_RELEASE_BASE."""
    relative_path = os.path.join('src', 'DQMServices', 'FileIO', 'scripts',
                                 'dqm-histo-comparison-summary-template.html')
    for variable in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        root = os.getenv(variable)
        # An unset variable is skipped instead of being passed to os.path.join().
        if not root:
            continue
        candidate = os.path.join(root, relative_path)
        if os.path.isfile(candidate):
            return candidate
    raise FileNotFoundError(
        'Summary template %s was not found under $CMSSW_BASE or $CMSSW_RELEASE_BASE' % relative_path)


def _render_summary_row(comp, dqmgui_url):
    """Return the HTML '<tr>' block with all links and counts for one comparison result."""
    baseline_count = comp['changed_elements'] + comp['removed_elements']
    pr_count = comp['changed_elements'] + comp['added_elements']
    # The overlay link is labelled with the same count as the baseline link.
    overlay_count = baseline_count

    # Make urls
    gui_url_format = '%s/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;'
    base_url = gui_url_format % (dqmgui_url, comp['run_nr'], comp['base_dataset'])
    pr_url = gui_url_format % (dqmgui_url, comp['run_nr'], comp['pr_dataset'])
    overlay_url = '%s/start?runnr=%s;dataset%%3D%s;referenceshow%%3Dall;referencenorm=False;referenceobj1%%3Dother::%s::;sampletype%%3Doffline_relval;workspace%%3DEverything;' \
        % (dqmgui_url, comp['run_nr'], comp['pr_dataset'], comp['base_dataset'])
    raw_url_format = '%s/jsroot/index.htm?file=%s/data/browse/%s'
    base_raw_url = raw_url_format % (dqmgui_url, dqmgui_url, comp['base_file_path_in_gui'])
    pr_raw_url = raw_url_format % (dqmgui_url, dqmgui_url, comp['pr_file_path_in_gui'])

    link_cell_format = '            <td><a href="%s" target="_blank">%s %s</a><span> (%s)</span></td>\n'
    link_cells = (
        (base_url, 'baseline GUI', baseline_count),
        (pr_url, 'pr GUI', pr_count),
        (overlay_url, 'overlay GUI', overlay_count),
        (base_raw_url, 'baseline rootjs', baseline_count),
        (pr_raw_url, 'pr rootjs', pr_count),
    )

    parts = ['        <tr>\n']
    for url, label, count in link_cells:
        parts.append(link_cell_format % (url, comp['workflow'], label, count))
    parts.append('            <td><span class="removed">-%s</span><span class="added">+%s</span><span class="changed">%s</span></td>\n'
                 % (comp['removed_elements'], comp['added_elements'], comp['changed_elements']))
    parts.append('        </tr>\n')
    return ''.join(parts)
0150
# Script entry point: parse the command line, run all comparisons, start the
# uploads of the comparison output and write the HTML summary.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements within DQM files found in base-dir with the ones found in in pr-dir. "
        "All workflow directories are searched for correctly named DQM root files. "
        "Comparison is done bin by bin and output is written to a root files containing only the changes.")
    parser.add_argument('-b', '--base-dir', help='Baseline IB directory', default='basedata/')
    parser.add_argument('-p', '--pr-dir', help='PR directory', default='prdata/')
    parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
    parser.add_argument('-j', '--nprocs', help='Number of processes', default=1, type=int)
    parser.add_argument('-n', '--pr-number', help='This is obsolete and should NOT be used.', required=False)
    parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
    parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000')
    parser.add_argument('-s', '--summary-dir', help='Directory where summary with all links will be saved', default='')
    parser.add_argument('-l', '--pr-list', help='A list of PRs participating in the comparison', default='')
    parser.add_argument('-u', '--dqmgui-url', help='DQMGUI url to upload to', default='https://cmsweb.cern.ch/dqm/dev', required=False)
    args = parser.parse_args()

    # Get the number of the PR which triggered the comparison: the part after
    # the '/' in the first space-separated entry of --pr-list, with '#'
    # replaced by '_'. The --pr-number option is parsed but not used.
    pr_number = 'Unknown'
    try:
        pr_number = args.pr_list.split(' ')[0].split('/')[1].replace('#', '_')
    except IndexError:
        # The first entry contains no '/': keep the 'Unknown' placeholder.
        pass

    release_format = args.release_format
    if not release_format:
        try:
            release_format = os.environ['CMSSW_VERSION']
        except KeyError:
            print('You are not in a CMSSW release. Please provide a valid release-format (-r option)')
            # sys.exit() flushes buffered output on the way out; os._exit()
            # does not, so the message above could be lost when stdout is piped.
            sys.exit(1)

    collect_and_compare_files(args.base_dir, args.pr_dir, args.output_dir, args.nprocs, pr_number, args.test_number, release_format)
    upload_to_gui(args.output_dir, args.nprocs, args.dqmgui_url)
    generate_summary_html(args.output_dir, args.pr_list, args.summary_dir, args.dqmgui_url)