compareDQMOutput.py

CMSSW/DQMServices/FileIO/scripts/compareDQMOutput.py

Line Code

Line	Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184	`#!/bin/env python3` `import os` `import sys` `import glob` `import argparse` `import subprocess` `from threading import Thread` `COMPARISON_RESULTS = []` `def collect_and_compare_files(base_dir, pr_dir, output_dir, num_procs, pr_number, test_number, release_format):` `files = get_file_pairs(base_dir, pr_dir)` `threads = []` `for _ in range(num_procs):` `thread = Thread(target=compare, args=(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format))` `thread.start()` `threads.append(thread)` `[thread.join() for thread in threads]` `COMPARISON_RESULTS.sort(key=lambda k: float(k['workflow']))` `def compare(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format):` `while files:` `try:` `file_name = files.pop()` `command = ['compareHistograms.py', '-b', os.path.join(base_dir, file_name), \` `'-p', os.path.join(pr_dir, file_name), '-o', output_dir, '-n', pr_number, '-t', test_number, '-r', release_format]` `print('Running comparison:')` `print(' '.join(command))` `output = subprocess.check_output(command).decode()` `output_elements = output.split('\n')[1:]` `base_output_filename = output_elements[0]` `pr_output_filename = output_elements[1]` `run_nr = base_output_filename.split('_')[2].lstrip('R').lstrip('0')` `output_numbers = output_elements[2].split(' ')` `workflow = 
os.path.basename(os.path.dirname(os.path.join(base_dir, file_name))).split('_')[0]` `base_dataset = '/' + '/'.join(base_output_filename.rstrip('.root').split('__')[1:])` `pr_dataset = '/' + '/'.join(pr_output_filename.rstrip('.root').split('__')[1:])` `cmssw_version = '_'.join(release_format.split('_')[:4])` `cmssw_version = cmssw_version[:-1] + 'x'` `root_file_dir_in_gui = 'ROOT/RelValData/%s/' % cmssw_version` `if 'R000000001__RelVal' in base_output_filename:` `root_file_dir_in_gui = 'ROOT/RelVal/%s/' % cmssw_version` `base_file_path_in_gui = root_file_dir_in_gui + base_output_filename` `pr_file_path_in_gui = root_file_dir_in_gui + pr_output_filename` `COMPARISON_RESULTS.append({'workflow': workflow, 'base_dataset': base_dataset, 'pr_dataset': pr_dataset, 'run_nr': run_nr,\` `'changed_elements': int(output_numbers[0]), 'removed_elements': int(output_numbers[1]), 'added_elements': int(output_numbers[2]),` `'base_file_path_in_gui': base_file_path_in_gui, 'pr_file_path_in_gui': pr_file_path_in_gui})` `except Exception as ex:` `print('Exception comparing two root files: %s' % ex)` `def get_file_pairs(base_dir, pr_dir):` `base_files = glob.glob(os.path.join(base_dir, '._/DQM_.root'))` `pr_files = glob.glob(os.path.join(pr_dir, '._/DQM_.root'))` `# Remove base directories and leave` `# only parts of paths that are same` `base_files = [ os.path.relpath(x, base_dir) for x in base_files ]` `pr_files = [ os.path.relpath(x, pr_dir) for x in pr_files ]` `# Find intersection` `return [value for value in base_files if value in pr_files]` `def upload_to_gui(output_dir, num_procs, dqmgui_url):` `base_files = glob.glob(os.path.join(output_dir, 'base/.root'))` `pr_files = glob.glob(os.path.join(output_dir, 'pr/.root'))` `files = base_files + pr_files` `print('Files to be uploaded:')` `print(files)` `for _ in range(min(num_procs, len(files))):` `thread = Thread(target=upload, args=(files, dqmgui_url))` `thread.start()` `def upload(files, dqmgui_url):` `while files:` `try:` `file = 
files.pop()` `command = ['visDQMUpload.py', dqmgui_url, file]` `print('Uploading output:')` `print(' '.join(command))` `subprocess.call(command)` `print('')` `except Exception as ex:` `# This might throw when another thread pops the last filename immediately after this one` `# started the loop. In this case this exception can be safely ignored.` `print('Exception uploading a file: %s' % ex)` `def generate_summary_html(output_dir, pr_list, summary_dir, dqmgui_url):` `template_file_path = os.path.join(os.getenv('CMSSW_BASE'), 'src', 'DQMServices', 'FileIO', 'scripts', 'dqm-histo-comparison-summary-template.html')` `if not os.path.isfile(template_file_path):` `template_file_path = os.path.join(os.getenv('CMSSW_RELEASE_BASE'), 'src', 'DQMServices', 'FileIO', 'scripts', 'dqm-histo-comparison-summary-template.html')` `template_file = open(template_file_path, 'r')` `result = template_file.read()` `result = result.replace('$PR_LIST$', pr_list)` `table_items = ''` `total_changes = 0` `for comp in COMPARISON_RESULTS:` `total_changes += comp['removed_elements'] + comp['added_elements'] + comp['changed_elements']` `baseline_count = comp['changed_elements'] + comp['removed_elements']` `pr_count = comp['changed_elements'] + comp['added_elements']` `overlay_count = baseline_count` `# Make urls` `base_url = '%s/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;' % (dqmgui_url, comp['run_nr'], comp['base_dataset'])` `pr_url = '%s/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;' % (dqmgui_url, comp['run_nr'], comp['pr_dataset'])` `overlay_url = '%s/start?runnr=%s;dataset%%3D%s;referenceshow%%3Dall;referencenorm=False;referenceobj1%%3Dother::%s::;sampletype%%3Doffline_relval;workspace%%3DEverything;' \` `% (dqmgui_url, comp['run_nr'], comp['pr_dataset'], comp['base_dataset'])` `base_raw_url = '%s/jsroot/index.htm?file=%s/data/browse/%s' % (dqmgui_url, dqmgui_url, comp['base_file_path_in_gui'])` `pr_raw_url = 
'%s/jsroot/index.htm?file=%s/data/browse/%s' % (dqmgui_url, dqmgui_url, comp['pr_file_path_in_gui'])` `table_items += ' <tr>\n'` `table_items += ' <td><a href="%s" target="_blank">%s baseline GUI</a><span> (%s)</span></td>\n' % (base_url, comp['workflow'], baseline_count)` `table_items += ' <td><a href="%s" target="_blank">%s pr GUI</a><span> (%s)</span></td>\n' % (pr_url, comp['workflow'], pr_count)` `table_items += ' <td><a href="%s" target="_blank">%s overlay GUI</a><span> (%s)</span></td>\n' % (overlay_url, comp['workflow'], overlay_count)` `table_items += ' <td><a href="%s" target="_blank">%s baseline rootjs</a><span> (%s)</span></td>\n' % (base_raw_url, comp['workflow'], baseline_count)` `table_items += ' <td><a href="%s" target="_blank">%s pr rootjs</a><span> (%s)</span></td>\n' % (pr_raw_url, comp['workflow'], pr_count)` `table_items += ' <td><span class="removed">-%s</span><span class="added">+%s</span><span class="changed">%s</span></td>\n' \` `% (comp['removed_elements'], comp['added_elements'], comp['changed_elements'])` `table_items += ' </tr>\n'` `result = result.replace('$TOTAL_CHANGES$', str(total_changes))` `result = result.replace('$NUMBER_OF_WORKFLOWS$', str(len(COMPARISON_RESULTS)))` `result = result.replace('$PER_WORKFLOW_LIST$', table_items)` `template_file.close()` `# Write output` `result_file_path = os.path.join(summary_dir, 'dqm-histo-comparison-summary.html')` `if os.path.dirname(result_file_path):` `if not os.path.exists(os.path.dirname(result_file_path)):` `os.makedirs(os.path.dirname(result_file_path))` `summary_file = open(result_file_path, 'w')` `summary_file.write(result)` `summary_file.close()` `if __name__ == '__main__':` `parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements within DQM files found in base-dir with the ones found in in pr-dir. "` `"All workflow directories are searched for correctly named DQM root files. 
"` `"Comparison is done bin by bin and output is written to a root files containing only the changes.")` `parser.add_argument('-b', '--base-dir', help='Baseline IB directory', default='basedata/')` `parser.add_argument('-p', '--pr-dir', help='PR directory', default='prdata/')` `parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')` `parser.add_argument('-j', '--nprocs', help='Number of processes', default=1, type=int)` `parser.add_argument('-n', '--pr-number', help='This is obsolete and should NOT be used.', required=False)` `parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')` `parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000')` `parser.add_argument('-s', '--summary-dir', help='Directory where summary with all links will be saved', default='')` `parser.add_argument('-l', '--pr-list', help='A list of PRs participating in the comparison', default='')` `parser.add_argument('-u', '--dqmgui-url', help='DQMGUI url to upload to', default='https://cmsweb.cern.ch/dqm/dev', required=False)` `args = parser.parse_args()` `# Get the number of the PR which triggered the comparison` `pr_number = 'Unknown'` `try:` `pr_number = args.pr_list.split(' ')[0].split('/')[1].replace('#', '_')` `except:` `pass` `release_format = args.release_format` `if not release_format:` `try:` `release_format = os.environ['CMSSW_VERSION']` `except:` `print('You are not in a CMSSW release. Please provide a valid release-format (-r option)')` `os._exit(1)` `collect_and_compare_files(args.base_dir, args.pr_dir, args.output_dir, args.nprocs, pr_number, args.test_number, release_format)` `upload_to_gui(args.output_dir, args.nprocs, args.dqmgui_url)` `generate_summary_html(args.output_dir, args.pr_list, args.summary_dir, args.dqmgui_url)`

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184

#!/bin/env python3

import os
import sys
import glob
import argparse
import subprocess
from threading import Thread

# Shared accumulator for comparison results. Each compare() worker appends one
# dict per successfully compared file pair, collect_and_compare_files() sorts
# it by workflow, and generate_summary_html() reads it to build the summary.
COMPARISON_RESULTS = []

def collect_and_compare_files(base_dir, pr_dir, output_dir, num_procs, pr_number, test_number, release_format):
    """Compare every DQM file present in both directories using worker threads.

    The list of common files is shared between ``num_procs`` threads, each
    running ``compare``. Once all threads have finished, the global
    ``COMPARISON_RESULTS`` list is sorted in place by the numeric value of
    each entry's ``'workflow'`` field.
    """
    pending_files = get_file_pairs(base_dir, pr_dir)
    worker_args = (base_dir, pr_dir, output_dir, pending_files, pr_number, test_number, release_format)

    workers = []
    for _ in range(num_procs):
        worker = Thread(target=compare, args=worker_args)
        worker.start()
        workers.append(worker)

    # Wait for every worker before touching the shared results list.
    for worker in workers:
        worker.join()

    COMPARISON_RESULTS.sort(key=lambda entry: float(entry['workflow']))

def _strip_root_suffix(file_name):
    """Return file_name without a trailing '.root' extension, if it has one."""
    suffix = '.root'
    if file_name.endswith(suffix):
        return file_name[:-len(suffix)]
    return file_name

def compare(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format):
    """Worker loop: run compareHistograms.py on file pairs until ``files`` is empty.

    ``files`` is a list of relative paths shared with the other worker
    threads; each iteration pops one entry and compares
    ``base_dir/<entry>`` against ``pr_dir/<entry>``.

    The first line of the tool's output is skipped. The following three lines
    are read as: the base output file name, the PR output file name, and three
    space-separated integers (changed, removed and added element counts).
    One result dict per successful comparison is appended to the global
    ``COMPARISON_RESULTS``.

    Any failure while comparing a single pair is printed and the loop moves
    on to the next pair, so one bad file does not stop the worker.
    """
    while files:
        try:
            file_name = files.pop()
        except IndexError:
            # Another worker took the last entry between the emptiness check
            # and the pop. Nothing is left to do and this is not an error.
            return

        try:
            command = ['compareHistograms.py', '-b', os.path.join(base_dir, file_name),
                '-p', os.path.join(pr_dir, file_name), '-o', output_dir, '-n', pr_number, '-t', test_number, '-r', release_format]
            print('Running comparison:')
            print(' '.join(command))

            output = subprocess.check_output(command).decode()

            output_elements = output.split('\n')[1:]
            base_output_filename = output_elements[0]
            pr_output_filename = output_elements[1]
            # Third '_'-separated field of the file name, without the leading
            # 'R' and zero padding.
            run_nr = base_output_filename.split('_')[2].lstrip('R').lstrip('0')
            output_numbers = output_elements[2].split(' ')

            # Workflow id is the part of the containing directory name before the first '_'.
            workflow = os.path.basename(os.path.dirname(os.path.join(base_dir, file_name))).split('_')[0]
            # Remove the extension as a suffix. str.rstrip('.root') would strip
            # any trailing '.', 'r', 'o' or 't' characters and could eat the
            # end of the dataset name itself.
            base_dataset = '/' + '/'.join(_strip_root_suffix(base_output_filename).split('__')[1:])
            pr_dataset = '/' + '/'.join(_strip_root_suffix(pr_output_filename).split('__')[1:])

            # Keep the first four '_'-separated fields of the release and
            # replace the final character with 'x'.
            # NOTE(review): only the last character is replaced, so a
            # multi-digit fourth field keeps its leading digits - confirm
            # this matches the directory layout used by the GUI.
            cmssw_version = '_'.join(release_format.split('_')[:4])
            cmssw_version = cmssw_version[:-1] + 'x'
            root_file_dir_in_gui = 'ROOT/RelValData/%s/' % cmssw_version
            if 'R000000001__RelVal' in base_output_filename:
                root_file_dir_in_gui = 'ROOT/RelVal/%s/' % cmssw_version

            base_file_path_in_gui = root_file_dir_in_gui + base_output_filename
            pr_file_path_in_gui = root_file_dir_in_gui + pr_output_filename

            COMPARISON_RESULTS.append({'workflow': workflow, 'base_dataset': base_dataset, 'pr_dataset': pr_dataset, 'run_nr': run_nr,
                'changed_elements': int(output_numbers[0]), 'removed_elements': int(output_numbers[1]), 'added_elements': int(output_numbers[2]),
                'base_file_path_in_gui': base_file_path_in_gui, 'pr_file_path_in_gui': pr_file_path_in_gui})
        except Exception as ex:
            print('Exception comparing two root files: %s' % ex)
    
def get_file_pairs(base_dir, pr_dir):
    """Return the relative paths of DQM root files found in both directories.

    Both directories are searched with the glob pattern ``*.*_*/DQM_*.root``.
    Paths are made relative to their own top directory so that files from the
    two trees can be matched against each other.

    Returns a list of relative paths, in the order the base directory files
    were found.
    """
    pattern = '*.*_*/DQM_*.root'
    base_files = [os.path.relpath(path, base_dir) for path in glob.glob(os.path.join(base_dir, pattern))]
    # A set gives constant-time membership tests for the intersection below.
    pr_files = {os.path.relpath(path, pr_dir) for path in glob.glob(os.path.join(pr_dir, pattern))}

    # Intersection, keeping the order of the base directory listing.
    return [path for path in base_files if path in pr_files]

def upload_to_gui(output_dir, num_procs, dqmgui_url):
    """Start worker threads that upload the comparison output files.

    Collects ``base/*.root`` and ``pr/*.root`` under ``output_dir`` (base
    files first), prints the list, and starts at most ``num_procs`` threads
    running ``upload`` on the shared list. The threads are started but not
    joined here.
    """
    files = []
    for pattern in ('base/*.root', 'pr/*.root'):
        files.extend(glob.glob(os.path.join(output_dir, pattern)))

    print('Files to be uploaded:')
    print(files)

    # Never start more workers than there are files to upload.
    worker_count = min(num_procs, len(files))
    for _ in range(worker_count):
        Thread(target=upload, args=(files, dqmgui_url)).start()
    
def upload(files, dqmgui_url):
    """Worker loop: upload files with visDQMUpload.py until ``files`` is empty.

    ``files`` is a list of paths shared with the other upload threads; each
    iteration pops one path and runs ``visDQMUpload.py <dqmgui_url> <path>``.
    A non-zero exit status or an exception for one file is printed and the
    loop carries on with the next file.
    """
    while files:
        try:
            file_path = files.pop()
        except IndexError:
            # Another thread popped the last filename right after this one
            # passed the loop check. Nothing is left to upload.
            return

        try:
            command = ['visDQMUpload.py', dqmgui_url, file_path]
            print('Uploading output:')
            print(' '.join(command))

            return_code = subprocess.call(command)
            if return_code != 0:
                # Report the failure instead of silently dropping the exit status.
                print('Uploading %s failed with exit code %s' % (file_path, return_code))
            print('')
        except Exception as ex:
            print('Exception uploading a file: %s' % ex)

def generate_summary_html(output_dir, pr_list, summary_dir, dqmgui_url):
    """Render the HTML summary of all comparisons and write it to disk.

    The template ``dqm-histo-comparison-summary-template.html`` is looked up
    under ``$CMSSW_BASE/src/DQMServices/FileIO/scripts`` and, if it is not
    there, under the same path in ``$CMSSW_RELEASE_BASE``. The placeholders
    ``$PR_LIST$``, ``$TOTAL_CHANGES$``, ``$NUMBER_OF_WORKFLOWS$`` and
    ``$PER_WORKFLOW_LIST$`` are substituted using ``pr_list`` and the entries
    of the global ``COMPARISON_RESULTS``.

    The result is written to
    ``<summary_dir>/dqm-histo-comparison-summary.html``; the directory is
    created when needed. ``output_dir`` is accepted but not used here.

    Raises:
        RuntimeError: if neither CMSSW_BASE nor CMSSW_RELEASE_BASE is set.
        OSError: if the template cannot be read or the summary cannot be written.
    """
    template_name = 'dqm-histo-comparison-summary-template.html'
    template_file_path = None
    for env_var in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        area = os.getenv(env_var)
        if area is None:
            # An unset variable must not be passed to os.path.join.
            continue
        template_file_path = os.path.join(area, 'src', 'DQMServices', 'FileIO', 'scripts', template_name)
        if os.path.isfile(template_file_path):
            break
    if template_file_path is None:
        raise RuntimeError('Cannot locate %s: neither CMSSW_BASE nor CMSSW_RELEASE_BASE is set' % template_name)

    with open(template_file_path, 'r') as template_file:
        result = template_file.read()

    result = result.replace('$PR_LIST$', pr_list)

    rows = []
    total_changes = 0

    for comp in COMPARISON_RESULTS:
        total_changes += comp['removed_elements'] + comp['added_elements'] + comp['changed_elements']
        baseline_count = comp['changed_elements'] + comp['removed_elements']
        pr_count = comp['changed_elements'] + comp['added_elements']
        overlay_count = baseline_count

        # Make urls
        base_url = '%s/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;' % (dqmgui_url, comp['run_nr'], comp['base_dataset'])
        pr_url = '%s/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;' % (dqmgui_url, comp['run_nr'], comp['pr_dataset'])
        overlay_url = '%s/start?runnr=%s;dataset%%3D%s;referenceshow%%3Dall;referencenorm=False;referenceobj1%%3Dother::%s::;sampletype%%3Doffline_relval;workspace%%3DEverything;' \
            % (dqmgui_url, comp['run_nr'], comp['pr_dataset'], comp['base_dataset'])
        base_raw_url = '%s/jsroot/index.htm?file=%s/data/browse/%s' % (dqmgui_url, dqmgui_url, comp['base_file_path_in_gui'])
        pr_raw_url = '%s/jsroot/index.htm?file=%s/data/browse/%s' % (dqmgui_url, dqmgui_url, comp['pr_file_path_in_gui'])

        rows.append('        <tr>\n')
        rows.append('            <td><a href="%s" target="_blank">%s baseline GUI</a><span> (%s)</span></td>\n' % (base_url, comp['workflow'], baseline_count))
        rows.append('            <td><a href="%s" target="_blank">%s pr GUI</a><span> (%s)</span></td>\n' % (pr_url, comp['workflow'], pr_count))
        rows.append('            <td><a href="%s" target="_blank">%s overlay GUI</a><span> (%s)</span></td>\n' % (overlay_url, comp['workflow'], overlay_count))
        rows.append('            <td><a href="%s" target="_blank">%s baseline rootjs</a><span> (%s)</span></td>\n' % (base_raw_url, comp['workflow'], baseline_count))
        rows.append('            <td><a href="%s" target="_blank">%s pr rootjs</a><span> (%s)</span></td>\n' % (pr_raw_url, comp['workflow'], pr_count))
        rows.append('            <td><span class="removed">-%s</span><span class="added">+%s</span><span class="changed">%s</span></td>\n' \
            % (comp['removed_elements'], comp['added_elements'], comp['changed_elements']))
        rows.append('        </tr>\n')

    table_items = ''.join(rows)

    result = result.replace('$TOTAL_CHANGES$', str(total_changes))
    result = result.replace('$NUMBER_OF_WORKFLOWS$', str(len(COMPARISON_RESULTS)))
    result = result.replace('$PER_WORKFLOW_LIST$', table_items)

    # Write output
    result_file_path = os.path.join(summary_dir, 'dqm-histo-comparison-summary.html')
    result_dir = os.path.dirname(result_file_path)
    if result_dir:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(result_dir, exist_ok=True)
    with open(result_file_path, 'w') as summary_file:
        summary_file.write(result)

# Script entry point: parse the command line, run all comparisons, upload the
# resulting root files to the DQM GUI and write the HTML summary.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements within DQM files found in base-dir with the ones found in in pr-dir. "
        "All workflow directories are searched for correctly named DQM root files. "
        "Comparison is done bin by bin and output is written to a root files containing only the changes.")
    parser.add_argument('-b', '--base-dir', help='Baseline IB directory', default='basedata/')
    parser.add_argument('-p', '--pr-dir', help='PR directory', default='prdata/')
    parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
    parser.add_argument('-j', '--nprocs', help='Number of processes', default=1, type=int)
    parser.add_argument('-n', '--pr-number', help='This is obsolete and should NOT be used.', required=False)
    parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
    parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000')
    parser.add_argument('-s', '--summary-dir', help='Directory where summary with all links will be saved', default='')
    parser.add_argument('-l', '--pr-list', help='A list of PRs participating in the comparison', default='')
    parser.add_argument('-u', '--dqmgui-url', help='DQMGUI url to upload to', default='https://cmsweb.cern.ch/dqm/dev', required=False)
    args = parser.parse_args()

    # Get the number of the PR which triggered the comparison: the part after
    # the '/' in the first space-separated entry of --pr-list, with '#'
    # replaced by '_'.
    pr_number = 'Unknown'
    try:
        pr_number = args.pr_list.split(' ')[0].split('/')[1].replace('#', '_')
    except IndexError:
        # The list is empty or its first entry has no '/'; keep 'Unknown'.
        pass

    # Fall back to the release of the current CMSSW environment when no
    # release format was given on the command line.
    release_format = args.release_format
    if not release_format:
        try:
            release_format = os.environ['CMSSW_VERSION']
        except KeyError:
            print('You are not in a CMSSW release. Please provide a valid release-format (-r option)')
            # sys.exit flushes stdout on the way out. os._exit does not, so
            # the message above could be lost when output is redirected.
            sys.exit(1)

    collect_and_compare_files(args.base_dir, args.pr_dir, args.output_dir, args.nprocs, pr_number, args.test_number, release_format)
    upload_to_gui(args.output_dir, args.nprocs, args.dqmgui_url)
    generate_summary_html(args.output_dir, args.pr_list, args.summary_dir, args.dqmgui_url)