Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:31:48

0001 #!/usr/bin/env python
0002 # coding: utf-8
0003 '''
0004 Helper functions for CherryPy application ``browse_db.py``.
0005 
0006 Author:  Albertas Gimbutas,  Vilnius University (LT)
0007 e-mail:  albertasgim@gmail.com
0008 '''
0009 from __future__ import print_function
0010 
0011 import sqlite3
0012 import re
0013 from os import getcwd, listdir
0014 from os.path import join
0015 from urllib import quote
0016 from functools import reduce
0017 
0018 
0019 renaming = {
0020         'MessageLogger': 'Miscellanea', 'FourVector': 'Generic',
0021         'Castor': 'Castor Calorimeter', 'RPCDigisV': 'Resistive Plate Chambers',
0022         'GlobalRecHitsV': 'Miscellanea: Sim.', 'Top': 'Top', 'HLTJETMET': 'JetMet',
0023         'GlobalDigisV': 'Miscellanea: Sim.', 'L1TEMU': 'Level 1 Trigger',
0024         'TrackerRecHitsV': 'Tracking System', 'MuonDTHitsV': 'Muon Objects',
0025         'EcalDigisV': 'Ecal Calorimeter', 'EcalHitsV': 'Ecal Calorimeter',
0026         'Muons': 'Muon Objects', 'DT': 'Drift Tubes', 'TrackerDigisV': 'Tracking System',
0027         'Pixel': 'Tracking System', 'EcalPreshower': 'Ecal Calorimeter',
0028         'EgammaV': 'Photons', 'AlCaEcalPi0': 'Alca', 'SusyExo': 'SusyExo',
0029         'MuonDTDigisV': 'Muon Objects', 'TauRelVal': 'Tau',
0030         'HcalHitsV': 'Hcal Calorimeter', 'RPC': 'Resistive Plate Chambers',
0031         'EcalRecHitsV': 'Ecal Calorimeter', 'EgOffline': 'EGamma',
0032         'MuonCSCDigisV': 'Muon Objects', 'ParticleFlow': 'Miscellanea',
0033         'Info': 'Miscellanea', 'Tracking': 'Tracking',
0034         'NoiseRatesV': 'Miscellanea: Sim.', 'Generator': 'Miscellanea: Sim.',
0035         'Btag': 'B Tagging', 'Higgs': 'Higgs', 'GlobalHitsV': 'Miscellanea: Sim.',
0036         'HcalRecHitsV': 'Hcal Calorimeter', 'TrackerHitsV': 'Tracking System',
0037         'CSC': 'Cathode Strip Chambers', 'Muon,HLTMonMuon': 'Muon',
0038         'Hcal': 'Hcal Calorimeter', 'TauOffline': 'Tau',
0039         'HeavyFlavor': 'HeavyFlavor', 'JetMET': 'Jet', 'Physics': 'Miscellanea',
0040         'CaloTowersV': 'Hcal Calorimeter', 'SiStrip': 'Tracking System',
0041         'EcalClusterV': 'Ecal Calorimeter', 'HLTEgammaValidation': 'EGamma',
0042         'EcalPhiSym': 'Alca', 'L1T': 'Level 1 Trigger', 'MixingV': 'Miscellanea: Sim.',
0043         'FourVector_Val': 'Generic', 'EcalEndcap': 'Ecal Calorimeter',
0044         'TauOnline': 'Tau', 'Egamma': 'Photons', 'HcalIsoTrack': 'Alca',
0045         'EcalBarrel': 'Ecal Calorimeter'
0046 }
0047 
0048 
0049 def get_img_path(filename, path):
0050     '''Returns image path for https://cmsweb.cern.ch/dqm histogram
0051     visualisation service'''
0052     run = int(re.findall('_R(\d*)__', filename)[0])
0053     parts = [e.rstrip('.root') for e in filename.split('__')]
0054     path = path.replace('Run summary/', '')
0055     return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
0056 
0057 
0058 def get_img_url(path, f1, f2=None, w=250, h=250):
0059     '''Returns full URL of histogram (or histogram overlay) image for
0060     https://cmsweb.cern.ch/dqm visualisation service.'''
0061     base = 'https://cmsweb.cern.ch/dqm/relval/plotfairy'
0062     if not f2:
0063         return '%s/%s?w=%s;h=%s' % (base, get_img_path(f1, path), w, h)
0064     return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
0065                  get_img_path(f1, path), get_img_path(f2, path), w, h)
0066 
0067 
0068 def get_dataset_name(name):
0069     '''Returns extracted dataset name from the given ROOT filename.'''
0070     if re.search('RelVal', name):
0071         run = str(int(re.findall('_R(\d{9})_', name)[0]))
0072         ds = re.findall('GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
0073     else:
0074         run, ds = re.findall('R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
0075     return '_'.join([ds, str(int(run))])
0076 
0077 
0078 def get_release(name):
0079     '''Returns extracted release from the given ROOT filename.'''
0080     return re.findall('R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
0081 
0082 
0083 def get_stats(c, threshold, dir_ranges):
0084     '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.'''
0085     successes, nulls, fails = 0, 0, 0
0086     for from_id, till_id in dir_ranges:
0087         c.execute('''SELECT count(*) FROM HistogramComparison
0088                      WHERE p_value >= 0 AND p_value > ? AND
0089                      id >= ? and id <= ?''', (threshold, from_id, till_id))
0090         successes += c.fetchone()[0]
0091         c.execute('''SELECT count(*) FROM HistogramComparison WHERE
0092                      p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
0093         nulls += c.fetchone()[0]
0094         c.execute('''SELECT count(*) FROM HistogramComparison
0095                      WHERE p_value >= 0 AND p_value <= ? AND
0096                      id >= ? AND id <= ?''', (threshold, from_id, till_id))
0097         fails += c.fetchone()[0]
0098     return successes, nulls, fails
0099 
0100 
0101 def get_percentage(successes, nulls, fails):
0102     '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.'''
0103     if successes is None:
0104         return None, None, None
0105     total = successes + fails + nulls
0106     if not total:
0107         return None, None, None
0108     success =  round(100. * successes / total, 2)
0109     null =  round(100. * nulls / total, 2)
0110     fail =  round(100. * fails / total, 2)
0111     return success, null, fail
0112 
0113 
0114 def get_folders(c, file_id, filename, dir_id, threshold):  # TODO: If folder [Egamma|JetMet] analyse their subdirs
0115     '''Returns file folder stats for one "summary table" column.'''
0116     ds_name = get_dataset_name(filename)
0117     c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM
0118                  Directory WHERE parent_id=?''', (dir_id,))
0119     dirs = c.fetchall()
0120     file_folders = dict()
0121     total_successes, total_nulls, total_fails = 0, 0, 0
0122     for name, from_id, till_id in dirs:
0123         successes, nulls, fails = get_stats(c, threshold, ((from_id, till_id),))
0124         total_successes += successes
0125         total_nulls += nulls
0126         total_fails += fails
0127         if name in file_folders:
0128             file_folders[name].append([file_id, ds_name, successes, nulls, fails])
0129         else:
0130             file_folders[name] = [file_id, ds_name, successes, nulls, fails]
0131     return [('Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
0132 
0133 
0134 def join_ranges(ranges, elem):
0135     '''To do less DB calls, joins [(from_id, till_id), ...] ranges.'''
0136     if isinstance(ranges, tuple):
0137         ranges = [ranges]
0138     if ranges[-1][-1] + 1 == elem[0]:
0139         ranges[-1] = (ranges[-1][0], elem[1])
0140     else:
0141         ranges.append(elem)
0142     return ranges
0143 
0144 
0145 def get_release_list(c):
0146     '''Returns all ``ReleaseComparisons`` found on database.'''
0147     c.execute('SELECT title, statistical_test FROM ReleaseComparison')
0148     return c.fetchall()
0149 
0150 
0151 def db_list_with_releases(path='.'):
0152     '''Returns available database list and their releases.'''
0153     db_list = [db for db in listdir(path) if db.endswith('.db')]
0154     db_list_with_releases = []
0155     for db in db_list:
0156         conn = sqlite3.connect(join(path, db))
0157         releases = get_release_list(conn.cursor())
0158         db_list_with_releases.append((db[:-3], releases))
0159         conn.close()
0160     return db_list_with_releases
0161 
0162 # -------------------     Template Context generators     --------------------
0163 
0164 def get_release_summary_stats(c, release_title, st_test, threshold=1e-5):
0165     '''Returns context for ``release_summary.html`` template.'''
0166     ## Summary
0167     context = dict()
0168     c.execute('''SELECT release1, release2, id FROM ReleaseComparison
0169                  WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
0170     context['release1'], context['release2'], release_comp_id = c.fetchone()
0171 
0172     # All directory ranges
0173     c.execute('''SELECT from_histogram_id, till_histogram_id FROM Directory
0174                    WHERE id IN (SELECT directory_id FROM RootFileComparison
0175                    WHERE release_comparison_id = ?)''', (release_comp_id,))
0176     dir_ranges = c.fetchall()
0177 
0178     if len(dir_ranges) > 1:
0179         dir_ranges = reduce(join_ranges, dir_ranges)
0180 
0181     context['successes'], context['nulls'], context['fails'], = get_stats(c, threshold, dir_ranges)
0182 
0183     context['total'] = context['successes'] + context['fails'] + context['nulls']
0184     if context['total']:
0185         context['success'], context['null'], context['fail'] = \
0186             get_percentage(context['successes'], context['nulls'], context['fails'])
0187 
0188     ## Data needed for the all the statistics:
0189     c.execute('''SELECT id, filename1, directory_id FROM RootFileComparison
0190                  WHERE release_comparison_id = ?''', (release_comp_id,))
0191     files = c.fetchall()
0192 
0193     ## folders: [(folder_name, [folder: (file_id, filename, success, null, fail)]), ...]
0194     folders = dict()
0195     for file_id, filename, dir_id in files:
0196         # file_folders: [(folder_name, [(file_id, file_name, success, null, fail)]), ...]
0197         file_folders = get_folders(c, file_id, filename, dir_id, threshold)
0198         for folder_name, file_folder_stats in file_folders:
0199             if folder_name in folders:
0200                 # Add folder stats
0201                 folders[folder_name].append(file_folder_stats)
0202                 # Update folder summary
0203                 folders[folder_name][0][2] += file_folder_stats[2]
0204                 folders[folder_name][0][3] += file_folder_stats[3]
0205                 folders[folder_name][0][4] += file_folder_stats[4]
0206             else:
0207                 folder_summary = [None, 'Summary', file_folder_stats[2],
0208                                     file_folder_stats[3], file_folder_stats[4]]
0209                 folders[folder_name] = [folder_summary, file_folder_stats]
0210 
0211     ## Calculate ratios
0212     folders = [('Summary', folders.pop('Summary'))] + sorted(folders.items(), key=lambda x: x[0])
0213     for folder, file_stats in folders:
0214         # Insert N/A if histo is missing
0215         if len(file_stats) != len(files)+1:
0216             for i, file_ in enumerate(files):
0217                 if file_[0] != file_stats[i][0]:
0218                     file_stats = file_stats[:i] + [[None, "N/A", None, None, None]] + file_stats[i:]
0219         # Count the ratios
0220         for i, stats in enumerate(file_stats):
0221             stats[2], stats[3], stats[4] = get_percentage(*stats[2:5])
0222     context['folders'] = folders
0223 
0224 
0225     ## Select Summary Barchart, Detailed Barchart
0226     for folder in folders:
0227         print(folder)
0228     #   detailed_ratios: (name, success_ratio)
0229     #   summary_ratios: (name, success_ratio)
0230 
0231 
0232     ## Summary Barchart
0233     # TODO: optimise not to fetch from DB again.
0234     c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
0235                  WHERE parent_id IN (SELECT directory_id FROM RootFileComparison
0236                  WHERE release_comparison_id = ?)''', (release_comp_id,))
0237     lvl3_dir_ranges = c.fetchall()
0238 
0239     cum_lvl3_dir_ranges = dict()
0240     for name, from_id, till_id in lvl3_dir_ranges:
0241         if name in cum_lvl3_dir_ranges:
0242             cum_lvl3_dir_ranges[name].append((from_id, till_id))
0243         else:
0244             cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
0245 
0246     # Fetch stats
0247     summary_stats = dict()
0248     detailed_stats = dict()
0249     for name, ranges in cum_lvl3_dir_ranges.items():
0250         successes, nulls, fails = get_stats(c, threshold, ranges)
0251         if name in detailed_stats:
0252             detailed_stats[name][0] += successes
0253             detailed_stats[name][1] += nulls
0254             detailed_stats[name][2] += fails
0255         else:
0256             detailed_stats[name] = [successes, nulls, fails]
0257         if name in renaming:
0258             if renaming[name] in summary_stats:
0259                 summary_stats[renaming[name]][0] += successes
0260                 summary_stats[renaming[name]][1] += nulls
0261                 summary_stats[renaming[name]][2] += fails
0262             else:
0263                 summary_stats[renaming[name]] = [successes, nulls, fails]
0264 
0265     # Calculate ratio
0266     summary_ratios = []
0267     for name, stats in summary_stats.items():
0268         total = sum(stats)
0269         if total:
0270             ratio = float(stats[0]) / sum(stats)
0271             summary_ratios.append((name, ratio))
0272     detailed_ratios = []
0273     for name, stats in detailed_stats.items():
0274         total = sum(stats)
0275         if total:
0276             ratio = float(stats[0]) / sum(stats)
0277             detailed_ratios.append((name, ratio))
0278 
0279     context['summary_ratios'] = sorted(summary_ratios, key=lambda x: x[0])
0280     context['detailed_ratios'] = sorted(detailed_ratios, key=lambda x: x[0])
0281     return context
0282 
0283 
0284 def get_directory_summary_stats(c, url_args, file_id, threshold):
0285     '''Returns context for ``directory_summary.html`` template.'''
0286     context = dict()
0287     c.execute('''SELECT directory_id, filename1, filename2 FROM RootFileComparison
0288                  WHERE id = ?''', (file_id,))
0289     dir_id, f1, f2 = c.fetchone()
0290     context['release1'] = get_release(f1)
0291     context['release2'] = get_release(f2)
0292     if not url_args:
0293         dir_name = get_dataset_name(f1)
0294     else:
0295         #### Select DQMData/Run directory.
0296         directory_names = []
0297 
0298         for dir_name in url_args:
0299             c.execute('''SELECT id, name FROM Directory WHERE name = ? AND
0300                     parent_id = ?''', (dir_name, dir_id))
0301             dir_id, name = c.fetchone()
0302             directory_names.append(name)
0303         context['parent_name'] = '/'.join(directory_names)
0304 
0305     ## Select stats
0306     c.execute('''SELECT from_histogram_id, till_histogram_id FROM
0307                  Directory WHERE id = ?''', (dir_id,))
0308     ranges = c.fetchone()
0309     successes, nulls, fails = get_stats(c, threshold, (ranges,))
0310     success, null, fail = get_percentage(successes, nulls, fails)
0311     context.update({
0312             'successes': successes, 'nulls': nulls, 'fails': fails,
0313             'success': success, 'null': null, 'fail': fail,
0314             'total': successes + nulls + fails, 'dir_name': dir_name
0315         })
0316     # subdirs: name, total, success, fail, null
0317     c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
0318                  WHERE parent_id = ?''', (dir_id,))
0319     subdirs = c.fetchall()
0320     subdir_stats = []
0321     for name, from_id, till_id in subdirs:
0322         successes, nulls, fails = get_stats(c, threshold, [(from_id, till_id,)])
0323         success, null, fail = get_percentage(successes, nulls, fails)
0324         subdir_stats.append((name, successes + nulls + fails, successes,
0325                              nulls, fails, success, null, fail))
0326     context['subdirs'] = sorted(subdir_stats, key=lambda x: x[4], reverse=True)
0327 
0328     # histograms: name, p_value
0329     c.execute('''SELECT name, p_value FROM HistogramComparison
0330                  WHERE directory_id = ?''', (dir_id,))
0331     failed_histos = []
0332     successful_histos = []
0333     null_histos = []
0334     for name, p_value in c.fetchall():
0335         path = quote('%s/%s' % ('/'.join(url_args), name))
0336         url1 = get_img_url(path, f1)
0337         url2 = get_img_url(path, f2)
0338         overlay = get_img_url(path, f1, f2)
0339         if p_value < 0:
0340             null_histos.append((name, p_value, url1, url2, overlay))
0341         elif p_value <= threshold:
0342             failed_histos.append((name, p_value, url1, url2, overlay))
0343         else:
0344             successful_histos.append((name, p_value, url1, url2, overlay))
0345 
0346     context['failed_histos'] = sorted(failed_histos, key=lambda x: x[1], reverse=True)
0347     context['null_histos'] = null_histos
0348     context['successful_histos'] = sorted(successful_histos, key=lambda x: x[1], reverse=True)
0349     return context