File indexing completed on 2023-03-17 11:26:49
0001
0002
0003 '''
0004 Helper functions for CherryPy application ``browse_db.py``.
0005
0006 Author: Albertas Gimbutas, Vilnius University (LT)
0007 e-mail: albertasgim@gmail.com
0008 '''
0009 from __future__ import print_function
0010
0011 import sqlite3
0012 import re
0013 from os import getcwd, listdir
0014 from os.path import join
0015 from urllib import quote
0016 from functools import reduce
0017
0018
0019 renaming = {
0020 'MessageLogger': 'Miscellanea', 'FourVector': 'Generic',
0021 'Castor': 'Castor Calorimeter', 'RPCDigisV': 'Resistive Plate Chambers',
0022 'GlobalRecHitsV': 'Miscellanea: Sim.', 'Top': 'Top', 'HLTJETMET': 'JetMet',
0023 'GlobalDigisV': 'Miscellanea: Sim.', 'L1TEMU': 'Level 1 Trigger',
0024 'TrackerRecHitsV': 'Tracking System', 'MuonDTHitsV': 'Muon Objects',
0025 'EcalDigisV': 'Ecal Calorimeter', 'EcalHitsV': 'Ecal Calorimeter',
0026 'Muons': 'Muon Objects', 'DT': 'Drift Tubes', 'TrackerDigisV': 'Tracking System',
0027 'Pixel': 'Tracking System', 'EcalPreshower': 'Ecal Calorimeter',
0028 'EgammaV': 'Photons', 'AlCaEcalPi0': 'Alca', 'SusyExo': 'SusyExo',
0029 'MuonDTDigisV': 'Muon Objects', 'TauRelVal': 'Tau',
0030 'HcalHitsV': 'Hcal Calorimeter', 'RPC': 'Resistive Plate Chambers',
0031 'EcalRecHitsV': 'Ecal Calorimeter', 'EgOffline': 'EGamma',
0032 'MuonCSCDigisV': 'Muon Objects', 'ParticleFlow': 'Miscellanea',
0033 'Info': 'Miscellanea', 'Tracking': 'Tracking',
0034 'NoiseRatesV': 'Miscellanea: Sim.', 'Generator': 'Miscellanea: Sim.',
0035 'Btag': 'B Tagging', 'Higgs': 'Higgs', 'GlobalHitsV': 'Miscellanea: Sim.',
0036 'HcalRecHitsV': 'Hcal Calorimeter', 'TrackerHitsV': 'Tracking System',
0037 'CSC': 'Cathode Strip Chambers', 'Muon,HLTMonMuon': 'Muon',
0038 'Hcal': 'Hcal Calorimeter', 'TauOffline': 'Tau',
0039 'HeavyFlavor': 'HeavyFlavor', 'JetMET': 'Jet', 'Physics': 'Miscellanea',
0040 'CaloTowersV': 'Hcal Calorimeter', 'SiStrip': 'Tracking System',
0041 'EcalClusterV': 'Ecal Calorimeter', 'HLTEgammaValidation': 'EGamma',
0042 'EcalPhiSym': 'Alca', 'L1T': 'Level 1 Trigger', 'MixingV': 'Miscellanea: Sim.',
0043 'FourVector_Val': 'Generic', 'EcalEndcap': 'Ecal Calorimeter',
0044 'TauOnline': 'Tau', 'Egamma': 'Photons', 'HcalIsoTrack': 'Alca',
0045 'EcalBarrel': 'Ecal Calorimeter'
0046 }
0047
0048
0049 def get_img_path(filename, path):
0050 '''Returns image path for https://cmsweb.cern.ch/dqm histogram
0051 visualisation service'''
0052 run = int(re.findall('_R(\d*)__', filename)[0])
0053 parts = [e.rstrip('.root') for e in filename.split('__')]
0054 path = path.replace('Run summary/', '')
0055 return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
0056
0057
0058 def get_img_url(path, f1, f2=None, w=250, h=250):
0059 '''Returns full URL of histogram (or histogram overlay) image for
0060 https://cmsweb.cern.ch/dqm visualisation service.'''
0061 base = 'https://cmsweb.cern.ch/dqm/relval/plotfairy'
0062 if not f2:
0063 return '%s/%s?w=%s;h=%s' % (base, get_img_path(f1, path), w, h)
0064 return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
0065 get_img_path(f1, path), get_img_path(f2, path), w, h)
0066
0067
0068 def get_dataset_name(name):
0069 '''Returns extracted dataset name from the given ROOT filename.'''
0070 if re.search('RelVal', name):
0071 run = str(int(re.findall('_R(\d{9})_', name)[0]))
0072 ds = re.findall('GR_R_\d*_V\d*C?_(?:RelVal)?_([\w\d]*-v\d+)_', name)[0]
0073 else:
0074 run, ds = re.findall('R(\d{9})__([\w\d]*)__CMSSW_', name)[0:1]
0075 return '_'.join([ds, str(int(run))])
0076
0077
0078 def get_release(name):
0079 '''Returns extracted release from the given ROOT filename.'''
0080 return re.findall('R\d{9}__([\w\d_-]*)__DQM.root', name)[0]
0081
0082
0083 def get_stats(c, threshold, dir_ranges):
0084 '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.'''
0085 successes, nulls, fails = 0, 0, 0
0086 for from_id, till_id in dir_ranges:
0087 c.execute('''SELECT count(*) FROM HistogramComparison
0088 WHERE p_value >= 0 AND p_value > ? AND
0089 id >= ? and id <= ?''', (threshold, from_id, till_id))
0090 successes += c.fetchone()[0]
0091 c.execute('''SELECT count(*) FROM HistogramComparison WHERE
0092 p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
0093 nulls += c.fetchone()[0]
0094 c.execute('''SELECT count(*) FROM HistogramComparison
0095 WHERE p_value >= 0 AND p_value <= ? AND
0096 id >= ? AND id <= ?''', (threshold, from_id, till_id))
0097 fails += c.fetchone()[0]
0098 return successes, nulls, fails
0099
0100
0101 def get_percentage(successes, nulls, fails):
0102 '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.'''
0103 if successes is None:
0104 return None, None, None
0105 total = successes + fails + nulls
0106 if not total:
0107 return None, None, None
0108 success = round(100. * successes / total, 2)
0109 null = round(100. * nulls / total, 2)
0110 fail = round(100. * fails / total, 2)
0111 return success, null, fail
0112
0113
0114 def get_folders(c, file_id, filename, dir_id, threshold):
0115 '''Returns file folder stats for one "summary table" column.'''
0116 ds_name = get_dataset_name(filename)
0117 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM
0118 Directory WHERE parent_id=?''', (dir_id,))
0119 dirs = c.fetchall()
0120 file_folders = dict()
0121 total_successes, total_nulls, total_fails = 0, 0, 0
0122 for name, from_id, till_id in dirs:
0123 successes, nulls, fails = get_stats(c, threshold, ((from_id, till_id),))
0124 total_successes += successes
0125 total_nulls += nulls
0126 total_fails += fails
0127 if name in file_folders:
0128 file_folders[name].append([file_id, ds_name, successes, nulls, fails])
0129 else:
0130 file_folders[name] = [file_id, ds_name, successes, nulls, fails]
0131 return [('Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
0132
0133
0134 def join_ranges(ranges, elem):
0135 '''To do less DB calls, joins [(from_id, till_id), ...] ranges.'''
0136 if isinstance(ranges, tuple):
0137 ranges = [ranges]
0138 if ranges[-1][-1] + 1 == elem[0]:
0139 ranges[-1] = (ranges[-1][0], elem[1])
0140 else:
0141 ranges.append(elem)
0142 return ranges
0143
0144
0145 def get_release_list(c):
0146 '''Returns all ``ReleaseComparisons`` found on database.'''
0147 c.execute('SELECT title, statistical_test FROM ReleaseComparison')
0148 return c.fetchall()
0149
0150
0151 def db_list_with_releases(path='.'):
0152 '''Returns available database list and their releases.'''
0153 db_list = [db for db in listdir(path) if db.endswith('.db')]
0154 db_list_with_releases = []
0155 for db in db_list:
0156 conn = sqlite3.connect(join(path, db))
0157 releases = get_release_list(conn.cursor())
0158 db_list_with_releases.append((db[:-3], releases))
0159 conn.close()
0160 return db_list_with_releases
0161
0162
0163
0164 def get_release_summary_stats(c, release_title, st_test, threshold=1e-5):
0165 '''Returns context for ``release_summary.html`` template.'''
0166
0167 context = dict()
0168 c.execute('''SELECT release1, release2, id FROM ReleaseComparison
0169 WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
0170 context['release1'], context['release2'], release_comp_id = c.fetchone()
0171
0172
0173 c.execute('''SELECT from_histogram_id, till_histogram_id FROM Directory
0174 WHERE id IN (SELECT directory_id FROM RootFileComparison
0175 WHERE release_comparison_id = ?)''', (release_comp_id,))
0176 dir_ranges = c.fetchall()
0177
0178 if len(dir_ranges) > 1:
0179 dir_ranges = reduce(join_ranges, dir_ranges)
0180
0181 context['successes'], context['nulls'], context['fails'], = get_stats(c, threshold, dir_ranges)
0182
0183 context['total'] = context['successes'] + context['fails'] + context['nulls']
0184 if context['total']:
0185 context['success'], context['null'], context['fail'] = \
0186 get_percentage(context['successes'], context['nulls'], context['fails'])
0187
0188
0189 c.execute('''SELECT id, filename1, directory_id FROM RootFileComparison
0190 WHERE release_comparison_id = ?''', (release_comp_id,))
0191 files = c.fetchall()
0192
0193
0194 folders = dict()
0195 for file_id, filename, dir_id in files:
0196
0197 file_folders = get_folders(c, file_id, filename, dir_id, threshold)
0198 for folder_name, file_folder_stats in file_folders:
0199 if folder_name in folders:
0200
0201 folders[folder_name].append(file_folder_stats)
0202
0203 folders[folder_name][0][2] += file_folder_stats[2]
0204 folders[folder_name][0][3] += file_folder_stats[3]
0205 folders[folder_name][0][4] += file_folder_stats[4]
0206 else:
0207 folder_summary = [None, 'Summary', file_folder_stats[2],
0208 file_folder_stats[3], file_folder_stats[4]]
0209 folders[folder_name] = [folder_summary, file_folder_stats]
0210
0211
0212 folders = [('Summary', folders.pop('Summary'))] + sorted(folders.items(), key=lambda x: x[0])
0213 for folder, file_stats in folders:
0214
0215 if len(file_stats) != len(files)+1:
0216 for i, file_ in enumerate(files):
0217 if file_[0] != file_stats[i][0]:
0218 file_stats = file_stats[:i] + [[None, "N/A", None, None, None]] + file_stats[i:]
0219
0220 for i, stats in enumerate(file_stats):
0221 stats[2], stats[3], stats[4] = get_percentage(*stats[2:5])
0222 context['folders'] = folders
0223
0224
0225
0226 for folder in folders:
0227 print(folder)
0228
0229
0230
0231
0232
0233
0234 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
0235 WHERE parent_id IN (SELECT directory_id FROM RootFileComparison
0236 WHERE release_comparison_id = ?)''', (release_comp_id,))
0237 lvl3_dir_ranges = c.fetchall()
0238
0239 cum_lvl3_dir_ranges = dict()
0240 for name, from_id, till_id in lvl3_dir_ranges:
0241 if name in cum_lvl3_dir_ranges:
0242 cum_lvl3_dir_ranges[name].append((from_id, till_id))
0243 else:
0244 cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
0245
0246
0247 summary_stats = dict()
0248 detailed_stats = dict()
0249 for name, ranges in cum_lvl3_dir_ranges.items():
0250 successes, nulls, fails = get_stats(c, threshold, ranges)
0251 if name in detailed_stats:
0252 detailed_stats[name][0] += successes
0253 detailed_stats[name][1] += nulls
0254 detailed_stats[name][2] += fails
0255 else:
0256 detailed_stats[name] = [successes, nulls, fails]
0257 if name in renaming:
0258 if renaming[name] in summary_stats:
0259 summary_stats[renaming[name]][0] += successes
0260 summary_stats[renaming[name]][1] += nulls
0261 summary_stats[renaming[name]][2] += fails
0262 else:
0263 summary_stats[renaming[name]] = [successes, nulls, fails]
0264
0265
0266 summary_ratios = []
0267 for name, stats in summary_stats.items():
0268 total = sum(stats)
0269 if total:
0270 ratio = float(stats[0]) / sum(stats)
0271 summary_ratios.append((name, ratio))
0272 detailed_ratios = []
0273 for name, stats in detailed_stats.items():
0274 total = sum(stats)
0275 if total:
0276 ratio = float(stats[0]) / sum(stats)
0277 detailed_ratios.append((name, ratio))
0278
0279 context['summary_ratios'] = sorted(summary_ratios, key=lambda x: x[0])
0280 context['detailed_ratios'] = sorted(detailed_ratios, key=lambda x: x[0])
0281 return context
0282
0283
0284 def get_directory_summary_stats(c, url_args, file_id, threshold):
0285 '''Returns context for ``directory_summary.html`` template.'''
0286 context = dict()
0287 c.execute('''SELECT directory_id, filename1, filename2 FROM RootFileComparison
0288 WHERE id = ?''', (file_id,))
0289 dir_id, f1, f2 = c.fetchone()
0290 context['release1'] = get_release(f1)
0291 context['release2'] = get_release(f2)
0292 if not url_args:
0293 dir_name = get_dataset_name(f1)
0294 else:
0295
0296 directory_names = []
0297
0298 for dir_name in url_args:
0299 c.execute('''SELECT id, name FROM Directory WHERE name = ? AND
0300 parent_id = ?''', (dir_name, dir_id))
0301 dir_id, name = c.fetchone()
0302 directory_names.append(name)
0303 context['parent_name'] = '/'.join(directory_names)
0304
0305
0306 c.execute('''SELECT from_histogram_id, till_histogram_id FROM
0307 Directory WHERE id = ?''', (dir_id,))
0308 ranges = c.fetchone()
0309 successes, nulls, fails = get_stats(c, threshold, (ranges,))
0310 success, null, fail = get_percentage(successes, nulls, fails)
0311 context.update({
0312 'successes': successes, 'nulls': nulls, 'fails': fails,
0313 'success': success, 'null': null, 'fail': fail,
0314 'total': successes + nulls + fails, 'dir_name': dir_name
0315 })
0316
0317 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
0318 WHERE parent_id = ?''', (dir_id,))
0319 subdirs = c.fetchall()
0320 subdir_stats = []
0321 for name, from_id, till_id in subdirs:
0322 successes, nulls, fails = get_stats(c, threshold, [(from_id, till_id,)])
0323 success, null, fail = get_percentage(successes, nulls, fails)
0324 subdir_stats.append((name, successes + nulls + fails, successes,
0325 nulls, fails, success, null, fail))
0326 context['subdirs'] = sorted(subdir_stats, key=lambda x: x[4], reverse=True)
0327
0328
0329 c.execute('''SELECT name, p_value FROM HistogramComparison
0330 WHERE directory_id = ?''', (dir_id,))
0331 failed_histos = []
0332 successful_histos = []
0333 null_histos = []
0334 for name, p_value in c.fetchall():
0335 path = quote('%s/%s' % ('/'.join(url_args), name))
0336 url1 = get_img_url(path, f1)
0337 url2 = get_img_url(path, f2)
0338 overlay = get_img_url(path, f1, f2)
0339 if p_value < 0:
0340 null_histos.append((name, p_value, url1, url2, overlay))
0341 elif p_value <= threshold:
0342 failed_histos.append((name, p_value, url1, url2, overlay))
0343 else:
0344 successful_histos.append((name, p_value, url1, url2, overlay))
0345
0346 context['failed_histos'] = sorted(failed_histos, key=lambda x: x[1], reverse=True)
0347 context['null_histos'] = null_histos
0348 context['successful_histos'] = sorted(successful_histos, key=lambda x: x[1], reverse=True)
0349 return context