File indexing completed on 2024-11-26 02:34:35
0001
0002
0003 '''
0004 Helper functions for CherryPy application ``browse_db.py``.
0005
0006 Author: Albertas Gimbutas, Vilnius University (LT)
0007 e-mail: albertasgim@gmail.com
0008 '''
0009
0010 import sqlite3
0011 import re
0012 from os import getcwd, listdir
0013 from os.path import join
0014 from urllib import quote
0015 from functools import reduce
0016
0017
0018 renaming = {
0019 'MessageLogger': 'Miscellanea', 'FourVector': 'Generic',
0020 'Castor': 'Castor Calorimeter', 'RPCDigisV': 'Resistive Plate Chambers',
0021 'GlobalRecHitsV': 'Miscellanea: Sim.', 'Top': 'Top', 'HLTJETMET': 'JetMet',
0022 'GlobalDigisV': 'Miscellanea: Sim.', 'L1TEMU': 'Level 1 Trigger',
0023 'TrackerRecHitsV': 'Tracking System', 'MuonDTHitsV': 'Muon Objects',
0024 'EcalDigisV': 'Ecal Calorimeter', 'EcalHitsV': 'Ecal Calorimeter',
0025 'Muons': 'Muon Objects', 'DT': 'Drift Tubes', 'TrackerDigisV': 'Tracking System',
0026 'Pixel': 'Tracking System', 'EcalPreshower': 'Ecal Calorimeter',
0027 'EgammaV': 'Photons', 'AlCaEcalPi0': 'Alca', 'SusyExo': 'SusyExo',
0028 'MuonDTDigisV': 'Muon Objects', 'TauRelVal': 'Tau',
0029 'HcalHitsV': 'Hcal Calorimeter', 'RPC': 'Resistive Plate Chambers',
0030 'EcalRecHitsV': 'Ecal Calorimeter', 'EgOffline': 'EGamma',
0031 'MuonCSCDigisV': 'Muon Objects', 'ParticleFlow': 'Miscellanea',
0032 'Info': 'Miscellanea', 'Tracking': 'Tracking',
0033 'NoiseRatesV': 'Miscellanea: Sim.', 'Generator': 'Miscellanea: Sim.',
0034 'Btag': 'B Tagging', 'Higgs': 'Higgs', 'GlobalHitsV': 'Miscellanea: Sim.',
0035 'HcalRecHitsV': 'Hcal Calorimeter', 'TrackerHitsV': 'Tracking System',
0036 'CSC': 'Cathode Strip Chambers', 'Muon,HLTMonMuon': 'Muon',
0037 'Hcal': 'Hcal Calorimeter', 'TauOffline': 'Tau',
0038 'HeavyFlavor': 'HeavyFlavor', 'JetMET': 'Jet', 'Physics': 'Miscellanea',
0039 'CaloTowersV': 'Hcal Calorimeter', 'SiStrip': 'Tracking System',
0040 'EcalClusterV': 'Ecal Calorimeter', 'HLTEgammaValidation': 'EGamma',
0041 'EcalPhiSym': 'Alca', 'L1T': 'Level 1 Trigger', 'MixingV': 'Miscellanea: Sim.',
0042 'FourVector_Val': 'Generic', 'EcalEndcap': 'Ecal Calorimeter',
0043 'TauOnline': 'Tau', 'Egamma': 'Photons', 'HcalIsoTrack': 'Alca',
0044 'EcalBarrel': 'Ecal Calorimeter'
0045 }
0046
0047
0048 def get_img_path(filename, path):
0049 '''Returns image path for https://cmsweb.cern.ch/dqm histogram
0050 visualisation service'''
0051 run = int(re.findall('_R(\\d*)__', filename)[0])
0052 parts = [e.rstrip('.root') for e in filename.split('__')]
0053 path = path.replace('Run summary/', '')
0054 return 'archive/%s/%s/%s/%s/%s' % (run, parts[1], parts[2], parts[3], path)
0055
0056
0057 def get_img_url(path, f1, f2=None, w=250, h=250):
0058 '''Returns full URL of histogram (or histogram overlay) image for
0059 https://cmsweb.cern.ch/dqm visualisation service.'''
0060 base = 'https://cmsweb.cern.ch/dqm/relval/plotfairy'
0061 if not f2:
0062 return '%s/%s?w=%s;h=%s' % (base, get_img_path(f1, path), w, h)
0063 return '%s/overlay?obj=%s;obj=%s;w=%s;h=%s' % (base,
0064 get_img_path(f1, path), get_img_path(f2, path), w, h)
0065
0066
0067 def get_dataset_name(name):
0068 '''Returns extracted dataset name from the given ROOT filename.'''
0069 if re.search('RelVal', name):
0070 run = str(int(re.findall('_R(\\d{9})_', name)[0]))
0071 ds = re.findall('GR_R_\\d*_V\\d*C?_(?:RelVal)?_([\\w\\d]*-v\\d+)_', name)[0]
0072 else:
0073 run, ds = re.findall('R(\\d{9})__([\\w\\d]*)__CMSSW_', name)[0:1]
0074 return '_'.join([ds, str(int(run))])
0075
0076
0077 def get_release(name):
0078 '''Returns extracted release from the given ROOT filename.'''
0079 return re.findall('R\\d{9}__([\\w\\d_-]*)__DQM.root', name)[0]
0080
0081
0082 def get_stats(c, threshold, dir_ranges):
0083 '''Returns ``successes``, ``fails``, ``nulls`` for the given dir_ranges.'''
0084 successes, nulls, fails = 0, 0, 0
0085 for from_id, till_id in dir_ranges:
0086 c.execute('''SELECT count(*) FROM HistogramComparison
0087 WHERE p_value >= 0 AND p_value > ? AND
0088 id >= ? and id <= ?''', (threshold, from_id, till_id))
0089 successes += c.fetchone()[0]
0090 c.execute('''SELECT count(*) FROM HistogramComparison WHERE
0091 p_value < 0 AND id >= ? AND id <= ?''', (from_id, till_id))
0092 nulls += c.fetchone()[0]
0093 c.execute('''SELECT count(*) FROM HistogramComparison
0094 WHERE p_value >= 0 AND p_value <= ? AND
0095 id >= ? AND id <= ?''', (threshold, from_id, till_id))
0096 fails += c.fetchone()[0]
0097 return successes, nulls, fails
0098
0099
0100 def get_percentage(successes, nulls, fails):
0101 '''Converts integers ``successes``, ``nulls`` and ``fails`` to percents.'''
0102 if successes is None:
0103 return None, None, None
0104 total = successes + fails + nulls
0105 if not total:
0106 return None, None, None
0107 success = round(100. * successes / total, 2)
0108 null = round(100. * nulls / total, 2)
0109 fail = round(100. * fails / total, 2)
0110 return success, null, fail
0111
0112
0113 def get_folders(c, file_id, filename, dir_id, threshold):
0114 '''Returns file folder stats for one "summary table" column.'''
0115 ds_name = get_dataset_name(filename)
0116 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM
0117 Directory WHERE parent_id=?''', (dir_id,))
0118 dirs = c.fetchall()
0119 file_folders = dict()
0120 total_successes, total_nulls, total_fails = 0, 0, 0
0121 for name, from_id, till_id in dirs:
0122 successes, nulls, fails = get_stats(c, threshold, ((from_id, till_id),))
0123 total_successes += successes
0124 total_nulls += nulls
0125 total_fails += fails
0126 if name in file_folders:
0127 file_folders[name].append([file_id, ds_name, successes, nulls, fails])
0128 else:
0129 file_folders[name] = [file_id, ds_name, successes, nulls, fails]
0130 return [('Summary', [file_id, ds_name, total_successes, total_nulls, total_fails])] + file_folders.items()
0131
0132
0133 def join_ranges(ranges, elem):
0134 '''To do less DB calls, joins [(from_id, till_id), ...] ranges.'''
0135 if isinstance(ranges, tuple):
0136 ranges = [ranges]
0137 if ranges[-1][-1] + 1 == elem[0]:
0138 ranges[-1] = (ranges[-1][0], elem[1])
0139 else:
0140 ranges.append(elem)
0141 return ranges
0142
0143
0144 def get_release_list(c):
0145 '''Returns all ``ReleaseComparisons`` found on database.'''
0146 c.execute('SELECT title, statistical_test FROM ReleaseComparison')
0147 return c.fetchall()
0148
0149
0150 def db_list_with_releases(path='.'):
0151 '''Returns available database list and their releases.'''
0152 db_list = [db for db in listdir(path) if db.endswith('.db')]
0153 db_list_with_releases = []
0154 for db in db_list:
0155 conn = sqlite3.connect(join(path, db))
0156 releases = get_release_list(conn.cursor())
0157 db_list_with_releases.append((db[:-3], releases))
0158 conn.close()
0159 return db_list_with_releases
0160
0161
0162
0163 def get_release_summary_stats(c, release_title, st_test, threshold=1e-5):
0164 '''Returns context for ``release_summary.html`` template.'''
0165
0166 context = dict()
0167 c.execute('''SELECT release1, release2, id FROM ReleaseComparison
0168 WHERE title = ? AND statistical_test = ?''', (release_title, st_test))
0169 context['release1'], context['release2'], release_comp_id = c.fetchone()
0170
0171
0172 c.execute('''SELECT from_histogram_id, till_histogram_id FROM Directory
0173 WHERE id IN (SELECT directory_id FROM RootFileComparison
0174 WHERE release_comparison_id = ?)''', (release_comp_id,))
0175 dir_ranges = c.fetchall()
0176
0177 if len(dir_ranges) > 1:
0178 dir_ranges = reduce(join_ranges, dir_ranges)
0179
0180 context['successes'], context['nulls'], context['fails'], = get_stats(c, threshold, dir_ranges)
0181
0182 context['total'] = context['successes'] + context['fails'] + context['nulls']
0183 if context['total']:
0184 context['success'], context['null'], context['fail'] = \
0185 get_percentage(context['successes'], context['nulls'], context['fails'])
0186
0187
0188 c.execute('''SELECT id, filename1, directory_id FROM RootFileComparison
0189 WHERE release_comparison_id = ?''', (release_comp_id,))
0190 files = c.fetchall()
0191
0192
0193 folders = dict()
0194 for file_id, filename, dir_id in files:
0195
0196 file_folders = get_folders(c, file_id, filename, dir_id, threshold)
0197 for folder_name, file_folder_stats in file_folders:
0198 if folder_name in folders:
0199
0200 folders[folder_name].append(file_folder_stats)
0201
0202 folders[folder_name][0][2] += file_folder_stats[2]
0203 folders[folder_name][0][3] += file_folder_stats[3]
0204 folders[folder_name][0][4] += file_folder_stats[4]
0205 else:
0206 folder_summary = [None, 'Summary', file_folder_stats[2],
0207 file_folder_stats[3], file_folder_stats[4]]
0208 folders[folder_name] = [folder_summary, file_folder_stats]
0209
0210
0211 folders = [('Summary', folders.pop('Summary'))] + sorted(folders.items(), key=lambda x: x[0])
0212 for folder, file_stats in folders:
0213
0214 if len(file_stats) != len(files)+1:
0215 for i, file_ in enumerate(files):
0216 if file_[0] != file_stats[i][0]:
0217 file_stats = file_stats[:i] + [[None, "N/A", None, None, None]] + file_stats[i:]
0218
0219 for i, stats in enumerate(file_stats):
0220 stats[2], stats[3], stats[4] = get_percentage(*stats[2:5])
0221 context['folders'] = folders
0222
0223
0224
0225 for folder in folders:
0226 print(folder)
0227
0228
0229
0230
0231
0232
0233 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
0234 WHERE parent_id IN (SELECT directory_id FROM RootFileComparison
0235 WHERE release_comparison_id = ?)''', (release_comp_id,))
0236 lvl3_dir_ranges = c.fetchall()
0237
0238 cum_lvl3_dir_ranges = dict()
0239 for name, from_id, till_id in lvl3_dir_ranges:
0240 if name in cum_lvl3_dir_ranges:
0241 cum_lvl3_dir_ranges[name].append((from_id, till_id))
0242 else:
0243 cum_lvl3_dir_ranges[name] = [(from_id, till_id)]
0244
0245
0246 summary_stats = dict()
0247 detailed_stats = dict()
0248 for name, ranges in cum_lvl3_dir_ranges.items():
0249 successes, nulls, fails = get_stats(c, threshold, ranges)
0250 if name in detailed_stats:
0251 detailed_stats[name][0] += successes
0252 detailed_stats[name][1] += nulls
0253 detailed_stats[name][2] += fails
0254 else:
0255 detailed_stats[name] = [successes, nulls, fails]
0256 if name in renaming:
0257 if renaming[name] in summary_stats:
0258 summary_stats[renaming[name]][0] += successes
0259 summary_stats[renaming[name]][1] += nulls
0260 summary_stats[renaming[name]][2] += fails
0261 else:
0262 summary_stats[renaming[name]] = [successes, nulls, fails]
0263
0264
0265 summary_ratios = []
0266 for name, stats in summary_stats.items():
0267 total = sum(stats)
0268 if total:
0269 ratio = float(stats[0]) / sum(stats)
0270 summary_ratios.append((name, ratio))
0271 detailed_ratios = []
0272 for name, stats in detailed_stats.items():
0273 total = sum(stats)
0274 if total:
0275 ratio = float(stats[0]) / sum(stats)
0276 detailed_ratios.append((name, ratio))
0277
0278 context['summary_ratios'] = sorted(summary_ratios, key=lambda x: x[0])
0279 context['detailed_ratios'] = sorted(detailed_ratios, key=lambda x: x[0])
0280 return context
0281
0282
0283 def get_directory_summary_stats(c, url_args, file_id, threshold):
0284 '''Returns context for ``directory_summary.html`` template.'''
0285 context = dict()
0286 c.execute('''SELECT directory_id, filename1, filename2 FROM RootFileComparison
0287 WHERE id = ?''', (file_id,))
0288 dir_id, f1, f2 = c.fetchone()
0289 context['release1'] = get_release(f1)
0290 context['release2'] = get_release(f2)
0291 if not url_args:
0292 dir_name = get_dataset_name(f1)
0293 else:
0294
0295 directory_names = []
0296
0297 for dir_name in url_args:
0298 c.execute('''SELECT id, name FROM Directory WHERE name = ? AND
0299 parent_id = ?''', (dir_name, dir_id))
0300 dir_id, name = c.fetchone()
0301 directory_names.append(name)
0302 context['parent_name'] = '/'.join(directory_names)
0303
0304
0305 c.execute('''SELECT from_histogram_id, till_histogram_id FROM
0306 Directory WHERE id = ?''', (dir_id,))
0307 ranges = c.fetchone()
0308 successes, nulls, fails = get_stats(c, threshold, (ranges,))
0309 success, null, fail = get_percentage(successes, nulls, fails)
0310 context.update({
0311 'successes': successes, 'nulls': nulls, 'fails': fails,
0312 'success': success, 'null': null, 'fail': fail,
0313 'total': successes + nulls + fails, 'dir_name': dir_name
0314 })
0315
0316 c.execute('''SELECT name, from_histogram_id, till_histogram_id FROM Directory
0317 WHERE parent_id = ?''', (dir_id,))
0318 subdirs = c.fetchall()
0319 subdir_stats = []
0320 for name, from_id, till_id in subdirs:
0321 successes, nulls, fails = get_stats(c, threshold, [(from_id, till_id,)])
0322 success, null, fail = get_percentage(successes, nulls, fails)
0323 subdir_stats.append((name, successes + nulls + fails, successes,
0324 nulls, fails, success, null, fail))
0325 context['subdirs'] = sorted(subdir_stats, key=lambda x: x[4], reverse=True)
0326
0327
0328 c.execute('''SELECT name, p_value FROM HistogramComparison
0329 WHERE directory_id = ?''', (dir_id,))
0330 failed_histos = []
0331 successful_histos = []
0332 null_histos = []
0333 for name, p_value in c.fetchall():
0334 path = quote('%s/%s' % ('/'.join(url_args), name))
0335 url1 = get_img_url(path, f1)
0336 url2 = get_img_url(path, f2)
0337 overlay = get_img_url(path, f1, f2)
0338 if p_value < 0:
0339 null_histos.append((name, p_value, url1, url2, overlay))
0340 elif p_value <= threshold:
0341 failed_histos.append((name, p_value, url1, url2, overlay))
0342 else:
0343 successful_histos.append((name, p_value, url1, url2, overlay))
0344
0345 context['failed_histos'] = sorted(failed_histos, key=lambda x: x[1], reverse=True)
0346 context['null_histos'] = null_histos
0347 context['successful_histos'] = sorted(successful_histos, key=lambda x: x[1], reverse=True)
0348 return context