File indexing completed on 2023-03-17 10:59:04
0001 import re
0002
0003
0004
0005
# --- Dataset name patterns -------------------------------------------------
# A dataset name is exactly three "/"-led components, each made of one or
# more letters, digits, "-" or "_" (e.g. "/A/B/C").
RXDATASET = re.compile(r"^(/[-A-Za-z0-9_]+){3}$")

# The three RXRELVAL* patterns capture, as group 1, a release string of the
# shape "CMSSW_<n>[_<n>...][_pre<n>]" found at the start of the second
# component and followed by "-" or "_".
#
# First component starts with "RelVal".
RXRELVALMC = re.compile(r"^/RelVal[^/]+/(CMSSW(?:_[0-9]+)+(?:_pre[0-9]+)?)[-_].*$")
# Same as RXRELVALMC, but "rundepMC" must also appear after the release.
RXRELVALRUNDEPMC = re.compile(r"^/RelVal[^/]+/(CMSSW(?:_[0-9]+)+(?:_pre[0-9]+)?)[-_].*rundepMC.*$")
# Any first component at all. This therefore also matches "RelVal..." names,
# so classifyDQMFile tests it only after the two RelVal MC patterns.
RXRELVALDATA = re.compile(r"^/[^/]+/(CMSSW(?:_[0-9]+)+(?:_pre[0-9]+)?)[-_].*$")
# First component must NOT start with "RelVal"; "rundepMC" must appear
# somewhere after the first component.
RXRUNDEPMC = re.compile(r"^/(?!RelVal)[^/]+/.*rundepMC.*$")

# --- File name patterns ----------------------------------------------------
# Both accept an optional leading directory part ("(?:.*/)?").
#
# "DQM_V<digits>[_<alnum>]_R<digits>.root"
#   group 1: version digits
#   group 2: optional "_<alnum>" part including its leading "_" (None if absent)
#   group 3: run number digits
RXONLINE = re.compile(r"^(?:.*/)?DQM_V(\d+)(_[A-Za-z0-9]+)?_R(\d+)\.root$")

# "DQM_V<digits>_R<digits>__<a>__<b>__<c>.root"
#   group 1: version digits
#   group 2: run number digits
#   group 3: the three "__"-led components as one string; classifyDQMFile
#            turns "__" into "/" and validates the result against RXDATASET
RXOFFLINE = re.compile(r"^(?:.*/)?DQM_V(\d+)_R(\d+)((?:__[-A-Za-z0-9_]+){3})\.root$")
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
def _classifyOnlineMatch(m):
    """Build the classification result for a file name that matched RXONLINE.

    Returns ``(True, info)`` for an accepted online file, otherwise
    ``(False, reason)``.
    """
    version = int(m.group(1))
    runnr = int(m.group(3))
    # Group 2 is the optional "_<subsystem>" part; drop its leading "_".
    # It stays None when the file name carries no such part.
    subsys = m.group(2)[1:] if m.group(2) else None

    if version != 1:
        return False, "file version is not 1"
    if runnr <= 10000:
        return False, "online file has run number <= 10000"

    return True, {'class': 'online_data', 'version': version,
                  'subsystem': subsys, 'runnr': runnr,
                  'dataset': "/Global/Online/ALL"}


def _classifyOfflineMatch(m):
    """Build the classification result for a file name that matched RXOFFLINE.

    Returns ``(True, info)`` for an accepted offline file, otherwise
    ``(False, reason)``.
    """
    version = int(m.group(1))
    runnr = int(m.group(2))
    # The file name encodes the dataset path with "__" in place of "/".
    dataset = m.group(3).replace("__", "/")

    # Validation order matters: it decides which message the caller sees
    # when several checks would fail.
    if not RXDATASET.match(dataset):
        return False, "Invalid dataset name"
    if version != 1:
        return False, "file version is not 1"
    if runnr < 1:
        return False, "file matches offline naming, but run number is < 1"

    # The dataset-kind patterns overlap (RXRELVALMC matches everything
    # RXRELVALRUNDEPMC does, and RXRELVALDATA matches all RelVal names), so
    # they are tried from most to least specific.
    if RXRUNDEPMC.match(dataset):
        if runnr == 1:
            return False, "file matches Run Dependent MonteCarlo naming, but run number is 1"
        return True, {'class': 'simulated_rundep', 'version': version,
                      'runnr': runnr, 'dataset': dataset}

    release = RXRELVALRUNDEPMC.match(dataset)
    if release:
        if runnr == 1:
            return False, "file matches Run Dependent MonteCarlo naming, but run number is 1"
        return True, {'class': 'relval_rundepmc', 'version': version,
                      'runnr': runnr, 'dataset': dataset,
                      'release': release.group(1)}

    release = RXRELVALMC.match(dataset)
    if release:
        if runnr != 1:
            return False, "file matches relval mc naming, but run number != 1"
        return True, {'class': 'relval_mc', 'version': version,
                      'runnr': runnr, 'dataset': dataset,
                      'release': release.group(1)}

    release = RXRELVALDATA.match(dataset)
    if release:
        if runnr == 1:
            return False, "file matches relval data naming, but run number = 1"
        return True, {'class': 'relval_data', 'version': version,
                      'runnr': runnr, 'dataset': dataset,
                      'release': release.group(1)}

    if "CMSSW" in dataset:
        return False, "non-relval dataset name contains 'CMSSW'"

    # Only run numbers >= 1 reach this point: > 1 is offline data, while
    # exactly 1 is classified as simulation.
    kind = 'offline_data' if runnr > 1 else 'simulated'
    return True, {'class': kind, 'version': version,
                  'runnr': runnr, 'dataset': dataset}


def classifyDQMFile(path):
    """Classify a DQM ROOT file from its name.

    The name is tried against the online pattern (RXONLINE) first and then
    against the offline pattern (RXOFFLINE).

    Args:
        path: File name or path; only the part after the last "/" has to
            follow a DQM naming convention.

    Returns:
        A 2-tuple. On success ``(True, info)`` where ``info`` is a dict with
        the keys 'class', 'version', 'runnr' and 'dataset', plus 'subsystem'
        for online files and 'release' for the relval classes. 'class' is
        one of 'online_data', 'simulated_rundep', 'relval_rundepmc',
        'relval_mc', 'relval_data', 'offline_data' or 'simulated'.
        On failure ``(False, reason)`` where ``reason`` is a message string.
    """
    # NOTE(review): this looks like a leftover debug trace; it is kept
    # because callers may rely on the path being echoed to stdout.
    print(path)
    try:
        m = RXONLINE.match(path)
        if m:
            return _classifyOnlineMatch(m)

        m = RXOFFLINE.match(path)
        if m:
            return _classifyOfflineMatch(m)

        return False, "file matches no known naming convention"
    except Exception:
        # Any ordinary failure (e.g. a non-string path) is reported as a
        # classification error.  Unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit propagate instead of being
        # silently turned into a result tuple.
        return False, "error while classifying file name"
0107