# File indexing completed on 2023-03-17 10:57:44
0001 from __future__ import print_function
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033 import os, sys, re
0034 import httplib, urllib, urllib2, types, string
0035 import Utilities.General.cmssw_das_client as das_client
0036 import json
0037 from json import loads, dumps
0038
# Give every DD_* selection variable a value so that later os.environ[...]
# lookups never raise KeyError.  Values already present in the environment
# are left untouched; DD_SOURCE falls back to 'das', the others to ''.
os.environ.setdefault('DD_SOURCE', 'das')
for _dd_name in ('DD_RELEASE', 'DD_SAMPLE', 'DD_COND',
                 'DD_TIER', 'DD_TIER_SECONDARY', 'DD_RUN'):
    os.environ.setdefault(_dd_name, '')
del _dd_name  # do not leave the loop variable behind as a module attribute

# Filters compiled once at import time from the values above.  A '*' in a
# value becomes the regex '.*'; every other character is handed to
# re.compile() unchanged, so regex metacharacters keep their regex meaning.
# An empty value compiles to a pattern that matches any line.
dd_release_re = re.compile(os.environ['DD_RELEASE'].replace('*', '.*'))
dd_sample_re = re.compile(os.environ['DD_SAMPLE'].replace('*', '.*'))
dd_cond_re = re.compile(os.environ['DD_COND'].replace('*', '.*'))
dd_run_re = re.compile(os.environ['DD_RUN'].replace('*', '.*'))
0058
0059
def _dd_das_request(query):
    """Send one query to DAS at cmsweb.cern.ch and return the decoded reply."""
    return das_client.json.loads(
        das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, 0))


def _dd_build_dbs_query(dd_tier):
    """Build the 'find file ...' query shared by the 'dbs' and 'http' sources."""
    query = "find file"
    separator = " where "
    if os.environ['DD_RELEASE'] != "":
        query += separator + "release = " + os.environ['DD_RELEASE']
        separator = " and "
    if os.environ['DD_SAMPLE'] != "":
        query += separator + "primds = " + os.environ['DD_SAMPLE']
        separator = " and "
    if os.environ['DD_RUN'] != "":
        query += separator + "run = " + os.environ['DD_RUN']
        separator = " and "
    return (query + separator + "dataset like *" + os.environ['DD_COND']
            + "*" + dd_tier + "*")


def _dd_command_lines(command):
    """Run a shell command and return its stdout lines; the pipe is always closed."""
    with os.popen(command) as pipe:
        return pipe.readlines()


def _dd_keep_paths(lines):
    """Return the right-stripped lines that start with '/', dropping everything else."""
    stripped = (line.rstrip() for line in lines)
    return [line for line in stripped if line.startswith('/')]


def _dd_passes_filters(line, dd_tier_re):
    """Tell whether a line matches the sample, condition, tier and run patterns."""
    patterns = (dd_sample_re, dd_cond_re, dd_tier_re, dd_run_re)
    return all(pattern.search(line) is not None for pattern in patterns)


def common_search(dd_tier):
    """Return the list of file names selected by the DD_* environment variables.

    The lookup strategy is chosen by ``os.environ['DD_SOURCE']``:

    * ``"das"``  - query DAS for the single matching dataset, then for its files;
    * ``"dbs"``  - run the ``dbs search`` command line tool;
    * ``"http"`` - POST the same query to the cmsweb ``dbs_discovery`` page;
    * ``"lsf"``  - run ``dbs lsf`` on a path built from sample/release/cond/tier;
    * a value starting with ``/castor/cern.ch/cms/`` - list that directory and
      each of its sub-directories with ``rfdir``;
    * a value starting with ``/eos/cms/`` - ``eos find -f`` plus regex filtering;
    * anything else - treated as the path of a text file holding one file
      name per line, filtered with the same regexes.

    Parameters:
        dd_tier: data tier selector; '*' is accepted as a wildcard.

    Returns:
        list of str.  The "das" source returns ``[]`` early when no dataset,
        several distinct datasets, or no file is found.

    Raises:
        urllib2.HTTPError: from the "http" source for any status other than 201.
    """
    dd_tier_re = re.compile(dd_tier.replace('*', '.*'))
    source = os.environ['DD_SOURCE']

    if source == "das":

        query = "dataset instance=cms_dbs_prod_global"
        if os.environ['DD_RELEASE'] != "":
            query += " release=" + os.environ['DD_RELEASE']
        if os.environ['DD_SAMPLE'] != "":
            query += " primary_dataset=" + os.environ['DD_SAMPLE']
        if dd_tier != "":
            query += " tier=" + dd_tier
        if os.environ['DD_COND'] != "":
            query += " dataset=*" + os.environ['DD_COND'] + "*"
        if os.environ['DD_RUN'] != "":
            query += " run=" + os.environ['DD_RUN']

        data = _dd_das_request(query)

        if data['nresults'] == 0:
            print('[electronDataDiscovery.py] No DAS dataset for query:', query)
            return []
        # Collapse repeated leading entries that name the same dataset; give
        # up as soon as two different dataset names are seen.
        while data['nresults'] > 1:
            if data['data'][0]['dataset'][0]['name'] == data['data'][1]['dataset'][0]['name']:
                data['data'].pop(0)
                data['nresults'] -= 1
            else:
                print('[electronDataDiscovery.py] Several DAS datasets for query:', query)
                for i in range(data['nresults']):
                    print('[electronDataDiscovery.py] dataset[' + str(i) + ']: '
                          + data['data'][i]['dataset'][0]['name'])
                return []

        dataset = data['data'][0]['dataset'][0]['name']

        query = "file instance=cms_dbs_prod_global dataset=" + dataset
        data = _dd_das_request(query)

        if data['nresults'] == 0:
            print('[electronDataDiscovery.py] No DAS file in dataset:', dataset)
            return []
        # Fixed: this line used the undefined name `nresults` (NameError).
        print('there is %d results' % data['nresults'])

        result = [str(data['data'][i]['file'][0]['name'])
                  for i in range(data['nresults'])]

    elif source == "dbs":

        query = _dd_build_dbs_query(dd_tier)
        result = _dd_keep_paths(_dd_command_lines(
            'dbs search --url="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet" --query "'
            + query + '"'))

    elif source == "http":

        # This branch relies on the Python 2 urllib/urllib2 modules imported
        # at the top of the file.
        query = _dd_build_dbs_query(dd_tier)

        url = "https://cmsweb.cern.ch:443/dbs_discovery/aSearch"
        final_input = urllib.quote(query)

        agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
        ctypes = "text/plain"
        headers = {'User-Agent': agent, 'Accept': ctypes}
        params = {'dbsInst': 'cms_dbs_prod_global',
                  'html': 0, 'caseSensitive': 'on', '_idx': 0, 'pagerStep': -1,
                  'userInput': final_input,
                  'xml': 0, 'details': 0, 'cff': 0, 'method': 'dbsapi'}
        req = urllib2.Request(url, urllib.urlencode(params, doseq=True), headers)
        body = ""

        try:
            response = urllib2.urlopen(req)
            try:
                body = response.read()
            finally:
                response.close()
        except urllib2.HTTPError as e:
            if e.code == 201:
                # Report the 201 reply and carry on with an empty body.
                print(e.headers)
                print(e.msg)
            else:
                raise

        # Fixed: the reply is a string, not a file object, so it is split
        # into lines instead of calling readlines()/close() on it.
        result = _dd_keep_paths(body.splitlines())

    elif source == "lsf":

        # Fixed: a stray trailing '"' was appended to the path; the command
        # below already supplies the closing quote.
        # NOTE(review): this branch reads DD_TIER from the environment rather
        # than the dd_tier argument, so search2() looks up the primary tier
        # here - confirm whether that is intended.
        dbs_path = ('/' + os.environ['DD_SAMPLE'] + '/' + os.environ['DD_RELEASE']
                    + '-' + os.environ['DD_COND'] + '/' + os.environ['DD_TIER'])
        if __name__ == "__main__":
            print('dbs path:', dbs_path)
        result = _dd_keep_paths(_dd_command_lines('dbs lsf --path="' + dbs_path + '"'))

    elif source.startswith('/castor/cern.ch/cms/'):

        castor_dir = source.replace('/castor/cern.ch/cms/', '/', 1)
        result = []
        for entry in _dd_command_lines('rfdir /castor/cern.ch/cms' + castor_dir):
            fields = entry.split()
            # The entry name is taken from the 9th whitespace-separated
            # field; lines too short to have one are skipped, not fatal.
            if len(fields) < 9:
                continue
            subdir = fields[8]
            for sub_entry in _dd_command_lines(
                    'rfdir /castor/cern.ch/cms' + castor_dir + '/' + subdir):
                sub_fields = sub_entry.split()
                if len(sub_fields) < 9:
                    continue
                result.append(castor_dir + '/' + subdir + '/' + sub_fields[8])

    elif source.startswith('/eos/cms/'):

        result = []
        for line in _dd_command_lines('eos find -f ' + source):
            line = line.strip().replace('/eos/cms/', '/', 1)
            if line != "" and _dd_passes_filters(line, dd_tier_re):
                result.append(line)

    else:

        # Any other value is the path of a text file listing one file per line.
        result = []
        with open(source) as listing:
            for line in listing:
                line = os.path.expandvars(line.strip())
                if line != "" and _dd_passes_filters(line, dd_tier_re):
                    result.append(line)

    # NOTE(review): the original indentation was lost in the listing, so it
    # is unclear whether this diagnostic belonged to the last branch only;
    # here it runs for every source that reaches this point.
    if len(result) == 0:
        diag = '[electronDataDiscovery.py] No more files after filtering with :'
        if os.environ['DD_SAMPLE'] != '':
            diag += ' ' + os.environ['DD_SAMPLE']
        if os.environ['DD_COND'] != '':
            diag += ' ' + os.environ['DD_COND']
        if dd_tier != '':
            diag += ' ' + dd_tier
        if os.environ['DD_RUN'] != '':
            diag += ' ' + os.environ['DD_RUN']
        print(diag)

    return result
0273
0274
def search():
    """Run common_search() with the tier taken from the DD_TIER variable.

    Returns:
        Whatever common_search() returns for ``os.environ['DD_TIER']``.
    """
    # The message names the variable, not its value: the original formatted
    # the literal string 'DD_TIER', and that output is kept as is.
    print('search in %s' % 'DD_TIER')
    return common_search(os.environ['DD_TIER'])
0278
0279
def search2():
    """Run common_search() with the tier taken from DD_TIER_SECONDARY.

    Returns:
        Whatever common_search() returns for
        ``os.environ['DD_TIER_SECONDARY']``.
    """
    return common_search(os.environ['DD_TIER_SECONDARY'])
0282
0283
def getCMSdata(data, dbs="prod/global"):
    """Return the sorted file names that dasgoclient lists for a dataset.

    Runs ``dasgoclient --query="file dataset=<data> instance=<dbs>" | sort``
    through the shell and returns its standard output split into lines.

    Parameters:
        data: dataset name placed after ``dataset=`` in the query.
        dbs: value placed after ``instance=`` in the query.

    Returns:
        list of str, one entry per non-empty output line; empty when the
        command prints nothing.
    """
    # Both placeholders are filled in a single pass.  The earlier chained
    # str.replace() calls also rewrote any 'DBS' found inside the dataset
    # name once it had been inserted.
    command = 'dasgoclient --query="file dataset={0} instance={1}" | sort'.format(data, dbs)
    with os.popen(command) as pipe:
        output = pipe.read()

    # Drop empty pieces instead of blindly deleting the last element, which
    # lost a real file name whenever the output had no trailing newline.
    return [name for name in output.split('\n') if name]