File indexing completed on 2024-11-26 02:34:11
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032 import os, sys, re
0033 import httplib, urllib, urllib2, types, string
0034 import Utilities.General.cmssw_das_client as das_client
0035 import json
0036 from json import loads, dumps
0037
# Give every data-discovery setting a value so later code can index
# os.environ directly; values already exported by the caller are kept.
for _dd_key, _dd_default in (
        ('DD_SOURCE', 'das'),
        ('DD_RELEASE', ''),
        ('DD_SAMPLE', ''),
        ('DD_COND', ''),
        ('DD_TIER', ''),
        ('DD_TIER_SECONDARY', ''),
        ('DD_RUN', ''),
):
    os.environ.setdefault(_dd_key, _dd_default)
del _dd_key, _dd_default

# Pre-compiled filters: each '*' in a setting is expanded to the regex '.*';
# every other character is handed to the regex engine unchanged.
dd_release_re, dd_sample_re, dd_cond_re, dd_run_re = (
    re.compile(os.environ[_name].replace('*', '.*'))
    for _name in ('DD_RELEASE', 'DD_SAMPLE', 'DD_COND', 'DD_RUN')
)
0057
0058
def _read_command_lines(command):
    """Run *command* through ``os.popen`` and return its output lines."""
    pipe = os.popen(command)
    try:
        return pipe.readlines()
    finally:
        # Close the pipe even when reading fails.
        pipe.close()


def _absolute_path_lines(lines):
    """Right-strip *lines* and keep only those that start with '/'."""
    stripped = (line.rstrip() for line in lines)
    return [line for line in stripped if line.startswith("/")]


def _matches_dd_filters(path, dd_tier_re):
    """Tell whether *path* matches the sample, cond, tier and run filters."""
    filters = (dd_sample_re, dd_cond_re, dd_tier_re, dd_run_re)
    return all(pattern.search(path) is not None for pattern in filters)


def _dbs_file_query(dd_tier):
    """Build the DBS ``find file`` query from the DD_* environment settings."""
    clauses = []
    if os.environ['DD_RELEASE'] != "":
        clauses.append("release = " + os.environ['DD_RELEASE'])
    if os.environ['DD_SAMPLE'] != "":
        clauses.append("primds = " + os.environ['DD_SAMPLE'])
    if os.environ['DD_RUN'] != "":
        clauses.append("run = " + os.environ['DD_RUN'])
    # The dataset clause is always present, so "where" is always needed.
    clauses.append("dataset like *" + os.environ['DD_COND'] + "*" + dd_tier + "*")
    return "find file where " + " and ".join(clauses)


def _das_request(query):
    """Send *query* to DAS and return the decoded JSON reply."""
    return das_client.json.loads(das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, 0))


def _search_das(dd_tier):
    """Resolve a single dataset through DAS and return the names of its files."""
    query = "dataset instance=cms_dbs_prod_global"
    if os.environ['DD_RELEASE'] != "":
        query += " release=" + os.environ['DD_RELEASE']
    if os.environ['DD_SAMPLE'] != "":
        query += " primary_dataset=" + os.environ['DD_SAMPLE']
    if dd_tier != "":
        query += " tier=" + dd_tier
    if os.environ['DD_COND'] != "":
        query += " dataset=*" + os.environ['DD_COND'] + "*"
    if os.environ['DD_RUN'] != "":
        query += " run=" + os.environ['DD_RUN']

    data = _das_request(query)
    if data['nresults'] == 0:
        print('[electronDataDiscovery.py] No DAS dataset for query:', query)
        return []

    # Drop leading entries that repeat the same dataset name; give up as soon
    # as the first two entries name different datasets.
    while data['nresults'] > 1:
        if data['data'][0]['dataset'][0]['name'] == data['data'][1]['dataset'][0]['name']:
            data['data'].pop(0)
            data['nresults'] -= 1
        else:
            print('[electronDataDiscovery.py] Several DAS datasets for query:', query)
            for i in range(data['nresults']):
                print(
                    '[electronDataDiscovery.py] dataset[' + str(i) + ']: ' + data['data'][i]['dataset'][0]['name'])
            return []

    dataset = data['data'][0]['dataset'][0]['name']
    query = "file instance=cms_dbs_prod_global dataset=" + dataset

    data = _das_request(query)
    if data['nresults'] == 0:
        print('[electronDataDiscovery.py] No DAS file in dataset:', dataset)
        return []
    # The count lives in the reply; a bare ``nresults`` name is not defined.
    print('there is %d results' % data['nresults'])

    return [str(data['data'][i]['file'][0]['name']) for i in range(data['nresults'])]


def _search_http(dd_tier):
    """POST the DBS query to the discovery web page and return the path lines."""
    url = "https://cmsweb.cern.ch:443/dbs_discovery/aSearch"
    final_input = urllib.quote(_dbs_file_query(dd_tier))

    agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
    ctypes = "text/plain"
    headers = {'User-Agent': agent, 'Accept': ctypes}
    params = {'dbsInst': 'cms_dbs_prod_global',
              'html': 0, 'caseSensitive': 'on', '_idx': 0, 'pagerStep': -1,
              'userInput': final_input,
              'xml': 0, 'details': 0, 'cff': 0, 'method': 'dbsapi'}
    req = urllib2.Request(url, urllib.urlencode(params, doseq=True), headers)

    body = ""
    try:
        response = urllib2.urlopen(req)
        try:
            body = response.read()
        finally:
            response.close()
    except urllib2.HTTPError as e:
        if e.code != 201:
            raise
        # A 201 reply is reported and then treated as an empty answer.
        print(e.headers)
        print(e.msg)

    # ``body`` is a plain string, not a file object: split it into lines here.
    return _absolute_path_lines(body.splitlines())


def _search_castor(source):
    """List the files found one directory level below the castor *source*."""
    castor_dir = source.replace('/castor/cern.ch/cms/', '/', 1)

    def entry_names(path):
        # The entry name is the 9th whitespace-separated column of each
        # ``rfdir`` line; shorter lines (e.g. blank ones) carry no name.
        columns = (line.split() for line in _read_command_lines('rfdir ' + path))
        return [fields[8] for fields in columns if len(fields) > 8]

    found = []
    for subdir in entry_names('/castor/cern.ch/cms' + castor_dir):
        for name in entry_names('/castor/cern.ch/cms' + castor_dir + '/' + subdir):
            found.append(castor_dir + '/' + subdir + '/' + name)
    return found


def common_search(dd_tier):
    """Return the list of data files selected by the DD_* environment settings.

    The lookup method is chosen by ``os.environ['DD_SOURCE']``:

    * ``"das"``  -- query DAS for a single dataset, then for its files;
    * ``"dbs"``  -- run the ``dbs search`` command line tool;
    * ``"http"`` -- POST the same DBS query to the discovery web page
      (uses the Python 2 ``urllib``/``urllib2`` API);
    * ``"lsf"``  -- run ``dbs lsf`` on a path built from the settings;
    * a path starting with ``/castor/cern.ch/cms/`` -- walk it with ``rfdir``;
    * a path starting with ``/eos/cms/`` -- list it with ``eos find -f`` and
      filter the paths with the sample/cond/tier/run patterns;
    * anything else -- treat the value as a text file holding one path per
      line and filter those lines the same way.

    Args:
        dd_tier: data tier to look for; each ``*`` acts as a wildcard.

    Returns:
        A list of file names/paths, possibly empty. A diagnostic line is
        printed when nothing is found.
    """
    # Compiled up front, as before, so that a malformed tier pattern is
    # reported whatever the source is.
    dd_tier_re = re.compile(dd_tier.replace('*', '.*'))
    source = os.environ['DD_SOURCE']

    if source == "das":
        # The DAS lookup prints its own diagnostics and its result is returned
        # as is, without the generic "No more files" message below.
        return _search_das(dd_tier)

    if source == "dbs":
        query = _dbs_file_query(dd_tier)
        result = _absolute_path_lines(_read_command_lines(
            'dbs search --url="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet" --query "' + query + '"'))

    elif source == "http":
        result = _search_http(dd_tier)

    elif source == "lsf":
        # NOTE(review): this branch reads DD_TIER and ignores the dd_tier
        # argument, so search2() also gets the primary tier here -- confirm
        # whether that is intended.
        dbs_path = '/' + os.environ['DD_SAMPLE'] + '/' + os.environ['DD_RELEASE'] + '-' + os.environ['DD_COND'] + '/' + \
                   os.environ['DD_TIER']
        if __name__ == "__main__":
            print('dbs path:', dbs_path)
        # The path is quoted exactly once, here.
        result = _absolute_path_lines(_read_command_lines('dbs lsf --path="' + dbs_path + '"'))

    elif source.startswith('/castor/cern.ch/cms/'):
        result = _search_castor(source)

    elif source.startswith('/eos/cms/'):
        listed = _read_command_lines('eos find -f ' + source)
        paths = (line.strip().replace('/eos/cms/', '/', 1) for line in listed)
        result = [path for path in paths
                  if path != "" and _matches_dd_filters(path, dd_tier_re)]

    else:
        with open(source) as listing:
            paths = [os.path.expandvars(line.strip()) for line in listing]
        result = [path for path in paths
                  if path != "" and _matches_dd_filters(path, dd_tier_re)]

    if not result:
        diag = '[electronDataDiscovery.py] No more files after filtering with :'
        for value in (os.environ['DD_SAMPLE'], os.environ['DD_COND'], dd_tier, os.environ['DD_RUN']):
            if value != '':
                diag += ' ' + value
        print(diag)

    return result
0272
0273
def search():
    """Run common_search() for the primary tier stored in ``DD_TIER``."""
    # The message carries the name of the setting, not its current value.
    print('search in %s' % 'DD_TIER')
    primary_tier = os.environ['DD_TIER']
    return common_search(primary_tier)
0277
0278
def search2():
    """Run common_search() for the tier stored in ``DD_TIER_SECONDARY``."""
    secondary_tier = os.environ['DD_TIER_SECONDARY']
    return common_search(secondary_tier)
0281
0282
def getCMSdata(data, dbs="prod/global"):
    """Return the files that ``dasgoclient`` lists for a dataset.

    The query ``file dataset=<data> instance=<dbs>`` is run through the shell
    and its output is piped to ``sort``.

    Args:
        data: dataset name placed in the ``dataset=`` field of the query.
        dbs: DBS instance placed in the ``instance=`` field of the query.

    Returns:
        A list with one string per output line, in the order produced by
        ``sort``; an empty list when the command prints nothing.
    """
    # Both fields are filled in one formatting step. Chained str.replace()
    # calls would also rewrite a "DBS" found inside the dataset name itself.
    cmd = 'dasgoclient --query="file dataset=%s instance=%s" | sort' % (data, dbs)

    pipe = os.popen(cmd)
    try:
        files = pipe.read()
    finally:
        pipe.close()

    flist = files.split('\n')
    # The output normally ends with a newline, which leaves one empty trailing
    # entry; drop only that one so a real last entry is never lost.
    if flist[-1] == '':
        flist.pop()
    return flist
0291 return flist