File indexing completed on 2024-04-06 12:01:54
0001
0002 '''Script that uploads to the new CMS conditions uploader.
0003 Adapted to the new infrastructure from v6 of the upload.py script for the DropBox from Miguel Ojeda.
0004 '''
0005
0006 __author__ = 'Andreas Pfeiffer'
0007 __copyright__ = 'Copyright 2015, CERN CMS'
0008 __credits__ = ['Giacomo Govi', 'Salvatore Di Guida', 'Miguel Ojeda', 'Andreas Pfeiffer']
0009 __license__ = 'Unknown'
0010 __maintainer__ = 'Andreas Pfeiffer'
0011 __email__ = 'andreas.pfeiffer@cern.ch'
0012
0013
0014 import os
0015 import sys
0016 import optparse
0017 import hashlib
0018 import tarfile
0019 import netrc
0020 import getpass
0021 import errno
0022 import sqlite3
0023 import json
0024 import tempfile
0025
# Server selection: the production host is the default; the development host
# is used for uploads whose destination is the 'prep' database.
defaultHostname = 'cms-conddb-prod.cern.ch'
defaultDevHostname = 'cms-conddb-dev.cern.ch'

# Template for the service base URL; the single %s is filled with the hostname.
defaultUrlTemplate = 'https://%s/cmsDbUpload/'

# Default values for the upload itself: the backend name sent to the server
# and the name of the tar file that is built locally before uploading.
defaultBackend = 'online'
defaultTemporaryFile = 'upload.tar.bz2'

# NOTE(review): the two names below are not referenced by the classes in this
# part of the file; presumably they are consumed by the command-line handling.
defaultNetrcHost = 'ConditionUploader'
defaultWorkflow = 'offline'
0033
0034
0035 import time
0036 import logging
0037 import cStringIO
0038
0039 import pycurl
0040 import socket
0041 import copy
0042
0043
class HTTPError(Exception):
    '''Exception carrying a failed HTTP response.

    Attributes:
        code: the HTTP status code of the response, as passed by the caller.
        response: the response body (i.e. the page), as passed by the caller.

    The exception message (self.args) is the text found between the first
    '<p>' and the next '</p>' of the body; when that cannot be extracted
    the whole body is used as the message instead.
    '''

    def __init__(self, code, response):
        self.code = code
        self.response = response

        try:
            # Text following the first opening paragraph tag ...
            afterOpeningTag = response.split('<p>')[1]
            # ... cut at the first closing tag, if there is one.
            message = afterOpeningTag.split('</p>')[0]
        except Exception:
            # No '<p>' in the body, or the body is not a string at all.
            message = self.response

        self.args = (message, )
0060
0061
# Directory path kept as a module-level constant.
# NOTE(review): judging by the name this is a CA certificate directory for
# curl; it is not referenced by the code visible in this part of the file.
CERN_SSO_CURL_CAPATH = '/etc/pki/tls/certs'
0063
class HTTP(object):
    '''Class used for querying URLs using the HTTP protocol.

    One pycurl handle is created per instance and reused for all requests.
    A token is obtained once with getToken() and is then sent as the user
    name of the HTTP authentication in every query().
    '''

    # HTTP status codes for which query() retries (see setRetries()).
    retryCodes = frozenset([502, 503])

    def __init__(self):
        # Defaults: empty base URL and no retries. The base URL is kept as an
        # empty string (not None) so that 'self.baseUrl + url' is always a
        # valid string operation, even before setBaseUrl() has been called.
        self.setBaseUrl()
        self.setRetries()

        self.curl = pycurl.Curl()
        # An empty file name switches on libcurl's cookie handling without
        # loading cookies from a file.
        self.curl.setopt(self.curl.COOKIEFILE, '')

        # NOTE(review): verification of the peer certificate is switched off
        # here (only the host name check stays enabled). This weakens the
        # protection of the credentials sent by getToken() and should be
        # revisited; it is left unchanged to keep existing setups working.
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 2)

        # Set by getToken(), used by query().
        self.token = None

    def getCookies(self):
        '''Returns the list of cookies.
        '''
        return self.curl.getinfo(self.curl.INFO_COOKIELIST)

    def discardCookies(self):
        '''Discards cookies.
        '''
        self.curl.setopt(self.curl.COOKIELIST, 'ALL')

    def setBaseUrl(self, baseUrl = ''):
        '''Allows to set a base URL which will be prefixed to all the URLs
        that will be queried later.
        '''
        self.baseUrl = baseUrl

    def setProxy(self, proxy = ''):
        '''Allows to set a proxy.
        '''
        self.curl.setopt(self.curl.PROXY, proxy)

    def setTimeout(self, timeout = 0):
        '''Allows to set a timeout.
        '''
        self.curl.setopt(self.curl.TIMEOUT, timeout)

    def setRetries(self, retries = ()):
        '''Allows to set retries.

        The retries are a sequence of the seconds to wait per retry.

        The retries are done on:
            * PyCurl errors (includes network problems, e.g. not being able
              to connect to the host).
            * 502 Bad Gateway (for the moment, to avoid temporary
              Apache-CherryPy issues).
            * 503 Service Temporarily Unavailable (for when we update
              the frontends).
        '''
        self.retries = retries

    def getToken(self, username, password):
        '''Requests a token from '<baseUrl>token' using username/password.

        On success the token is stored in self.token and the raw response
        body is returned. If the body cannot be parsed as JSON or has no
        'token' entry, the problem is logged and None is returned.

        The HTTP status code is only logged, it is not checked.
        Errors raised by pycurl while performing the request are not
        caught here.
        '''

        url = self.baseUrl + 'token'

        self.curl.setopt(pycurl.URL, url)
        self.curl.setopt(pycurl.VERBOSE, 0)

        self.curl.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])
        self.curl.setopt(self.curl.HTTPGET, 0)

        response = cStringIO.StringIO()
        self.curl.setopt(pycurl.WRITEFUNCTION, response.write)
        self.curl.setopt(pycurl.USERPWD, '%s:%s' % (username, password) )

        logging.debug('going to connect to server at: %s', url)

        self.curl.perform()
        code = self.curl.getinfo(pycurl.RESPONSE_CODE)
        logging.debug('got: %s ', str(code))

        body = response.getvalue()
        try:
            self.token = json.loads(body)['token']
        except Exception as e:
            logging.error('http::getToken> got error from server: %s ', str(e) )
            # A body that is not JSON at all is reported only once.
            if 'No JSON object could be decoded' not in str(e):
                logging.error("error getting token: %s", str(e))
            return None

        # The token is a credential: log that it arrived, never its value
        # (nor the response body, which contains it).
        logging.debug('token received from server at: %s', url)

        return body

    def query(self, url, data = None, files = None, keepCookies = True):
        '''Queries a URL, optionally with some data (dictionary).

        If no data is specified, a GET request will be used.
        If some data is specified, a POST request will be used.

        If files is specified, it must be a dictionary like data but
        the values are filenames.

        By default, cookies are kept in-between requests.

        Returns the response body of a request answered with code 200.

        A HTTPError exception is raised if the response's HTTP code is not 200
        (for the codes in retryCodes: once the retries are used up).
        A pycurl.error is re-raised once the retries are used up.
        '''

        if not keepCookies:
            self.discardCookies()

        url = self.baseUrl + url

        # Work on a copy for logging so that a password never reaches the log.
        data4log = copy.copy(data)
        if data4log and 'password' in data4log:
            data4log['password'] = '*'

        # The first attempt is done without waiting.
        retries = [0] + list(self.retries)

        while True:
            logging.debug('Querying %s with data %s and files %s (retries left: %s, current sleep: %s)...', url, data4log, files, len(retries), retries[0])

            time.sleep(retries.pop(0))

            try:
                self.curl.setopt(self.curl.URL, url)
                self.curl.setopt(self.curl.HTTPGET, 1)

                # The token obtained by getToken() is sent as the user name.
                self.curl.setopt(pycurl.USERPWD, '%s:""' % ( str(self.token), ) )
                self.curl.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])

                if data is not None or files is not None:
                    # Data or files to send: switch to a multipart POST.
                    finalData = {}

                    if data is not None:
                        finalData.update(data)

                    if files is not None:
                        for (key, fileName) in files.items():
                            finalData[key] = (self.curl.FORM_FILE, fileName)

                    # pycurl expects a real list of (name, value) pairs.
                    self.curl.setopt( self.curl.HTTPPOST, list(finalData.items()) )

                self.curl.setopt(pycurl.VERBOSE, 0)

                response = cStringIO.StringIO()
                self.curl.setopt(self.curl.WRITEFUNCTION, response.write)
                self.curl.perform()

                code = self.curl.getinfo(self.curl.RESPONSE_CODE)

                if code in self.retryCodes and len(retries) > 0:
                    logging.debug('Retrying since we got the %s error code...', code)
                    continue

                if code != 200:
                    raise HTTPError(code, response.getvalue())

                return response.getvalue()

            except pycurl.error as e:
                if len(retries) == 0:
                    # Bare raise keeps the original traceback.
                    raise
                logging.debug('Retrying since we got the %s pycurl exception...', str(e))
0249
0250
0251
def addToTarFile(tarFile, fileobj, arcname):
    '''Adds the content of fileobj to tarFile under the name arcname.

    The member is stored with fixed metadata instead of the values of the
    file on disk: modification time 0, owner and group 'root' (uid/gid 0)
    and read-only permission for the owner.

    tarFile must be a tarfile.TarFile opened for writing and fileobj an
    open file positioned at the start of the data to store.
    '''
    member = tarFile.gettarinfo(arcname = arcname, fileobj = fileobj)

    # Overwrite everything that was taken from the file on disk.
    member.mtime = 0
    member.uid = 0
    member.gid = 0
    member.uname = 'root'
    member.gname = 'root'
    member.mode = 0o400

    tarFile.addfile(member, fileobj)
0258
class ConditionsUploader(object):
    '''Upload conditions to the CMS conditions uploader service.

    Typical use: signIn() once, then uploadFile() for each file, then
    signOut().
    '''

    def __init__(self, hostname = defaultHostname, urlTemplate = defaultUrlTemplate):
        self.hostname = hostname
        self.urlTemplate = urlTemplate
        self.userName = None
        self.http = None
        self.password = None
        # Set by signIn(), cleared by signOut().
        self.token = None

    def setHost( self, hostname ):
        '''Sets the host used by the next signIn(); does not sign in itself.
        '''
        self.hostname = hostname

    def signIn(self, username, password):
        '''Signs in the server.

        Creates a new HTTP object for the current host and asks it for a
        token. Returns True if a token was obtained, False otherwise (the
        reason is logged). The credentials are remembered on success so
        that signInAgain() can repeat the sign-in.
        '''
        self.http = HTTP()
        # Machines inside the '.cms' domain have to go through the proxy.
        if socket.getfqdn().strip().endswith('.cms'):
            self.http.setProxy('https://cmsproxy.cms:3128/')
        self.http.setBaseUrl(self.urlTemplate % self.hostname)

        logging.info('%s: Signing in user %s ...', self.hostname, username)
        try:
            self.token = self.http.getToken(username, password)
        except Exception as e:
            logging.error("Caught exception when trying to get token for user %s from %s: %s" % (username, self.hostname, str(e)) )
            return False

        if not self.token:
            logging.error("could not get token for user %s from %s" % (username, self.hostname) )
            return False

        # The value returned by getToken() contains the token: do not log it.
        logging.debug('%s: token received for user %s', self.hostname, username)
        self.userName = username
        self.password = password
        return True

    def signInAgain(self):
        '''Repeats signIn() with the credentials of the last successful sign-in.
        '''
        return self.signIn( self.userName, self.password )

    def signOut(self):
        '''Signs out the server.
        '''

        logging.info('%s: Signing out...', self.hostname)

        self.token = None

    @staticmethod
    def _basePath(filename):
        '''Returns filename without a trailing '.db' or '.txt' extension.

        Only a real suffix is removed; a '.db' or '.txt' occurring
        elsewhere in the path (e.g. in a directory name) is left alone.
        '''
        for extension in ('.db', '.txt'):
            if filename.endswith(extension):
                return filename[:-len(extension)]
        return filename

    def uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
        '''Uploads a file to the dropBox.

        The filename can be without extension, with .db or with .txt extension.
        It will be stripped and then both .db and .txt files are used.

        The 'destinationDatabase' entry of the metadata (.txt) file decides
        where the upload goes: 'oracle://cms_orcon_prod...' uses the current
        host, 'oracle://cms_orcoff_prep...' temporarily switches to the
        development host. Any other destination is rejected.

        Returns True if at least one tag was uploaded successfully,
        False otherwise.
        '''

        basepath = self._basePath(filename)
        metadataFilename = '%s.txt' % basepath
        with open(metadataFilename, 'rb') as metadataFile:
            metadata = json.load( metadataFile )

        destDb = metadata['destinationDatabase']
        toPrep = destDb.startswith('oracle://cms_orcoff_prep')

        if not (toPrep or destDb.startswith('oracle://cms_orcon_prod')):
            logging.error("DestinationDatabase %s is not valid. Skipping the upload." %destDb)
            return False

        if not toPrep:
            return self._uploadFile(filename, backend, temporaryFile)

        # Uploads to the prep database go through the development host.
        # Whatever happens, switch back to the previous host afterwards so
        # that later uploads are not sent to the wrong server.
        previousHost = self.hostname
        self.setHost( defaultDevHostname )
        try:
            if not self.signInAgain():
                logging.error('Could not sign in to %s. Skipping the upload.', self.hostname)
                return False
            return self._uploadFile(filename, backend, temporaryFile)
        finally:
            self.setHost( previousHost )
            self.signInAgain()

    def _createTarFile(self, basepath, temporaryFile):
        '''Creates the bz2 tar file temporaryFile for an upload.

        The tar file gets two members: 'data.db' (content of basepath.db)
        and 'metadata.txt' (content of basepath.txt, re-serialised as
        JSON with sorted keys). The tar file is closed in all cases.
        '''
        tarFile = None
        try:
            try:
                tarFile = tarfile.open(temporaryFile, 'w:bz2')

                with open('%s.db' % basepath, 'rb') as data:
                    addToTarFile(tarFile, data, 'data.db')
            except Exception as e:
                msg = 'Error when creating tar file. \n'
                msg += 'Please check that you have write access to the directory you are running,\n'
                msg += 'and that you have enough space on this disk (df -h .)\n'
                logging.error(msg)
                # Keep the real cause visible, e.g. a missing .db file.
                logging.error('Underlying error: %s', str(e))
                raise Exception(msg)

            with tempfile.NamedTemporaryFile() as metadata:
                with open('%s.txt' % basepath, 'rb') as originalMetadata:
                    json.dump(json.load(originalMetadata), metadata, sort_keys = True, indent = 4)

                # Rewind so that the tar file reads the metadata from the start.
                metadata.seek(0)
                addToTarFile(tarFile, metadata, 'metadata.txt')
        finally:
            if tarFile is not None:
                tarFile.close()

    @staticmethod
    def _sha1OfFile(path):
        '''Returns the hexadecimal SHA-1 digest of the file at path.
        '''
        digest = hashlib.sha1()
        with open(path, 'rb') as f:
            # Read in 4 MB pieces to keep the memory use bounded.
            for piece in iter(lambda: f.read(4 * 1024 * 1024), b''):
                digest.update(piece)
        return digest.hexdigest()

    def _uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
        '''Packs, hashes and uploads one file and reports the status per tag.

        The .db and .txt files belonging to filename are packed into
        temporaryFile, which is renamed to its SHA-1 hash for the upload
        and removed afterwards. A failing request is turned into a
        'failed' status for the file instead of an exception.

        Returns True if the server reported at least one tag as ok.
        Raises Exception if the tar file cannot be created.
        '''

        basepath = self._basePath(filename)
        basename = os.path.basename(basepath)

        logging.debug('%s: %s: Creating tar file for upload ...', self.hostname, basename)

        self._createTarFile(basepath, temporaryFile)

        logging.debug('%s: %s: Calculating hash...', self.hostname, basename)

        fileHash = self._sha1OfFile(temporaryFile)
        fileSize = os.stat(temporaryFile).st_size

        logging.debug('%s: %s: Hash: %s', self.hostname, basename, fileHash)

        logging.info('%s: %s: Uploading file (%s, size %s) to the %s backend...', self.hostname, basename, fileHash, fileSize, backend)
        # The file is uploaded under the name of its hash.
        os.rename(temporaryFile, fileHash)
        try:
            try:
                ret = self.http.query('uploadFile',
                                      {
                                        'backend': backend,
                                        'fileName': basename,
                                        'userName': self.userName,
                                      },
                                      files = {
                                        'uploadedFile': fileHash,
                                      }
                                      )
            except Exception as e:
                logging.error('Error from uploading: %s' % str(e))
                # Build a reply of the same shape as the one from the server.
                ret = json.dumps( { "status": -1, "upload" : { 'itemStatus' : { basename : {'status':'failed', 'info':str(e)}}}, "error" : str(e)} )
        finally:
            # Never leave the hash-named file behind, even when interrupted.
            os.unlink(fileHash)

        statusInfo = json.loads(ret)['upload']
        logging.debug( 'upload returned: %s', statusInfo )

        okTags = []
        skippedTags = []
        failedTags = []
        for tag, info in statusInfo['itemStatus'].items():
            logging.debug('checking tag %s, info %s', tag, str(json.dumps(info, indent=4,sort_keys=True)) )
            status = info['status'].lower()
            if 'ok' in status:
                okTags.append( tag )
                logging.info('tag %s successfully uploaded', tag)
            if 'skip' in status:
                skippedTags.append( tag )
                logging.warning('found tag %s to be skipped. reason:  \n ... \t%s ', tag, info['info'])
            if 'fail' in status:
                failedTags.append( tag )
                logging.error('found tag %s failed to upload. reason: \n ... \t%s ', tag, info['info'])

        if len(okTags)      > 0: logging.info   ("tags sucessfully uploaded: %s ", str(okTags) )
        if len(skippedTags) > 0: logging.warning("tags SKIPped to upload   : %s ", str(skippedTags) )
        if len(failedTags)  > 0: logging.error  ("tags FAILed  to upload   : %s ", str(failedTags) )

        fileLogURL = 'https://%s/logs/dropBox/getFileLog?fileHash=%s'
        logging.info('file log at: %s', fileLogURL % (self.hostname,fileHash))

        return len(okTags)>0