Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:01:54

0001 #!/usr/bin/env python3
0002 '''Script that uploads to the new CMS conditions uploader.
0003 Adapted to the new infrastructure from v6 of the upload.py script for the DropBox from Miguel Ojeda.
0004 '''
0005 
0006 __author__ = 'Andreas Pfeiffer'
0007 __copyright__ = 'Copyright 2015, CERN CMS'
0008 __credits__ = ['Giacomo Govi', 'Salvatore Di Guida', 'Miguel Ojeda', 'Andreas Pfeiffer']
0009 __license__ = 'Unknown'
0010 __maintainer__ = 'Andreas Pfeiffer'
0011 __email__ = 'andreas.pfeiffer@cern.ch'
0012 
0013 
0014 import os
0015 import sys
0016 import optparse
0017 import hashlib
0018 import tarfile
0019 import netrc
0020 import getpass
0021 import errno
0022 import sqlite3
0023 import json
0024 import tempfile
0025 
# Default value of the 'backend' form field sent along with each upload.
defaultBackend = 'online'
# Host the ConditionsUploader talks to unless another one is given.
defaultHostname = 'cms-conddb-prod.cern.ch'
# Host used instead when the destination database of an upload starts with
# 'oracle://cms_orcoff_prep'.
defaultDevHostname = 'cms-conddb-dev.cern.ch'
# Base URL of the upload service; the '%s' is replaced by the hostname.
defaultUrlTemplate = 'https://%s/cmsDbUpload/'
# Name of the tar file that is built locally before being uploaded.
defaultTemporaryFile = 'upload.tar.bz2'
# NOTE(review): not referenced in the code visible here - presumably the
# .netrc entry the credentials are read from; confirm against the caller.
defaultNetrcHost = 'ConditionUploader'
# NOTE(review): not referenced in the code visible here; confirm its use.
defaultWorkflow = 'offline'
0033 
0034 # common/http.py start (plus the "# Try to extract..." section bit)
0035 import time
0036 import logging
0037 import cStringIO
0038 
0039 import pycurl
0040 import socket
0041 import copy
0042 
0043 
class HTTPError(Exception):
    '''Exception describing a failed HTTP request.

    self.code is the response HTTP code as an integer.
    self.response is the response body (i.e. page).

    The exception message (self.args) is the text found after the first
    '<p>' of the body, up to the following '</p>', when the body can be
    parsed that way; otherwise it is the whole body.
    '''

    def __init__(self, code, response):
        self.response = response
        self.code = code

        # Known error pages carry the human-readable message in their first
        # paragraph; whatever cannot be parsed that way is reported whole.
        try:
            afterOpeningTag = response.split('<p>')[1]
            message = afterOpeningTag.split('</p>')[0]
        except Exception:
            message = response

        self.args = (message, )
0060             
0061 
# Directory holding CA certificates.
# NOTE(review): not referenced in the code visible here - presumably meant
# for curl's CAPATH option when talking to CERN SSO; confirm before relying on it.
CERN_SSO_CURL_CAPATH = '/etc/pki/tls/certs'
0063 
class HTTP(object):
    '''Class used for querying URLs using the HTTP protocol.

    Wraps a single pycurl.Curl handle. Cookies are kept in memory on that
    handle, a base URL is prefixed to every queried URL and, once getToken()
    has succeeded, the obtained token is sent as the user name of every
    subsequent query().
    '''

    # HTTP response codes on which query() retries (see setRetries()).
    retryCodes = frozenset([502, 503])

    def __init__(self):
        self.setBaseUrl()
        self.setRetries()

        self.curl = pycurl.Curl()
        self.curl.setopt(self.curl.COOKIEFILE, '')      # in memory

        #-toDo: make sure we have the right options set here to use ssl
        #-review(2015-09-25): check and see - action: AP
        # self.curl.setopt(self.curl.SSL_VERIFYPEER, 1)
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 2)

        # NOTE(review): this replaces the '' set by setBaseUrl() above, so
        # setBaseUrl() has to be called again before getToken() or query().
        self.baseUrl = None

        self.token = None

    @staticmethod
    def _decodeBody(chunks):
        '''Joins the byte chunks handed over by curl and returns them as text.

        Bytes that are not valid UTF-8 are replaced rather than raising, so
        that an unexpected error page can still be reported.
        '''
        return b''.join(chunks).decode('utf-8', 'replace')

    def getCookies(self):
        '''Returns the list of cookies.
        '''
        return self.curl.getinfo(self.curl.INFO_COOKIELIST)

    def discardCookies(self):
        '''Discards cookies.
        '''
        self.curl.setopt(self.curl.COOKIELIST, 'ALL')

    def setBaseUrl(self, baseUrl = ''):
        '''Allows to set a base URL which will be prefixed to all the URLs
        that will be queried later.
        '''
        self.baseUrl = baseUrl

    def setProxy(self, proxy = ''):
        '''Allows to set a proxy.
        '''
        self.curl.setopt(self.curl.PROXY, proxy)

    def setTimeout(self, timeout = 0):
        '''Allows to set a timeout (passed on to the curl TIMEOUT option).
        '''
        self.curl.setopt(self.curl.TIMEOUT, timeout)

    def setRetries(self, retries = ()):
        '''Allows to set retries.

        The retries are a sequence of the seconds to wait per retry.

        The retries are done on:
            * PyCurl errors (includes network problems, e.g. not being able
              to connect to the host).
            * 502 Bad Gateway (for the moment, to avoid temporary
              Apache-CherryPy issues).
            * 503 Service Temporarily Unavailable (for when we update
              the frontends).
        '''
        self.retries = retries

    def getToken(self, username, password):
        '''Requests a token from '<baseUrl>token' using the given credentials.

        On success the value of the 'token' field of the JSON response is
        stored in self.token and the complete response body is returned.
        If the response cannot be parsed or has no 'token' field, the error
        is logged, self.token is left untouched and None is returned.
        Errors raised by pycurl while performing the request are not caught.
        '''

        url = self.baseUrl + 'token'

        self.curl.setopt(pycurl.URL, url)
        self.curl.setopt(pycurl.VERBOSE, 0)

        #-toDo: check if/why these are needed ...
        #-ap: hmm ...
        # self.curl.setopt(pycurl.DNS_CACHE_TIMEOUT, 0)
        # self.curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
        #-end hmmm ...
        #-review(2015-09-25): check and see - action: AP

        self.curl.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])
        # self.curl.setopt( self.curl.POST, {})
        self.curl.setopt(self.curl.HTTPGET, 0)

        # curl hands the body over as byte chunks; list.append returns None,
        # which pycurl takes as "all bytes consumed".
        chunks = []
        self.curl.setopt(pycurl.WRITEFUNCTION, chunks.append)
        self.curl.setopt(pycurl.USERPWD, '%s:%s' % (username, password) )

        logging.debug('going to connect to server at: %s', url)

        self.curl.perform()
        code = self.curl.getinfo(pycurl.RESPONSE_CODE)
        logging.debug('got: %s ', str(code))

        body = self._decodeBody(chunks)

        try:
            self.token = json.loads(body)['token']
        except Exception as e:
            logging.error('http::getToken> got error from server: %s ', str(e) )
            return None

        logging.debug('token: %s', self.token)
        logging.debug('returning: %s', body)

        return body

    def query(self, url, data = None, files = None, keepCookies = True):
        '''Queries a URL, optionally with some data (dictionary).

        If no data is specified, a GET request will be used.
        If some data is specified, a POST request will be used.

        If files is specified, it must be a dictionary like data but
        the values are filenames.

        By default, cookies are kept in-between requests.

        The request is repeated according to setRetries() on pycurl errors
        and on the response codes listed in retryCodes.

        Returns the response body as text.

        A HTTPError exception is raised if the response's HTTP code is not 200.
        The last pycurl.error is re-raised once no retries are left.
        '''

        if not keepCookies:
            self.discardCookies()

        url = self.baseUrl + url

        # make sure the logs are safe ... at least somewhat :)
        data4log = copy.copy(data)
        if data4log and 'password' in data4log:
            data4log['password'] = '*'

        # The first attempt is made without waiting.
        retries = [0] + list(self.retries)

        while True:
            logging.debug('Querying %s with data %s and files %s (retries left: %s, current sleep: %s)...', url, data4log, files, len(retries), retries[0])

            time.sleep(retries.pop(0))

            try:
                self.curl.setopt(self.curl.URL, url)
                # Reset to GET on every attempt; switched to POST below if needed.
                self.curl.setopt(self.curl.HTTPGET, 1)

                # from now on we use the token we got from the login
                self.curl.setopt(pycurl.USERPWD, '%s:""' % ( str(self.token), ) )
                self.curl.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])

                if data is not None or files is not None:
                    # If there is data or files to send, use a POST request

                    finalData = {}

                    if data is not None:
                        finalData.update(data)

                    if files is not None:
                        for (key, fileName) in files.items():
                            finalData[key] = (self.curl.FORM_FILE, fileName)

                    # pycurl needs a real list of tuples, not a dict view.
                    self.curl.setopt( self.curl.HTTPPOST, list(finalData.items()) )

                self.curl.setopt(pycurl.VERBOSE, 0)

                # See getToken(): the body arrives as byte chunks.
                chunks = []
                self.curl.setopt(self.curl.WRITEFUNCTION, chunks.append)
                self.curl.perform()

                code = self.curl.getinfo(self.curl.RESPONSE_CODE)

                if code in self.retryCodes and retries:
                    logging.debug('Retrying since we got the %s error code...', code)
                    continue

                body = self._decodeBody(chunks)

                if code != 200:
                    raise HTTPError(code, body)

                return body

            except pycurl.error as e:
                if not retries:
                    raise
                logging.debug('Retrying since we got the %s pycurl exception...', str(e))
0249 
0250 # common/http.py end
0251 
def addToTarFile(tarFile, fileobj, arcname):
    '''Adds the content of fileobj to the open tarFile under the name arcname.

    Ownership, modification time and permissions of the entry are set to
    fixed values (root/root, uid and gid 0, mtime 0, mode 0400) instead of
    the ones of the file on disk.
    '''
    entry = tarFile.gettarinfo(arcname = arcname, fileobj = fileobj)

    entry.uid = 0
    entry.gid = 0
    entry.uname = 'root'
    entry.gname = 'root'
    entry.mtime = 0
    entry.mode = 0o400

    tarFile.addfile(entry, fileobj)
0258 
0259 class ConditionsUploader(object):
0260     '''Upload conditions to the CMS conditions uploader service.
0261     '''
0262 
0263     def __init__(self, hostname = defaultHostname, urlTemplate = defaultUrlTemplate):
0264         self.hostname = hostname
0265         self.urlTemplate = urlTemplate 
0266         self.userName = None
0267         self.http = None
0268         self.password = None
0269 
0270     def setHost( self, hostname ):
0271         self.hostname = hostname
0272 
0273     def signIn(self, username, password):
0274         ''' init the server.
0275         '''
0276         self.http = HTTP()
0277         if socket.getfqdn().strip().endswith('.cms'):
0278             self.http.setProxy('https://cmsproxy.cms:3128/')
0279         self.http.setBaseUrl(self.urlTemplate % self.hostname)
0280         '''Signs in the server.
0281         '''
0282 
0283         logging.info('%s: Signing in user %s ...', self.hostname, username)
0284         try:
0285             self.token = self.http.getToken(username, password)
0286         except Exception as e:
0287             logging.error("Caught exception when trying to get token for user %s from %s: %s" % (username, self.hostname, str(e)) )
0288             return False
0289 
0290         if not self.token:
0291             logging.error("could not get token for user %s from %s" % (username, self.hostname) )
0292             return False
0293 
0294         logging.debug( "got: '%s'", str(self.token) )
0295         self.userName = username
0296         self.password = password
0297         return True
0298 
0299     def signInAgain(self):
0300         return self.signIn( self.userName, self.password )
0301 
0302     def signOut(self):
0303         '''Signs out the server.
0304         '''
0305 
0306         logging.info('%s: Signing out...', self.hostname)
0307         # self.http.query('logout')
0308         self.token = None
0309 
0310 
0311     def uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
0312         '''Uploads a file to the dropBox.
0313 
0314         The filename can be without extension, with .db or with .txt extension.
0315         It will be stripped and then both .db and .txt files are used.
0316         '''
0317 
0318         basepath = filename.rsplit('.db', 1)[0].rsplit('.txt', 1)[0]
0319         metadataFilename = '%s.txt' % basepath
0320         with open(metadataFilename, 'rb') as metadataFile:
0321             metadata = json.load( metadataFile )
0322         # When dest db = prep the hostname has to be set to dev.
0323         forceHost = False
0324         destDb = metadata['destinationDatabase']
0325         ret = False
0326         if destDb.startswith('oracle://cms_orcon_prod') or destDb.startswith('oracle://cms_orcoff_prep'):
0327             if destDb.startswith('oracle://cms_orcoff_prep'):
0328                     self.setHost( defaultDevHostname )
0329                     self.signInAgain()
0330                     forceHost = True
0331             ret = self._uploadFile(filename, backend, temporaryFile)
0332             if forceHost:
0333                 # set back the hostname to the original global setting
0334                 self.setHost( defaultHostname )
0335                 self.signInAgain()
0336         else:
0337             logging.error("DestinationDatabase %s is not valid. Skipping the upload." %destDb)
0338         return ret
0339 
0340     def _uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
0341 
0342         basepath = filename.rsplit('.db', 1)[0].rsplit('.txt', 1)[0]
0343         basename = os.path.basename(basepath)
0344 
0345         logging.debug('%s: %s: Creating tar file for upload ...', self.hostname, basename)
0346 
0347         try:
0348             tarFile = tarfile.open(temporaryFile, 'w:bz2')
0349 
0350             with open('%s.db' % basepath, 'rb') as data:
0351                 addToTarFile(tarFile, data, 'data.db')
0352         except Exception as e:
0353             msg = 'Error when creating tar file. \n'
0354             msg += 'Please check that you have write access to the directory you are running,\n'
0355             msg += 'and that you have enough space on this disk (df -h .)\n'
0356             logging.error(msg)
0357             raise Exception(msg)
0358 
0359         with tempfile.NamedTemporaryFile() as metadata:
0360             with open('%s.txt' % basepath, 'rb') as originalMetadata:
0361                 json.dump(json.load(originalMetadata), metadata, sort_keys = True, indent = 4)
0362 
0363             metadata.seek(0)
0364             addToTarFile(tarFile, metadata, 'metadata.txt')
0365 
0366         tarFile.close()
0367 
0368         logging.debug('%s: %s: Calculating hash...', self.hostname, basename)
0369 
0370         fileHash = hashlib.sha1()
0371         with open(temporaryFile, 'rb') as f:
0372             while True:
0373                 data = f.read(4 * 1024 * 1024)
0374                 if not data:
0375                     break
0376                 fileHash.update(data)
0377 
0378         fileHash = fileHash.hexdigest()
0379         fileInfo = os.stat(temporaryFile)
0380         fileSize = fileInfo.st_size
0381 
0382         logging.debug('%s: %s: Hash: %s', self.hostname, basename, fileHash)
0383 
0384         logging.info('%s: %s: Uploading file (%s, size %s) to the %s backend...', self.hostname, basename, fileHash, fileSize, backend)
0385         os.rename(temporaryFile, fileHash)
0386         try:
0387             ret = self.http.query('uploadFile',
0388                               {
0389                                 'backend': backend,
0390                                 'fileName': basename,
0391                                 'userName': self.userName,
0392                               },
0393                               files = {
0394                                         'uploadedFile': fileHash,
0395                                       }
0396                               )
0397         except Exception as e:
0398             logging.error('Error from uploading: %s' % str(e))
0399             ret = json.dumps( { "status": -1, "upload" : { 'itemStatus' : { basename : {'status':'failed', 'info':str(e)}}}, "error" : str(e)} )
0400 
0401         os.unlink(fileHash)
0402 
0403         statusInfo = json.loads(ret)['upload']
0404         logging.debug( 'upload returned: %s', statusInfo )
0405 
0406         okTags      = []
0407         skippedTags = []
0408         failedTags  = []
0409         for tag, info in statusInfo['itemStatus'].items():
0410             logging.debug('checking tag %s, info %s', tag, str(json.dumps(info, indent=4,sort_keys=True)) )
0411             if 'ok'   in info['status'].lower() :
0412                 okTags.append( tag )
0413                 logging.info('tag %s successfully uploaded', tag)
0414             if 'skip' in info['status'].lower() :
0415                 skippedTags.append( tag )
0416                 logging.warning('found tag %s to be skipped. reason:  \n ... \t%s ', tag, info['info'])
0417             if 'fail' in info['status'].lower() :
0418                 failedTags.append( tag )
0419                 logging.error('found tag %s failed to upload. reason: \n ... \t%s ', tag, info['info'])
0420 
0421         if len(okTags)      > 0: logging.info   ("tags sucessfully uploaded: %s ", str(okTags) )
0422         if len(skippedTags) > 0: logging.warning("tags SKIPped to upload   : %s ", str(skippedTags) )
0423         if len(failedTags)  > 0: logging.error  ("tags FAILed  to upload   : %s ", str(failedTags) )
0424 
0425         fileLogURL = 'https://%s/logs/dropBox/getFileLog?fileHash=%s' 
0426         logging.info('file log at: %s', fileLogURL % (self.hostname,fileHash))
0427 
0428         return len(okTags)>0