Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 10:46:32

0001 #!/usr/bin/env python3
0002 '''Script that uploads to the new CMS conditions uploader.
0003 Adapted to the new infrastructure from v6 of the upload.py script for the DropBox from Miguel Ojeda.
0004 '''
0005 from __future__ import print_function
0006 
0007 __author__ = 'Andreas Pfeiffer'
0008 __copyright__ = 'Copyright 2015, CERN CMS'
0009 __credits__ = ['Giacomo Govi', 'Salvatore Di Guida', 'Miguel Ojeda', 'Andreas Pfeiffer']
0010 __license__ = 'Unknown'
0011 __maintainer__ = 'Giacomo Govi'
0012 __email__ = 'giacomo.govi@cern.ch'
0013 __version__ = 1
0014 
0015 
0016 import os
0017 import sys
0018 import optparse
0019 import hashlib
0020 import tarfile
0021 import netrc
0022 import getpass
0023 import errno
0024 import sqlite3
0025 import cx_Oracle
0026 import json
0027 import tempfile
0028 from datetime import datetime
0029 
# Default value of the "backend" argument of ConditionsUploader.uploadFile().
defaultBackend = 'online'
# Production upload server; default host of ConditionsUploader.
defaultHostname = 'cms-conddb-prod.cern.ch'
# Development upload server; uploadAllFiles() selects it when the destination
# database of a file is the "prep" one.
defaultDevHostname = 'cms-conddb-dev.cern.ch'
# Base URL of the upload service; %s is replaced by the hostname.
defaultUrlTemplate = 'https://%s/cmsDbUpload/'
# Default name of the tar archive that uploadFile() builds before uploading.
defaultTemporaryFile = 'upload.tar.bz2'
# NOTE(review): presumably the default .netrc "machine" entry used to look up
# the credentials -- the option definition is not visible here, confirm.
defaultNetrcHost = 'ConditionUploader'
# Workflow proposed by getInputWorkflow() when the user just hits Enter.
defaultWorkflow = 'offline'
# NOTE(review): the three names below are not referenced in the visible part of
# this file; presumably they identify the upload log database -- confirm.
prodLogDbSrv = 'cms_orcoff_prod'
devLogDbSrv = 'cms_orcoff_prep'
logDbSchema = 'CMS_COND_DROPBOX'
# Environment variable holding the directory where the .netrc file is searched
# (see getCredentials()).
authPathEnvVar = 'COND_AUTH_PATH'
# Seconds HTTP.getToken() sleeps before retrying after a 502/503/504 answer.
waitForRetry = 15
0042 
0043 # common/http.py start (plus the "# Try to extract..." section bit)
0044 import time
0045 import logging
0046 import io
0047 
0048 import pycurl
0049 import socket
0050 import copy
0051 
def getInput(default, prompt = ''):
    '''Like input() but with a default and automatic strip().

    The answer is stripped *before* it is tested, so an answer made only of
    whitespace counts as empty and selects the default instead of being
    returned as an empty string.

    default: string returned (stripped) when the user gives no answer.
    prompt: text shown to the user.

    Returns the stripped answer, or the stripped default.
    '''

    answer = input(prompt).strip()
    if answer:
        return answer

    return default.strip()
0061 
0062 
def getInputWorkflow(prompt = ''):
    '''Asks for a target workflow (synchronization option) until a valid one is given.

    An empty answer selects defaultWorkflow. Every rejected answer is
    reported through logging.error() and the question is asked again.

    Returns the chosen workflow name.
    '''

    allowedWorkflows = frozenset(['offline', 'hlt', 'express', 'prompt', 'pcl'])

    chosen = getInput(defaultWorkflow, prompt)
    while chosen not in allowedWorkflows:
        logging.error('Please specify one of the allowed workflows. See above for the explanation on each of them.')
        chosen = getInput(defaultWorkflow, prompt)

    return chosen
0074 
0075 
def getInputChoose(optionsList, default, prompt = ''):
    '''Makes the user choose from a list of options.

    The user types the index of the wanted entry; the question is repeated
    until a valid index is given.

    optionsList: sequence of the available options.
    default: index (as a string) used when the user gives no answer.
    prompt: text shown to the user.

    Returns the selected element of optionsList.
    '''

    while True:
        index = getInput(default, prompt)

        try:
            position = int(index)
        except ValueError:
            logging.error('Please specify an index of the list (i.e. integer).')
            continue

        # A negative index would silently select an entry counted from the
        # end of the list, so it is rejected like any other index out of range.
        if position < 0:
            logging.error('The index you provided is not in the given list.')
            continue

        try:
            return optionsList[position]
        except IndexError:
            logging.error('The index you provided is not in the given list.')
0089 
0090 
def getInputRepeat(prompt = ''):
    '''Like input() but repeats if nothing is provided and automatic strip().

    The answer is stripped before it is tested, so an answer made only of
    whitespace counts as "nothing provided" and the question is asked again.

    Returns the stripped, non-empty answer.
    '''

    while True:
        answer = input(prompt).strip()
        if answer:
            return answer

        logging.error('You need to provide a value.')
0101 
0102 
def runWizard(basename, dataFilename, metadataFilename):
    '''Interactively builds the metadata file that goes with a SQLite data file.

    The user is asked for the input tag (chosen among the tags found in the
    TAG table of the data file), the destination database, an optional since,
    a free comment and at least one destination tag. The resulting JSON is
    shown and the whole questionnaire is repeated until the user accepts it;
    the accepted JSON is then written to metadataFilename.

    basename: name shown in the wizard banner.
    dataFilename: path of the SQLite data file to read the tags from.
    metadataFilename: path of the metadata file to write.

    Raises Exception if the data file contains no tag or if no valid
    destination database is given after two attempts.
    '''
    prodDatabase = 'oracle://cms_orcon_prod/CMS_CONDITIONS'
    prepDatabase = 'oracle://cms_orcoff_prep/CMS_CONDITIONS'

    while True:
        print('\nWizard for metadata for %s\n\nI will ask you some questions to fill the metadata file. For some of the questions there are defaults between square brackets (i.e. []), leave empty (i.e. hit Enter) to use them.' % basename)

        # Try to get the available inputTags. The connection is closed as soon
        # as the names are read, so repeating the wizard does not accumulate
        # open connections to the data file.
        dataConnection = sqlite3.connect(dataFilename)
        try:
            dataCursor = dataConnection.cursor()
            dataCursor.execute('select NAME from TAG')
            inputTags = [rec[0] for rec in dataCursor.fetchall()]
        finally:
            dataConnection.close()

        if len(inputTags) == 0:
            raise Exception("Could not find any input tag in the data file.")

        print('\nI found the following input tags in your SQLite data file:')
        for (index, inputTag) in enumerate(inputTags):
            print('   %s) %s' % (index, inputTag))

        inputTag = getInputChoose(inputTags, '0',
                                  '\nWhich is the input tag (i.e. the tag to be read from the SQLite data file)?\ne.g. 0 (you select the first in the list)\ninputTag [0]: ')

        # The destination is asked at most twice; the short names 'prod' and
        # 'prep' are expanded to the full connection strings.
        destinationDatabase = ''
        ntry = 0
        while destinationDatabase != prodDatabase and destinationDatabase != prepDatabase:
            if ntry == 0:
                inputMessage = '\nWhich is the destination database where the tags should be exported? \nPossible choices: oracle://cms_orcon_prod/CMS_CONDITIONS (or prod); oracle://cms_orcoff_prep/CMS_CONDITIONS (or prep) \ndestinationDatabase: '
            elif ntry == 1:
                inputMessage = '\nPlease choose one of the two valid destinations: \noracle://cms_orcon_prod/CMS_CONDITIONS (for prod) or oracle://cms_orcoff_prep/CMS_CONDITIONS (for prep) \ndestinationDatabase: '
            else:
                raise Exception('No valid destination chosen. Bailing out...')
            destinationDatabase = getInputRepeat(inputMessage)
            if destinationDatabase == 'prod':
                destinationDatabase = prodDatabase
            if destinationDatabase == 'prep':
                destinationDatabase = prepDatabase
            ntry += 1

        # An empty since is stored as null; anything else must be an integer.
        while True:
            since = getInput('',
                             '\nWhich is the given since? (if not specified, the one from the SQLite data file will be taken -- note that even if specified, still this may not be the final since, depending on the synchronization options you select later: if the synchronization target is not offline, and the since you give is smaller than the next possible one (i.e. you give a run number earlier than the one which will be started/processed next in prompt/hlt/express), the DropBox will move the since ahead to go to the first safe run instead of the value you gave)\ne.g. 1234\nsince []: ')
            if not since:
                since = None
                break
            try:
                since = int(since)
                break
            except ValueError:
                logging.error('The since value has to be an integer or empty (null).')

        userText = getInput('',
                            '\nWrite any comments/text you may want to describe your request\ne.g. Muon alignment scenario for...\nuserText []: ')

        # Destination tags are collected until an empty answer is given; at
        # least one is required.
        destinationTags = {}
        while True:
            destinationTag = getInput('',
                                      '\nWhich is the next destination tag to be added (leave empty to stop)?\ne.g. BeamSpotObjects_PCL_byRun_v0_offline\ndestinationTag []: ')
            if not destinationTag:
                if len(destinationTags) == 0:
                    logging.error('There must be at least one destination tag.')
                    continue
                break

            if destinationTag in destinationTags:
                logging.warning(
                    'You already added this destination tag. Overwriting the previous one with this new one.')

            destinationTags[destinationTag] = {
            }

        metadataText = json.dumps({
            'destinationDatabase': destinationDatabase,
            'destinationTags': destinationTags,
            'inputTag': inputTag,
            'since': since,
            'userText': userText,
        }, sort_keys=True, indent=4)
        print('\nThis is the generated metadata:\n%s' % metadataText)

        if getInput('n',
                    '\nIs it fine (i.e. save in %s and *upload* the conditions if this is the latest file)?\nAnswer [n]: ' % metadataFilename).lower() == 'y':
            break

    logging.info('Saving generated metadata in %s...', metadataFilename)
    with open(metadataFilename, 'w') as metadataFile:
        metadataFile.write(metadataText)
0199 
class HTTPError(Exception):
    '''A common HTTP exception.

    self.code is the response HTTP code as an integer.
    self.response is the response body (i.e. page), exactly as received.

    The exception message (self.args) is the text of the first <p>...</p>
    element of the body when there is one, otherwise the whole body.
    '''

    def __init__(self, code, response):
        self.code = code
        self.response = response

        # The body normally arrives as bytes (it is read from a BytesIO);
        # decode it so that the message can be extracted and str(self)
        # yields readable text instead of a bytes repr.
        text = response
        if isinstance(response, (bytes, bytearray)):
            text = bytes(response).decode('UTF-8', 'replace')

        # Try to extract the error message if possible (i.e. known error page format)
        try:
            self.args = (text.split('<p>')[1].split('</p>')[0], )
        except Exception:
            self.args = (text, )
0216             
0217 
# NOTE(review): not referenced in the visible part of this file; presumably the
# CA certificate directory meant to be handed to curl -- confirm before use.
CERN_SSO_CURL_CAPATH = '/etc/pki/tls/certs'
0219 
class HTTP(object):
    '''Class used for querying URLs using the HTTP protocol.

    One pycurl handle is created per instance and reused for every request.
    getToken() authenticates with username/password; query() then sends the
    stored token with each request.
    '''

    # HTTP codes on which query() retries (see setRetries()).
    retryCodes = frozenset([502, 503])

    def __init__(self):
        # Start with an empty base URL and no retries; the base URL is no
        # longer reset to None afterwards, so URLs can always be built by
        # concatenation.
        self.setBaseUrl()
        self.setRetries()

        self.curl = pycurl.Curl()
        self.curl.setopt(self.curl.COOKIEFILE, '')      # in memory

        #-toDo: make sure we have the right options set here to use ssl
        #-review(2015-09-25): check and see - action: AP
        # self.curl.setopt(self.curl.SSL_VERIFYPEER, 1)
        # SECURITY NOTE(review): verification of the peer certificate is
        # switched off here, while credentials are sent over this connection.
        # Left unchanged on purpose; enabling it needs a usable CA setup.
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 2)

        self.token = None

    def getCookies(self):
        '''Returns the list of cookies.
        '''
        return self.curl.getinfo(self.curl.INFO_COOKIELIST)

    def discardCookies(self):
        '''Discards cookies.
        '''
        self.curl.setopt(self.curl.COOKIELIST, 'ALL')


    def setBaseUrl(self, baseUrl = ''):
        '''Allows to set a base URL which will be prefixed to all the URLs
        that will be queried later.
        '''
        self.baseUrl = baseUrl


    def setProxy(self, proxy = ''):
        '''Allows to set a proxy.
        '''
        self.curl.setopt(self.curl.PROXY, proxy)


    def setTimeout(self, timeout = 0):
        '''Allows to set a timeout.
        '''
        self.curl.setopt(self.curl.TIMEOUT, timeout)


    def setRetries(self, retries = ()):
        '''Allows to set retries.

        The retries are a sequence of the seconds to wait per retry.

        The retries are done on:
            * PyCurl errors (includes network problems, e.g. not being able
              to connect to the host).
            * 502 Bad Gateway (for the moment, to avoid temporary
              Apache-CherryPy issues).
            * 503 Service Temporarily Unavailable (for when we update
              the frontends).
        '''
        self.retries = retries

    def _requestToken(self, username, password):
        '''Performs one token request on the already configured handle.

        Returns a (HTTP code, raw body as bytes) pair. The body is always
        collected in a BytesIO, since the write callback receives bytes.
        '''
        response = io.BytesIO()
        self.curl.setopt(pycurl.WRITEFUNCTION, response.write)
        self.curl.setopt(pycurl.USERPWD, '%s:%s' % (username, password) )
        self.curl.perform()
        return self.curl.getinfo(pycurl.RESPONSE_CODE), response.getvalue()

    def getToken(self, username, password):
        '''Requests an authentication token from the server.

        The request is sent to <baseUrl>token with the given credentials; on a
        502, 503 or 504 answer it is repeated once after waitForRetry seconds.
        On success the token is stored in self.token.

        Returns the raw response body (bytes) on success, or None when the
        server reports invalid credentials (500) or unauthorized access (403).

        Raises Exception on any other HTTP code or when the answer cannot be
        decoded as JSON containing a 'token' entry.
        '''

        url = self.baseUrl + 'token'

        self.curl.setopt(pycurl.URL, url)
        self.curl.setopt(pycurl.VERBOSE, 0)

        #-toDo: check if/why these are needed ...
        #-ap: hmm ...
        # self.curl.setopt(pycurl.DNS_CACHE_TIMEOUT, 0)
        # self.curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
        #-end hmmm ...
        #-review(2015-09-25): check and see - action: AP

        self.curl.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])
        # self.curl.setopt( self.curl.POST, {})
        self.curl.setopt(self.curl.HTTPGET, 0)

        logging.debug('going to connect to server at: %s' % url )

        code, body = self._requestToken(username, password)
        logging.debug('got: %s ', str(code))
        if code in ( 502,503,504 ):
            logging.debug('Trying again after %d seconds...', waitForRetry)
            time.sleep( waitForRetry )
            code, body = self._requestToken(username, password)

        resp = body.decode('UTF-8')
        errorMsg = None
        if code==500 and "INVALID_CREDENTIALS" in resp:
            logging.error("Invalid credentials provided.")
            return None
        if code==403 and "Unauthorized access" in resp:
            logging.error("Unauthorized access. Please check the membership of group 'cms-cond-dropbox'")
            return None
        if code==200:
            try:
                self.token = json.loads( resp )['token']
            except Exception as e:
                errorMsg = 'Error while decoding returned json string'
                logging.debug('http::getToken> error while decoding json: %s ', str(resp) )
                logging.debug("error getting token: %s", str(e))
                resp = None
        else:
            errorMsg = 'HTTP Error code %s ' %code
            logging.debug('got: %s ', str(code))
            logging.debug('http::getToken> got error from server: %s ', str(resp) )
            resp = None
        if resp is None:
            raise Exception(errorMsg)

        logging.debug('token: %s', self.token)
        logging.debug('returning: %s', resp)

        return body

    def query(self, url, data = None, files = None, keepCookies = True):
        '''Queries a URL, optionally with some data (dictionary).

        If no data is specified, a GET request will be used.
        If some data is specified, a POST request will be used.

        If files is specified, it must be a dictionary like data but
        the values are filenames.

        By default, cookies are kept in-between requests.

        Returns the response body as bytes.

        A HTTPError exception is raised if the response's HTTP code is not 200.
        A pycurl.error is re-raised once the configured retries are used up.
        '''

        if not keepCookies:
            self.discardCookies()

        url = self.baseUrl + url

        # make sure the logs are safe ... at least somewhat :)
        # (shallow copy, so the caller's dictionary is not modified)
        data4log = copy.copy(data)
        if data4log:
            if 'password' in data4log:
                data4log['password'] = '*'

        # The first attempt is done without waiting; each further entry is
        # the number of seconds to sleep before the corresponding retry.
        retries = [0] + list(self.retries)

        while True:
            logging.debug('Querying %s with data %s and files %s (retries left: %s, current sleep: %s)...', url, data4log, files, len(retries), retries[0])

            time.sleep(retries.pop(0))

            try:
                self.curl.setopt(self.curl.URL, url)
                self.curl.setopt(self.curl.HTTPGET, 1)

                # from now on we use the token we got from the login
                self.curl.setopt(pycurl.USERPWD, '%s:""' % ( str(self.token), ) )
                self.curl.setopt(pycurl.HTTPHEADER, ['Accept: application/json'])

                if data is not None or files is not None:
                    # If there is data or files to send, use a POST request

                    finalData = {}

                    if data is not None:
                        finalData.update(data)

                    if files is not None:
                        for (key, fileName) in files.items():
                            finalData[key] = (self.curl.FORM_FILE, fileName)
                    self.curl.setopt( self.curl.HTTPPOST, list(finalData.items()) )

                self.curl.setopt(pycurl.VERBOSE, 0)

                response = io.BytesIO()
                self.curl.setopt(self.curl.WRITEFUNCTION, response.write)
                self.curl.perform()

                code = self.curl.getinfo(self.curl.RESPONSE_CODE)

                if code in self.retryCodes and len(retries) > 0:
                    logging.debug('Retrying since we got the %s error code...', code)
                    continue

                if code != 200:
                    raise HTTPError(code, response.getvalue())

                return response.getvalue()

            except pycurl.error as e:
                if len(retries) == 0:
                    # No retry left: propagate with the original traceback.
                    raise
                logging.debug('Retrying since we got the %s pycurl exception...', str(e))
0425 
0426 # common/http.py end
0427 
def addToTarFile(tarFile, fileobj, arcname):
    '''Adds an open file to a tar archive with fixed, neutral metadata.

    The entry is stored under arcname, read-only for its owner, with owner
    and group set to root (id 0) and a zero modification time, whatever the
    attributes of the file on disk are.

    tarFile: tarfile.TarFile opened for writing.
    fileobj: open binary file object backed by a real file.
    arcname: name of the entry inside the archive.
    '''
    member = tarFile.gettarinfo(arcname = arcname, fileobj = fileobj)

    # Ownership and timestamp are overwritten with constants.
    member.uname = 'root'
    member.gname = 'root'
    member.uid = 0
    member.gid = 0
    member.mtime = 0
    member.mode = 0o400

    tarFile.addfile(member, fileobj)
0434 
class ConditionsUploader(object):
    '''Upload conditions to the CMS conditions uploader service.

    Typical use: setHost() (optional), signIn(), uploadFile() once per file,
    then signOut().
    '''

    def __init__(self, hostname = defaultHostname, urlTemplate = defaultUrlTemplate):
        self.hostname = hostname
        self.urlTemplate = urlTemplate
        self.userName = None
        self.http = None
        self.password = None
        self.token = None

    def setHost( self, hostname ):
        '''Selects the server to talk to.

        Changing the host drops the current token, so that the next signIn()
        authenticates against the new server.
        '''
        if not hostname==self.hostname:
            self.token = None
            self.hostname = hostname

    def signIn(self, username, password ):
        '''Signs in the server, unless a token is already held.

        Returns 0 on success (or when already authenticated), -1 when the
        token request raised an exception and -2 when no token was obtained.
        '''
        if self.token is not None:
            logging.debug("User %s has been already authenticated." %username)
            return 0

        logging.debug("Initializing connection with server %s",self.hostname)
        # init the server.
        self.http = HTTP()
        if socket.getfqdn().strip().endswith('.cms'):
            self.http.setProxy('https://cmsproxy.cms:3128/')
        self.http.setBaseUrl(self.urlTemplate % self.hostname)
        # Signs in the server.
        logging.info('%s: Signing in user %s ...', self.hostname, username)
        try:
            self.token = self.http.getToken(username, password)
        except Exception as e:
            ret = -1
            # optionally, we may want to have a different return for network related errors:
            #code = self.http.curl.getinfo(pycurl.RESPONSE_CODE)
            #if code in ( 502,503,504 ):
            #    ret = -10
            logging.error("Caught exception when trying to connect to %s: %s" % (self.hostname, str(e)) )
            return ret

        if not self.token:
            logging.error("could not get token for user %s from %s" % (username, self.hostname) )
            return -2
        logging.debug( "got: '%s'", str(self.token) )
        self.userName = username
        self.password = password
        return 0

    def signOut(self):
        '''Signs out the server.

        Only the locally held token is dropped; no request is sent.
        '''

        logging.info('%s: Signing out...', self.hostname)
        # self.http.query('logout')
        self.token = None


    def _checkForUpdates(self):
        '''Updates this script, if a new version is found.

        When the server advertises a version newer than __version__, the new
        script is downloaded over sys.argv[0] and executed in place of the
        current process.
        '''

        logging.debug('%s: Checking if a newer version of this script is available ...', self.hostname)
        version = int(self.http.query('getUploadScriptVersion'))

        if version <= __version__:
            logging.debug('%s: Script is up-to-date.', self.hostname)
            return

        logging.info('%s: Updating to a newer version (%s) than the current one (%s): downloading ...', self.hostname, version, __version__)

        uploadScript = self.http.query('getUploadScript')

        self.signOut()

        logging.info('%s: ... saving the new version ...', self.hostname)
        with open(sys.argv[0], 'wb') as f:
            f.write(uploadScript)

        logging.info('%s: ... executing the new version...', self.hostname)
        os.execl(sys.executable, *([sys.executable] + sys.argv))


    def _createTarFile(self, basepath, temporaryFile):
        '''Builds the bz2 tar archive holding data.db and metadata.txt.

        The metadata is re-serialized (sorted keys, 4-space indent) before it
        is stored. The archive is closed in every case, also when one of the
        steps fails.
        '''
        tarFile = None
        try:
            try:
                tarFile = tarfile.open(temporaryFile, 'w:bz2')

                with open('%s.db' % basepath, 'rb') as data:
                    addToTarFile(tarFile, data, 'data.db')
            except Exception as e:
                msg = 'Error when creating tar file. \n'
                msg += 'Please check that you have write access to the directory you are running,\n'
                msg += 'and that you have enough space on this disk (df -h .)\n'
                logging.error(msg)
                raise Exception(msg) from e

            with tempfile.NamedTemporaryFile(mode='rb+') as metadata:
                with open('%s.txt' % basepath, 'r') as originalMetadata:
                    metadata.write(json.dumps(json.load(originalMetadata), sort_keys = True, indent = 4).encode())

                metadata.seek(0)
                addToTarFile(tarFile, metadata, 'metadata.txt')
        finally:
            if tarFile is not None:
                tarFile.close()

    @staticmethod
    def _sha1OfFile(path):
        '''Returns the hexadecimal SHA-1 digest of a file, read in 4 MiB chunks.'''
        digest = hashlib.sha1()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(4 * 1024 * 1024), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def uploadFile(self, filename, backend = defaultBackend, temporaryFile = defaultTemporaryFile):
        '''Uploads a file to the dropBox.

        The filename can be without extension, with .db or with .txt extension.
        It will be stripped and then both .db and .txt files are used.

        The two files are packed into temporaryFile, which is renamed to its
        SHA-1 hash for the upload and removed afterwards. An error raised by
        the upload request is logged and reported as a failed item instead of
        being propagated.

        Returns True if at least one tag was reported as successfully
        uploaded, False otherwise.

        Raises Exception if the tar file cannot be created.
        '''

        basepath = filename.rsplit('.db', 1)[0].rsplit('.txt', 1)[0]
        basename = os.path.basename(basepath)

        logging.debug('%s: %s: Creating tar file for upload ...', self.hostname, basename)

        self._createTarFile(basepath, temporaryFile)

        logging.debug('%s: %s: Calculating hash...', self.hostname, basename)

        fileHash = self._sha1OfFile(temporaryFile)
        fileSize = os.stat(temporaryFile).st_size

        logging.debug('%s: %s: Hash: %s', self.hostname, basename, fileHash)

        logging.info('%s: %s: Uploading file (%s, size %s) to the %s backend...', self.hostname, basename, fileHash, fileSize, backend)
        os.rename(temporaryFile, fileHash)
        try:
            try:
                ret = self.http.query('uploadFile',
                                  {
                                    'backend': backend,
                                    'fileName': basename,
                                    'userName': self.userName,
                                  },
                                  files = {
                                            'uploadedFile': fileHash,
                                          }
                                  )
            except Exception as e:
                logging.error('Error from uploading: %s' % str(e))
                ret = json.dumps( { "status": -1, "upload" : { 'itemStatus' : { basename : {'status':'failed', 'info':str(e)}}}, "error" : str(e)} )
        finally:
            # The renamed archive is removed whatever happened to the upload.
            os.unlink(fileHash)

        statusInfo = json.loads(ret)['upload']
        logging.debug( 'upload returned: %s', statusInfo )

        okTags      = []
        skippedTags = []
        failedTags  = []
        for tag, info in statusInfo['itemStatus'].items():
            logging.debug('checking tag %s, info %s', tag, str(json.dumps(info, indent=4,sort_keys=True)) )
            # The three checks are independent substring tests on the status.
            status = info['status'].lower()
            if 'ok' in status:
                okTags.append( tag )
                logging.info('tag %s successfully uploaded', tag)
            if 'skip' in status:
                skippedTags.append( tag )
                logging.warning('found tag %s to be skipped. reason:  \n ... \t%s ', tag, info['info'])
            if 'fail' in status:
                failedTags.append( tag )
                logging.error('found tag %s failed to upload. reason: \n ... \t%s ', tag, info['info'])

        if len(okTags)      > 0: logging.info   ("tags sucessfully uploaded: %s ", str(okTags) )
        if len(skippedTags) > 0: logging.warning("tags SKIPped to upload   : %s ", str(skippedTags) )
        if len(failedTags)  > 0: logging.error  ("tags FAILed  to upload   : %s ", str(failedTags) )

        # The log area shown in the URL depends on the server, not on the
        # upload backend, hence a separate local name.
        fileLogURL = 'https://cms-conddb.cern.ch/cmsDbBrowser/logs/show_cond_uploader_log/%s/%s'
        logArea = 'Prod'
        if self.hostname == defaultDevHostname:
            logArea = 'Prep'
        logging.info('file log at: %s', fileLogURL % (logArea,fileHash))

        return len(okTags)>0
0616 
def getCredentials( options ):
    '''Returns the (username, password) pair used to sign in.

    The credentials are read from the options.netrcHost entry of a .netrc
    file. The file is searched in options.authPath when given, else in the
    directory named by the authPathEnvVar environment variable, else at the
    default location of the netrc module. If the entry cannot be read for any
    reason, the user is asked for username and password instead.
    '''

    username = None
    password = None

    # options.authPath has priority over the environment variable.
    netrcPath = None
    authDir = options.authPath
    if authDir is None and authPathEnvVar in os.environ:
        authDir = os.environ[authPathEnvVar]
    if authDir is not None:
        netrcPath = os.path.join(authDir, '.netrc')

    try:
        # Try to find the netrc entry
        entry = netrc.netrc( netrcPath ).authenticators(options.netrcHost)
        username, _account, password = entry
    except Exception:
        # netrc entry not found, ask for the username and password
        logging.info(
            'netrc entry "%s" not found: if you wish not to have to retype your password, you can add an entry in your .netrc file. However, beware of the risks of having your password stored as plaintext. Instead.',
            options.netrcHost)

        # Try to get a default username
        suggestedUser = getpass.getuser()
        if suggestedUser is None:
            suggestedUser = '(not found)'

        username = getInput(suggestedUser, '\nUsername [%s]: ' % suggestedUser)
        password = getpass.getpass('Password: ')

    return username, password
0645 
0646 
def uploadAllFiles(options, arguments):
    '''Checks and uploads all the files given on the command line.

    First every file is checked: the SQLite data file must be readable and
    its IOV table must hold at least one row; a missing metadata file can be
    created through the wizard. Then each file is uploaded to the server that
    matches the destination database of its metadata.

    options: parsed options; hostname, urlTemplate, backend and temporaryFile
        are read here, the rest is handed to getCredentials().
    arguments: list of file names (with or without .db / .txt extension).

    Returns a dictionary with a 'status' entry and, depending on the outcome,
    'error' or 'files' (upload result per file name). Status values:
         0  everything was uploaded
        >0  number of files that were not uploaded
        -1  HTTP error, or exception while connecting to the server
        -2  sign-in did not deliver a token
        -3  data file cannot be opened
        -4  data file holds no IOV, or metadata file cannot be opened
        -5  data file check failed, or metadata file is missing
    '''

    ret = {}
    ret['status'] = 0

    # Check that we can read the data and metadata files
    # If the metadata file does not exist, start the wizard
    for filename in arguments:
        basepath = filename.rsplit('.db', 1)[0].rsplit('.txt', 1)[0]
        basename = os.path.basename(basepath)
        dataFilename = '%s.db' % basepath
        metadataFilename = '%s.txt' % basepath

        logging.info('Checking %s...', basename)

        # Data file
        try:
            with open(dataFilename, 'rb'):
                pass
        except IOError:
            errMsg = 'Impossible to open SQLite data file %s' %dataFilename
            logging.error( errMsg )
            ret['status'] = -3
            ret['error'] = errMsg
            return ret

        # Check the data file: one fetched row is enough to know that it is
        # not empty, and the connection is closed also when the query fails.
        try:
            dbcon = sqlite3.connect( dataFilename )
            try:
                dbcur = dbcon.cursor()
                dbcur.execute('SELECT * FROM IOV')
                empty = dbcur.fetchone() is None
            finally:
                dbcon.close()
        except Exception as e:
            errMsg = 'Check on input SQLite data file %s failed: %s' %(dataFilename,str(e))
            logging.error( errMsg )
            ret['status'] = -5
            ret['error'] = errMsg
            return ret
        if empty:
            errMsg = 'The input SQLite data file %s contains no data.' %dataFilename
            logging.error( errMsg )
            ret['status'] = -4
            ret['error'] = errMsg
            return ret

        # Metadata file
        try:
            with open(metadataFilename, 'rb'):
                pass
        except IOError as e:
            if e.errno != errno.ENOENT:
                errMsg = 'Impossible to open file %s (for other reason than not existing)' %metadataFilename
                logging.error( errMsg )
                ret['status'] = -4
                ret['error'] = errMsg
                return ret

            if getInput('y', '\nIt looks like the metadata file %s does not exist. Do you want me to create it and help you fill it?\nAnswer [y]: ' % metadataFilename).lower() != 'y':
                errMsg = 'Metadata file %s does not exist' %metadataFilename
                logging.error( errMsg )
                ret['status'] = -5
                ret['error'] = errMsg
                return ret
            # Wizard
            runWizard(basename, dataFilename, metadataFilename)

    # Upload files
    try:
        dropBox = ConditionsUploader(options.hostname, options.urlTemplate)

        # Authentication
        username, password = getCredentials(options)

        results = {}
        for filename in arguments:
            basepath = filename.rsplit('.db', 1)[0].rsplit('.txt', 1)[0]
            metadataFilename = '%s.txt' % basepath
            with open(metadataFilename, 'rb') as metadataFile:
                metadata = json.load( metadataFile )
            # When dest db = prep the hostname has to be set to dev.
            destDb = metadata['destinationDatabase']
            if destDb.startswith(('oracle://cms_orcon_prod', 'oracle://cms_orcoff_prep')):
                hostName = defaultHostname
                if destDb.startswith('oracle://cms_orcoff_prep'):
                    hostName = defaultDevHostname
                dropBox.setHost( hostName )
                authRet = dropBox.signIn( username, password )
                if not authRet==0:
                    msg = "Error trying to connect to the server. Aborting."
                    if authRet==-2:
                        msg = "Error while signin in. Aborting."
                    logging.error(msg)
                    return { 'status' : authRet, 'error' : msg }
                results[filename] = dropBox.uploadFile(filename, options.backend, options.temporaryFile)
            else:
                results[filename] = False
                logging.error("DestinationDatabase %s is not valid. Skipping the upload." %destDb)
            if not results[filename]:
                # The status counts the files that could not be uploaded.
                if ret['status']<0:
                    ret['status'] = 0
                ret['status'] += 1
        ret['files'] = results
        logging.debug("all files processed, logging out now.")

        dropBox.signOut()

    except HTTPError as e:
        logging.error('got HTTP error: %s', str(e))
        return { 'status' : -1, 'error' : str(e) }

    return ret
0764 
def uploadTier0Files(filenames, username, password, cookieFileName = None):
    '''Uploads a bunch of files coming from Tier0.

    This has the following requirements:
        * Username/Password based authentication.
        * Ignores errors related to the upload/content (e.g. duplicated file).

    NOTE(review): this docstring used to state that the online backend is used,
    but the code passes backend = 'test' to uploadFile(); confirm which of the
    two is intended.

    Parameters:
        filenames: iterable of file names, each passed to uploadFile().
        username, password: credentials passed to signIn().
        cookieFileName: accepted for backward compatibility; not used here.

    Returns None. If signIn() reports a non-zero code, the error is logged and
    nothing is uploaded. Any HTTPError other than code 400 is re-raised after
    the session has been signed out.
    '''

    dropBox = ConditionsUploader()

    # uploadAllFiles() treats any non-zero signIn() code as a failure and aborts;
    # do the same here instead of attempting uploads without a valid session.
    authRet = dropBox.signIn(username, password)
    if authRet != 0:
        logging.error('Error while signing in (code %s). No file has been uploaded.', authRet)
        return

    try:
        for filename in filenames:
            try:
                result = dropBox.uploadFile(filename, backend = 'test')
            except HTTPError as e:
                if e.code == 400:
                    # 400 Bad Request: This is an exception related to the upload
                    # being wrong for some reason (e.g. duplicated file).
                    # Since for Tier0 this is not an issue, continue
                    logging.error('HTTP Exception 400 Bad Request: Upload-related, skipping. Message: %s', e)
                    continue

                # In any other case, re-raise.
                raise

            #-toDo: add a flag to say if we should retry or not. So far, all retries are done server-side (Tier-0),
            #       if we flag as failed any retry would not help and would result in the same error (e.g.
            #       when a file with an identical hash is uploaded again)
            #-review(2015-09-25): get feedback from tests at Tier-0 (action: AP)

            if not result: # dropbox reported an error when uploading, do not retry.
                logging.error('Error from dropbox, upload-related, skipping.')
    finally:
        # Sign out even when an unexpected HTTPError propagates to the caller.
        dropBox.signOut()
0801 
def re_upload( options ):
    '''Fetches an already uploaded file from the dropbox log database and uploads it again.

    The file is looked up by its hash (options.reUpload) in the FILES table of the
    log database; the database credentials are read from the netrc file. The stored
    content (a bz2-compressed tar archive expected to hold 'data.db' and
    'metadata.txt') is unpacked in the current working directory, renamed to
    'reupload_<hash>.db' / 'reupload_<hash>.txt' with the 'userText' metadata field
    prefixed by a re-upload time stamp, and finally handed to upload().

    Parameters:
        options: parsed command line options; hostname, authPath and reUpload are read here.

    Returns:
        1 when the files could not be prepared, otherwise the return value of upload().
    '''
    # Function-scope import: bz2 is only needed by this function.
    import bz2

    logDbSrv = prodLogDbSrv
    if options.hostname == defaultDevHostname:
        logDbSrv = devLogDbSrv
    netrcPath = None
    if options.authPath is not None:
        netrcPath = os.path.join( options.authPath,'.netrc' )
    try:
        netrcKey = '%s/%s' %(logDbSrv,logDbSchema)
        # Try to find the netrc entry
        (username, account, password) = netrc.netrc( netrcPath ).authenticators( netrcKey )
    except IOError as e:
        logging.error('Cannot access netrc file.')
        return 1
    except Exception as e:
        # Also reached when the entry is missing: authenticators() then returns None
        # and the tuple unpacking fails.
        logging.error('Netrc file is invalid: %s' %str(e))
        return 1

    fh = options.reUpload
    fdata = None
    conStr = '%s/%s@%s' %(username,password,logDbSrv)
    con = cx_Oracle.connect( conStr )
    try:
        cur = con.cursor()
        cur.execute('SELECT FILECONTENT, STATE FROM FILES WHERE FILEHASH = :HASH',{'HASH':fh})
        for r in cur.fetchall():
            logging.info("Found file %s in state '%s'" %(fh,r[1]))
            # The content is read while the connection is still open.
            # bytes.decode('bz2') does not exist in Python 3, so decompress explicitly.
            fdata = bz2.decompress( r[0].read() )
    finally:
        # Release the connection even if the query or the decompression fails.
        con.close()
    if fdata is None:
        logging.error("No file uploaded found with hash %s" %fh)
        return 1

    # Open the tar archive directly from memory: no temporary tar file is written,
    # so nothing is left behind if the extraction fails.
    # NOTE(review): extractall() trusts the member names stored in the archive;
    # consider an extraction filter if the stored content cannot be fully trusted.
    with tarfile.open( fileobj = io.BytesIO(fdata) ) as tar:
        tar.extractall()

    rname = 'reupload_%s' %fh
    dfile = 'data.db'
    mdfile = 'metadata.txt'

    if not os.path.exists(dfile):
        logging.error('Tar file does not contain the data file')
        return 1
    os.utime(dfile,None)
    os.chmod(dfile,0o755)
    os.rename(dfile,'%s.db' %rname)

    if not os.path.exists(mdfile):
        logging.error('Tar file does not contain the metadata file')
        return 1
    os.utime(mdfile,None)
    os.chmod(mdfile,0o755)
    with open(mdfile) as md:
        mdata = json.load(md)
    if mdata is None:
        logging.error('Metadata file is empty.')
        return 1
    datelabel = datetime.now().strftime("%y-%m-%d %H:%M:%S")
    logging.debug('Preparing new metadata file...')
    mdata['userText'] = 'reupload %s : %s' %(datelabel,mdata['userText'])
    # json.dumps() returns str, so the file has to be opened in text mode.
    with open( '%s.txt' %rname, 'w') as jf:
        jf.write( json.dumps( mdata, sort_keys=True, indent = 2 ) )
        jf.write('\n')
    os.remove(mdfile)

    logging.info('Files %s prepared for the upload.' %rname)
    arguments = [rname]
    return upload(options, arguments)
0874 
def upload(options, arguments):
    '''Uploads the given files and reports the outcome on standard output.

    Delegates to uploadAllFiles() and prints the dictionary it returns together
    with its 'status' entry. Returns that status, or -1 (after printing
    'Unexpected error.') when the dictionary has no 'status' entry.
    '''
    outcome = uploadAllFiles(options, arguments)

    if 'status' in outcome:
        code = outcome['status']
        print(outcome)
        print("upload ended with code: %s" %code)
        return code

    print('Unexpected error.')
    return -1
0885 
def main():
    '''Command-line entry point.

    Parses the options, configures logging and dispatches either to upload()
    (file names given) or to re_upload() (no file names, -r given). Returns the
    code of the function it dispatched to, or -2 on wrong usage.
    '''

    parser = optparse.OptionParser(usage =
        'Usage: %prog [options] <file> [<file> ...]\n'
    )

    # (flags, keyword settings) for every supported option, in help-output order.
    optionTable = (
        (('-d', '--debug'), {
            'dest': 'debug',
            'action': "store_true",
            'default': False,
            'help': 'Switch on printing debug information. Default: %default',
        }),
        (('-b', '--backend'), {
            'dest': 'backend',
            'default': defaultBackend,
            'help': 'dropBox\'s backend to upload to. Default: %default',
        }),
        (('-H', '--hostname'), {
            'dest': 'hostname',
            'default': defaultHostname,
            'help': 'dropBox\'s hostname. Default: %default',
        }),
        (('-u', '--urlTemplate'), {
            'dest': 'urlTemplate',
            'default': defaultUrlTemplate,
            'help': 'dropBox\'s URL template. Default: %default',
        }),
        (('-f', '--temporaryFile'), {
            'dest': 'temporaryFile',
            'default': defaultTemporaryFile,
            'help': 'Temporary file that will be used to store the first tar file. Note that it then will be moved to a file with the hash of the file as its name, so there will be two temporary files created in fact. Default: %default',
        }),
        (('-n', '--netrcHost'), {
            'dest': 'netrcHost',
            'default': defaultNetrcHost,
            'help': 'The netrc host (machine) from where the username and password will be read. Default: %default',
        }),
        (('-a', '--authPath'), {
            'dest': 'authPath',
            'default': None,
            'help': 'The path of the .netrc file for the authentication. Default: $HOME',
        }),
        (('-r', '--reUpload'), {
            'dest': 'reUpload',
            'default': None,
            'help': 'The hash of the file to upload again.',
        }),
    )
    for flags, settings in optionTable:
        parser.add_option(*flags, **settings)

    (options, arguments) = parser.parse_args()

    logging.basicConfig(
        format = '[%(asctime)s] %(levelname)s: %(message)s',
        level = logging.DEBUG if options.debug else logging.INFO,
    )

    reUploadRequested = options.reUpload is not None

    if arguments:
        # File names were given: this is a new upload, which excludes -r.
        if reUploadRequested:
            print("ERROR: options -r can't be specified on a new file upload.")
            return -2
        return upload(options, arguments)

    # No file names: only a re-upload by hash makes sense.
    if reUploadRequested:
        return re_upload(options)

    parser.print_help()
    return -2
0964 
def testTier0Upload():
    '''Manual check of uploadTier0Files().

    Reads the credentials stored for defaultNetrcHost in the user's default
    netrc file and uploads the single file 'testFiles/localSqlite-top2'.
    '''

    credentials = netrc.netrc().authenticators(defaultNetrcHost)
    user, _account, secret = credentials

    testFiles = ['testFiles/localSqlite-top2']

    uploadTier0Files(testFiles, user, secret, cookieFileName = None)
0974 
0975 
if __name__ == '__main__':

    # Run the command-line front end and hand its return code to the shell.
    # For a manual check of the Tier0 path, call testTier0Upload() here instead.
    exitCode = main()
    sys.exit(exitCode)