Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /DQMServices/Components/scripts/dqm-ls is written in an unsupported language. File is not indexed.

0001 #!/usr/bin/env python3
0002 
0003 """Usage: dqm-ls [-s SERVER] [-n CONNECTIONS]
0004 
0005 Parse ROOT file contents listings on a DQM GUI server.
0006 
0007 In order to authenticate to the target server, standard grid certificate
0008 environment must be available. Typically this would be X509_CERT_DIR and
0009 either X509_USER_PROXY or X509_USER_CERT and X509_USER_KEY environment
0010 variables. If these variables are not set, the following defaults are
0011 checked for existence. Note that if the script falls back on using a
0012 key rather than a proxy, it will prompt for the key password.
0013 - $X509_CERT_DIR: /etc/grid-security/certificates
0014 - $X509_USER_KEY: $HOME/.globus/userkey.pem
0015 - $X509_USER_CERT: $HOME/.globus/usercert.pem
0016 """
0017 
0018 from DQMServices.Components.HTTP import RequestManager
0019 from DQMServices.Components.X509 import SSLOptions
0020 import os, sys, re, pycurl, urllib
0021 from optparse import OptionParser
0022 from time import time, strptime, strftime, gmtime
0023 from calendar import timegm
0024 from datetime import datetime
0025 from urllib.parse import urlparse, quote
0026 from tempfile import mkstemp
0027 from traceback import print_exc
0028 from functools import cmp_to_key
0029 
# Kinds of object a request task can refer to.
DIR, FILE = 0, 1

# Identification string sent as the HTTP `User-agent`.
ident = "DQMLS/1.0 python/%s.%s.%s" % tuple(sys.version_info[:3])

# SSL/X509 options; assigned after the command line has been parsed.
ssl_opts = None

# HTTP request manager for content requests; assigned after the command
# line has been parsed.
reqman = None

# Running count of HTTP content requests, updated by the callbacks.
nfetched = 0

# Objects discovered so far, one dictionary per directory or file.
found = []
0048 
def logme(msg, *args):
  """Print a timestamped log line tagged with the script name and process id.

  `msg` is a %-style format string which is interpolated with `args`.
  """
  # Only the last component of the script path is shown in the tag.
  script = __file__.rpartition("/")[2]
  tag = "[%s/%d]" % (script, os.getpid())
  text = msg % args
  print(datetime.now(), tag, text)
0053 
def myumask():
  """Return the umask of the current process, leaving it as it was."""
  # os.umask() can only report the mask by replacing it, so install a
  # throwaway value and immediately put the original one back.
  saved = os.umask(0)
  os.umask(saved)
  return saved
0059 
def handle_init(c):
  """Give a download handle its custom properties, all initially unset.

  `temp_file`, `temp_path` and `local_path` are only filled in by
  request_init() when a file is actually being downloaded.
  """
  for prop in ("temp_file", "temp_path", "local_path"):
    setattr(c, prop, None)
0065 
def request_init(c, options, path, kind):
  """`RequestManager` callback to initialise a request for `path`.

  Points the handle `c` at `options.server` plus the URL-quoted `path`.
  For `FILE` requests made while `options.download` is set, the response
  body is additionally redirected into a fresh temporary file, which is
  recorded on the handle (`temp_file`, `temp_path`, `local_path`) so that
  parse_dir_and_files() can move it into place once the transfer is done.

  Failures while preparing the temporary file are logged, not raised; the
  temporary file is then discarded and the handle's download properties
  are left as `None`.
  """
  # Every path except the root gets a trailing slash appended.
  trailer = "/" if path != "/" else ""
  c.setopt(pycurl.URL, options.server + quote(path) + trailer)
  assert c.temp_file is None
  assert c.temp_path is None
  assert c.local_path is None

  # If this is a file, prepare a temporary destination file (created by
  # mkstemp() in the default temporary directory) for the download.
  # parse_dir_and_files() will finish this off.
  if kind == FILE and options.download:
    fd = fp = tmp = None
    try:
      (fd, tmp) = mkstemp()
      fp = os.fdopen(fd, 'wb')
      c.setopt(pycurl.WRITEFUNCTION, fp.write)
      c.temp_file = fp
      c.temp_path = tmp
      c.local_path = path.strip('/')
      # NOTE(review): presumably clears the in-memory response buffer since
      # the body now goes to the file -- confirm against RequestManager.
      c.buffer = None
    except Exception as e:
      logme("ERROR: %s: %s", path, str(e))
      print_exc()

      # Do not leak the descriptor or the temporary file when set-up fails
      # part way through. Once fdopen() succeeded, `fp` owns the descriptor.
      try:
        if fp is not None:
          fp.close()
        elif fd is not None:
          os.close(fd)
      except OSError:
        pass
      if tmp is not None:
        try:
          os.remove(tmp)
        except OSError:
          pass
      c.temp_file = None
      c.temp_path = None
      c.local_path = None
0088 
def cleanup(c):
  """Clean up a file copy operation, usually after a failure.

  Closes the handle's temporary file, removes the temporary file and the
  local destination file if their paths are still set, and resets all
  download properties on `c` to `None`.

  Each step is best-effort: ordinary errors (for example a file that no
  longer exists) are ignored. Only `Exception` subclasses are suppressed,
  so `KeyboardInterrupt` and `SystemExit` still propagate.
  """
  if c.temp_file:
    try:
      c.temp_file.close()
    except Exception:
      pass

  # Callers clear `local_path` after a successful move, so a completed
  # download is not deleted here.
  for leftover in (c.temp_path, c.local_path):
    if leftover:
      try:
        os.remove(leftover)
      except Exception:
        pass

  c.temp_file = None
  c.temp_path = None
  c.local_path = None
  c.buffer = None
0104 
def report_error(c, task, errmsg, errno):
  """`RequestManager` callback invoked when a content request fails.

  Writes a one-line failure report for `task` to standard error and counts
  the request in the global `nfetched` tally.
  """
  global nfetched
  report = "FAILED to retrieve %s: %s (%d)" % (task, errmsg, errno)
  sys.stderr.write(report + "\n")
  nfetched = nfetched + 1
0109 
# One row of the server's HTML directory listing: link target, link text,
# size column and date column.
_LISTING_ROW = re.compile(
  r"<tr><td><a href='(.*?)'>(.*?)</a></td><td>(\d+|&nbsp;|-)</td>"
  r"<td>(&nbsp;|\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d UTC)</td>")

def _finish_download(c, path):
  """Move a completed download from its temporary file to `c.local_path`.

  Errors are logged rather than raised. cleanup() always runs afterwards;
  on success `c.local_path` has been cleared first, so the downloaded file
  is kept and only the left-over state is discarded.
  """
  # Imported here because nothing else in the script needs it.
  import shutil

  assert c.local_path, "Expected local path property to be set"
  assert c.temp_file, "Exepected temporary file property to be set"
  try:
    # Detach the file from the handle before closing it, so no later write
    # callback can hit a closed file.
    c.setopt(pycurl.WRITEFUNCTION, lambda *args: None)
    c.temp_file.close()
    c.temp_file = None

    os.chmod(c.temp_path, 0o0666 & ~UMASK)
    # shutil.move() instead of a shell `mv`: the paths come from the server
    # and must not be interpreted by a shell, and a failed move raises here
    # instead of being silently reported as a successful download.
    shutil.move(c.temp_path, c.local_path)

    c.local_path = None
    logme("INFO: downloaded %s", path)
  except Exception as e:
    logme("ERROR: downloading %s into %s failed: %s",
          path, c.local_path, str(e))
    print_exc()
  finally:
    cleanup(c)

def _record_listing(c, options, root_url):
  """Parse the directory listing in `c.buffer` and record what it contains.

  Every listed directory is appended to `found`, and queued for scanning
  when `options.recursive` is set. Every listed file is appended to `found`
  unless `options.match_file` is set and differs from the file name;
  recorded files are queued for download when `options.download` is set.
  """
  html = c.buffer.getvalue().decode('utf-8')
  for href, name, size, date in _LISTING_ROW.findall(html):
    assert href.startswith(root_url)
    relpath = href[len(root_url):]

    # Missing dates and sizes are represented as -1.
    if date == "&nbsp;":
      date = -1
    else:
      date = timegm(strptime(date, "%Y-%m-%d %H:%M:%S %Z"))
    size = -1 if size in ("&nbsp;", "-") else int(size)

    # Directory links end in a slash and carry no size.
    is_dir = relpath.endswith("/")
    if is_dir:
      assert size == -1
      relpath = relpath[:-1]
    else:
      assert size >= 0

    obj = {'task': c.task,
           'kind': DIR if is_dir else FILE,
           'name': name,
           'size': size,
           'date': date,
           'path': relpath}

    if is_dir:
      found.append(obj)
      if options.recursive:
        reqman.put((options, relpath, DIR))
    elif not options.match_file or name == options.match_file:
      found.append(obj)
      if options.download:
        reqman.put((options, relpath, FILE))

def parse_dir_and_files(c):
  """`RequestManager` callback to handle a successful response.

  For a directory request this gets called once per every directory which
  has been successfully retrieved from the server. It parses the HTML
  response and turns it into object listing with all the file meta
  information, then counts the directory in `nfetched`.

  For a file request made with downloads enabled, it moves the downloaded
  temporary file to its final local path.

  If verbosity has been requested, also shows simple progress bar on the
  search progress, one dot for every ten directories retrieved.
  """
  global nfetched
  options, path, kind = c.task

  if kind == FILE and options.download:
    _finish_download(c, path)

  elif kind == DIR:
    root_url = urlparse(options.server).path.rstrip('/')
    _record_listing(c, options, root_url)

    # Count once per directory response (not once per listed entry), so
    # the tally matches the progress bar and the final summary.
    nfetched += 1
    if options.verbose and nfetched % 10 == 0:
      sys.stdout.write(".")
      sys.stdout.flush()
      # Break the progress bar into lines of 75 dots.
      if nfetched % 750 == 0:
        print()
0198 
def objcmp(a, b):
  """Three-way comparison of two found objects: by date, then by size.

  Returns the difference of the 'date' fields when they differ, otherwise
  the difference of the 'size' fields (zero when both fields match).
  """
  return (a['date'] - b['date']) or (a['size'] - b['size'])
0208 
# Parse command line options. The module docstring doubles as usage text.
op = OptionParser(usage = __doc__)
op.add_option("-s", "--server", dest = "server",
              type = "string", action = "store", metavar = "SERVER",
              default = "https://cmsweb.cern.ch/dqm/offline/data/browse",
              help = "Pull content from SERVER")
op.add_option("-n", "--connections", dest = "connections",
              type = "int", action = "store", metavar = "NUM",
              default = 10, help = "Use NUM concurrent connections")
op.add_option("-v", "--verbose", dest = "verbose",
              action = "store_true", default = False,
              help = "Show verbose scan information")
op.add_option("-r", "--recursive", dest = "recursive",
              action = "store_true", default = False,
              help = "Perform a recursive scan starting from the root directory.")
op.add_option("-d", "--download", dest = "download",
              action = "store_true", default = False,
              help = "Download all touched ROOT files locally. To be used with extreme care.")
op.add_option("-m", "--match_file", dest = "match_file",
              type= "string", action = "store", default = "",
              help = "Filter results based on exact file name matching.")
options, args = op.parse_args()

# The script takes options only; any positional argument is an error.
if args:
  print("Too many arguments", file=sys.stderr)
  sys.exit(1)

# An explicitly empty --server leaves nothing to contact.
if not options.server:
  print("Server contact string required", file=sys.stderr)
  sys.exit(1)
0237 
# Process umask, used to derive the permissions of downloaded files.
UMASK = myumask()

# In case a user specifies a root file as address, remove it from the
# address and return results that match only that name.
# (Raw string, so that `\.` reaches the regex engine as an escaped dot
# without relying on Python passing an unknown string escape through.)
gr = re.match(r"(.*)/(DQM_V.*\.root)$", options.server)
if gr:
  options.server = gr.group(1)
  options.match_file = gr.group(2)
0246 
# Get SSL X509 parameters.
ssl_opts = SSLOptions()
if options.verbose:
  print("Using SSL cert dir", ssl_opts.ca_path)
  print("Using SSL private key", ssl_opts.key_file)
  print("Using SSL public key", ssl_opts.cert_file)

# Start a request manager for contents, wired to the callbacks above.
reqman = RequestManager(
  num_connections = options.connections,
  ssl_opts = ssl_opts,
  user_agent = ident,
  handle_init = handle_init,
  request_init = request_init,
  request_respond = parse_dir_and_files,
  request_error = report_error)

# Scan from the root directory, timing the whole run.
start = time()
reqman.put((options, "/", DIR))
reqman.process()
end = time()

# Report everything found, ordered by date and then by size.
for entry in sorted(found, key=cmp_to_key(objcmp)):
  stamp = strftime("%Y-%m-%d %H:%M:%S %Z", gmtime(entry['date']))
  # Recursive scans show the path; otherwise (or when the path is empty)
  # the bare name is shown.
  if options.recursive and entry['path']:
    label = entry['path']
  else:
    label = entry['name']
  print("%20s\t%s\t%s" % (entry['size'], stamp, label))

if options.verbose:
  elapsed = end - start
  print("\nFound %d directories, %d objects in %.3f seconds" % (nfetched, len(found), elapsed))