Warning, /DQMServices/Components/scripts/dqm-ls is written in an unsupported language. File is not indexed.
0001 #!/usr/bin/env python3
0002
0003 """Usage: dqm-ls [-s SERVER] [-n CONNECTIONS]
0004
0005 Parse ROOT file contents listings on a DQM GUI server.
0006
0007 In order to authenticate to the target server, standard grid certificate
0008 environment must be available. Typically this would be X509_CERT_DIR and
0009 either X509_USER_PROXY or X509_USER_CERT and X509_USER_KEY environment
0010 variables. If these variables are not set, the following defaults are
0011 checked for existence. Note that if the script falls back on using a
0012 key rather than a proxy, it will prompt for the key password.
0013 - $X509_CERT_DIR: /etc/grid-security/certificates
0014 - $X509_USER_KEY: $HOME/.globus/userkey.pem
0015 - $X509_USER_CERT: $HOME/.globus/usercert.pem
0016 """
0017
0018 from DQMServices.Components.HTTP import RequestManager
0019 from DQMServices.Components.X509 import SSLOptions
0020 import os, sys, re, pycurl, urllib
0021 from optparse import OptionParser
0022 from time import time, strptime, strftime, gmtime
0023 from calendar import timegm
0024 from datetime import datetime
0025 from urllib.parse import urlparse, quote
0026 from tempfile import mkstemp
0027 from traceback import print_exc
0028 from functools import cmp_to_key
0029
# Kinds of objects that can appear in a listing: directory or file.
DIR, FILE = 0, 1

# Identification string sent as the HTTP `User-agent` of every request.
ident = "DQMLS/1.0 python/%s.%s.%s" % (sys.version_info.major,
                                       sys.version_info.minor,
                                       sys.version_info.micro)

# SSL/X509 options; assigned further down, after option parsing.
ssl_opts = None

# HTTP request manager used for content requests; assigned further down.
reqman = None

# Running count of HTTP content requests that have completed.
nfetched = 0

# Every object discovered so far, as a list of dictionaries.
found = []
0048
def logme(msg, *args):
    """Print a log line stamped with the current time and process identity.

    `msg` is a %-style format string which is interpolated with `args`.
    The line is prefixed with the current local time and a
    `[<script name>/<pid>]` tag.
    """
    script_name = __file__.rsplit("/", 1)[-1]
    tag = "[%s/%d]" % (script_name, os.getpid())
    text = msg % args
    print(datetime.now(), tag, text)
0053
def myumask():
    """Return the umask of the current process, leaving it unchanged.

    os.umask() can only report the mask by replacing it, so a throwaway
    value is installed and the previous one is restored straight away.
    """
    saved = os.umask(0)
    try:
        return saved
    finally:
        os.umask(saved)
0059
def handle_init(c):
    """Give a download handle its custom bookkeeping properties.

    `temp_file`, `temp_path` and `local_path` are all created on the
    handle `c` and start out as None.
    """
    for attribute in ("temp_file", "temp_path", "local_path"):
        setattr(c, attribute, None)
0065
def request_init(c, options, path, kind):
    """`RequestManager` callback to initialise a directory or file request.

    Points the handle `c` at `options.server` followed by the URL-quoted
    `path`; every path other than the root "/" gets a trailing slash.

    When `kind` is `FILE` and `options.download` is set, the response body
    is additionally routed into a new temporary file and the handle's
    `temp_file`, `temp_path` and `local_path` properties are filled in so
    that parse_dir_and_files() can finish the download off.  Any failure
    while preparing the temporary file is logged rather than raised, and
    whatever had been created up to that point is released again.
    """
    url = options.server + quote(path)
    if path != "/":
        url += "/"
    c.setopt(pycurl.URL, url)

    # The handle must not carry state left over from a previous download.
    assert c.temp_file is None
    assert c.temp_path is None
    assert c.local_path is None

    # If this is file, prepare a temporary file for possible download.
    # mkstemp() is called without a directory, so the file is created in the
    # default temporary directory; parse_dir_and_files() moves it into place.
    if kind == FILE and options.download:
        fd = fp = tmp = None
        try:
            (fd, tmp) = mkstemp()
            fp = os.fdopen(fd, 'wb')
            c.setopt(pycurl.WRITEFUNCTION, fp.write)
            c.temp_file = fp
            c.temp_path = tmp
            c.local_path = path.strip('/')
            c.buffer = None
        except Exception as e:
            logme("ERROR: %s: %s", path, str(e))
            print_exc()

            # mkstemp() leaves both the descriptor and the file to the
            # caller, so release them here instead of leaking them.
            if fp is not None:
                try:
                    fp.close()
                except OSError:
                    pass
            elif fd is not None:
                try:
                    os.close(fd)
                except OSError:
                    pass
            if tmp is not None:
                try:
                    os.remove(tmp)
                except OSError:
                    pass
            c.temp_file = None
            c.temp_path = None
            c.local_path = None
0088
def cleanup(c):
    """Clean up file copy operation, usually after any failures.

    Closes the handle's temporary file and deletes both the temporary and
    the local destination path, each only if the matching property is set.
    All of it is best effort: ordinary errors are ignored so that one
    failing step does not prevent the others.  Afterwards `temp_file`,
    `temp_path`, `local_path` and `buffer` on `c` are reset to None.
    """
    # `except Exception` rather than a bare `except`, so that Ctrl-C and
    # interpreter exit requests are not silently swallowed during cleanup.
    if c.temp_file:
        try:
            c.temp_file.close()
        except Exception:
            pass
    if c.temp_path:
        try:
            os.remove(c.temp_path)
        except Exception:
            pass
    if c.local_path:
        try:
            os.remove(c.local_path)
        except Exception:
            pass
    c.temp_file = None
    c.temp_path = None
    c.local_path = None
    c.buffer = None
0104
def report_error(c, task, errmsg, errno):
    """`RequestManager` callback invoked when a content request fails.

    Writes a one-line failure report for `task` to standard error and
    counts the request in `nfetched`.
    """
    global nfetched
    report = "FAILED to retrieve %s: %s (%d)" % (task, errmsg, errno)
    print(report, file=sys.stderr)
    nfetched += 1
0109
def parse_dir_and_files(c):
    """`RequestManager` callback to handle a successfully retrieved response.

    The task attached to the handle, `c.task`, is an `(options, path, kind)`
    tuple, and what happens depends on it:

    - `FILE` task with `options.download` set: the temporary file that the
      response was written into is closed, given `0666 & ~UMASK` permissions
      and moved to `c.local_path`.  A failure is logged, not raised, and
      cleanup(c) runs afterwards in either case.
    - `DIR` task: the HTML listing held in `c.buffer` is parsed.  Every
      directory entry is appended to `found` and, with `options.recursive`,
      queued for listing.  Every file entry is appended to `found` (only
      the ones named `options.match_file` if that is set) and, with
      `options.download`, queued for download.

    Either way the request is counted in `nfetched`.  If verbosity has been
    requested, a simple progress bar is shown: one dot for every ten
    completed requests and a line break after every 750.
    """
    global nfetched
    options, path, kind = c.task
    root_url = urlparse(options.server).path.rstrip('/')

    if kind == FILE and options.download:
        assert c.local_path, "Expected local path property to be set"
        assert c.temp_file, "Exepected temporary file property to be set"
        try:
            # Stop routing data into the file before closing it.
            c.setopt(pycurl.WRITEFUNCTION, lambda *args: None)
            c.temp_file.close()
            c.temp_file = None

            os.chmod(c.temp_path, 0o0666 & ~UMASK)

            # The paths derive from server-provided names, so move the file
            # without a shell; unlike an unchecked `mv` command, a failed
            # move raises and is reported by the handler below.
            import shutil
            shutil.move(c.temp_path, c.local_path)
            c.temp_path = None

            # Clearing local_path keeps cleanup() from deleting the result.
            c.local_path = None
            logme("INFO: downloaded %s", path)
        except Exception as e:
            logme("ERROR: downloading %s into %s failed: %s",
                  path, c.local_path, str(e))
            print_exc()
        finally:
            cleanup(c)

    elif kind == DIR:
        # NOTE(review): the single-space alternatives in the pattern and the
        # comparisons below may be an extraction artifact of an HTML entity
        # such as `&nbsp;` -- confirm against the server's actual output.
        items = re.findall(r"<tr><td><a href='(.*?)'>(.*?)</a></td><td>(\d+| |-)</td>"
                           r"<td>( |\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d UTC)</td>",
                           c.buffer.getvalue().decode('utf-8'))

        for href, name, size, date in items:
            assert href.startswith(root_url)
            entry_path = href[len(root_url):]

            # Absent dates and sizes are represented as -1.
            if date == " ":
                date = -1
            else:
                date = timegm(strptime(date, "%Y-%m-%d %H:%M:%S %Z"))

            if size == " " or size == "-":
                size = -1
            else:
                size = int(size)

            # A trailing slash marks a directory entry.
            is_dir = entry_path.endswith("/")
            if is_dir:
                assert size == -1
                entry_path = entry_path[:-1]
            else:
                assert size >= 0

            obj = {'task': c.task,
                   'kind': DIR if is_dir else FILE,
                   'name': name,
                   'size': size,
                   'date': date,
                   'path': entry_path}

            if is_dir:
                found.append(obj)
                if options.recursive:
                    reqman.put((options, entry_path, DIR))
            elif not options.match_file or name == options.match_file:
                found.append(obj)
                if options.download:
                    reqman.put((options, entry_path, FILE))

    nfetched += 1
    if options.verbose and nfetched % 10 == 0:
        sys.stdout.write(".")
        sys.stdout.flush()
        if nfetched % 750 == 0:
            print()
0198
def objcmp(a, b):
    """Compare two found objects by date first and by size second.

    Returns the difference of the `date` values if they differ, otherwise
    the difference of the `size` values; zero means both are equal.
    """
    return (a['date'] - b['date']) or (a['size'] - b['size'])
0208
# Parse command line options.  The module docstring doubles as usage text.
op = OptionParser(usage=__doc__)
op.add_option("-s", "--server", dest="server",
              type="string", action="store", metavar="SERVER",
              default="https://cmsweb.cern.ch/dqm/offline/data/browse",
              help="Pull content from SERVER")
op.add_option("-n", "--connections", dest="connections",
              type="int", action="store", metavar="NUM",
              default=10, help="Use NUM concurrent connections")
op.add_option("-v", "--verbose", dest="verbose",
              action="store_true", default=False,
              help="Show verbose scan information")
op.add_option("-r", "--recursive", dest="recursive",
              action="store_true", default=False,
              help="Perform a recursive scan starting from the root directory.")
op.add_option("-d", "--download", dest="download",
              action="store_true", default=False,
              help="Download all touched ROOT files locally. To be used with extreme care.")
op.add_option("-m", "--match_file", dest="match_file",
              type="string", action="store", default="",
              help="Filter results based on exact file name matching.")
options, args = op.parse_args()

# The script takes options only; any positional argument is an error.
if args:
    print("Too many arguments", file=sys.stderr)
    sys.exit(1)
if not options.server:
    # The message belongs on standard error, hence the `file=` keyword.
    print("Server contact string required", file=sys.stderr)
    sys.exit(1)
0237
# Process umask, used to derive the permissions of downloaded files.
UMASK = myumask()

# In case a user specifies a root file as address, remove it from the
# address and return results that match only that name.  The pattern is a
# raw string so that `\.` reaches the regex engine as an escaped dot rather
# than being an invalid string escape.
gr = re.match(r"(.*)/(DQM_V.*\.root)$", options.server)
if gr:
    options.server = gr.group(1)
    options.match_file = gr.group(2)
0246
# Collect the SSL X509 parameters and, when verbose, show what was picked.
ssl_opts = SSLOptions()
if options.verbose:
    for label, attribute in (("Using SSL cert dir", "ca_path"),
                             ("Using SSL private key", "key_file"),
                             ("Using SSL public key", "cert_file")):
        print(label, getattr(ssl_opts, attribute))

# Create the request manager that fetches contents, wiring in the callbacks
# defined in this script.
reqman = RequestManager(
    num_connections=options.connections,
    ssl_opts=ssl_opts,
    user_agent=ident,
    handle_init=handle_init,
    request_init=request_init,
    request_respond=parse_dir_and_files,
    request_error=report_error,
)
0262
# Scan the server starting from the root directory, timing the whole run.
start = time()
reqman.put((options, "/", DIR))
reqman.process()
end = time()

# Report everything found, ordered by date and then by size.  Each line has
# the size, the date in UTC and the path (recursive scans) or plain name.
for x in sorted(found, key=lambda obj: (obj['date'], obj['size'])):
    stamp = strftime("%Y-%m-%d %H:%M:%S %Z", gmtime(x['date']))
    label = x['path'] if (options.recursive and x['path']) else x['name']
    print("%20s\t%s\t%s" % (x['size'], stamp, label))

if options.verbose:
    summary = (nfetched, len(found), end - start)
    print("\nFound %d directories, %d objects in %.3f seconds" % summary)