File indexing completed on 2023-03-17 11:26:50
0001
0002 '''
0003 Script fetches files matching specified RegExps from DQM GUI.
0004
0005 Author: Albertas Gimbutas, Vilnius University (LT)
0006 e-mail: albertasgim@gmail.com
0007 '''
0008 from __future__ import print_function
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023 import re
0024 import sys
0025 import os
0026
0027 from multiprocessing import Pool, Queue, Process
0028 from Queue import Empty
0029 from os.path import basename, isfile
0030 from optparse import OptionParser
0031 from urllib2 import build_opener, Request
0032
0033 try:
0034 from Utilities.RelMon.authentication import X509CertOpen
0035 except ImportError:
0036 from authentication import X509CertOpen
0037
0038
def auth_wget(url, chunk_size=1048576):
    """Fetch ``url`` with an opener built around ``X509CertOpen``.

    Responses whose ``Content-Length`` is below 1 MiB are read in one go,
    written to the current directory under the URL's basename (only when
    that basename is non-empty) and returned.  Larger responses are
    streamed to a file of the same name in ``chunk_size``-byte pieces and
    nothing is returned; if such a file already exists the download is
    skipped.

    The large-response path reports progress using the position of the
    file name in the module-level ``selected_files`` list, so that list
    has to exist and contain the name before a large file is requested.

    :param url: address to fetch.
    :param chunk_size: number of bytes read per iteration when streaming.
    :returns: the response body for small responses, otherwise ``None``.
    """
    # Responses smaller than this are kept in memory and returned.
    small_limit = 1048576

    opener = build_opener(X509CertOpen())
    url_file = opener.open(Request(url))
    try:
        size = int(url_file.headers["Content-Length"])
        filename = basename(url)

        if size < small_limit:
            readed = url_file.read()
            # A URL ending in '/' has an empty basename: nothing to save.
            if filename != '':
                with open(filename, 'wb') as outfile:
                    outfile.write(readed)
            return readed

        file_id = selected_files.index(filename)

        # NOTE(review): a partially downloaded file left by an interrupted
        # run also satisfies this check and will be skipped.
        if isfile("./%s" % filename):
            print('%d. Exsits on disk. Skipping.' % (file_id + 1))
            return None

        print('%d. Downloading...' % (file_id + 1))
        with open(filename, 'wb') as outfile:
            chunk = url_file.read(chunk_size)
            while chunk:
                outfile.write(chunk)
                chunk = url_file.read(chunk_size)
        print('%d. Done.' % (file_id + 1))
    finally:
        # Release the connection on every exit path, including errors.
        url_file.close()
0072
0073
0074
# Command-line interface of the script.
parser = OptionParser(usage='usage: %prog [options]')
parser.add_option('-d', '--data', action='store_true', dest='is_from_data',
                  help='Fetch data relvals.')
parser.add_option('-m', '--mc', action='store_false', dest='is_from_data',
                  help='Fetch Monte Carlo relvals.')
parser.add_option('-r', '--release', action='store', dest='release',
                  help='Release to fetch from. RELEASE format "CMSSW_x_x_x", e.g. CMSSW_5_3_2.')
parser.add_option('-e', '--re', '--regexp', action='store', dest='regexp', default='',
                  help='Comma separated regular expresions for file names. e.g. to fetch '+
                  'files, which names contain "cos" or "jet" and does not contain "2010", use: '+
                  '"cos,jet,^((?!2010).)*$".')
parser.add_option('--mthreads', action='store', default='3', dest='mthreads',
                  help='Number of threads for file download. Default is 3.')
parser.add_option('--dry', action='store_true', default=False, dest='dry_run',
                  help='Show files matched by regular expresion, but do not download them.')

(options, args) = parser.parse_args()

# Validate before touching the values: ``release`` and ``is_from_data`` have
# no default, so they are None when the option is omitted.  Stripping
# ``options.release`` first would raise AttributeError on None and hide the
# usage message below.
if options.is_from_data is None:
    parser.error('You have to specify the directory, use --mc for "RelVal" or ' +
                 '--data for "RelValData"')
elif options.release is None:
    parser.error('You have to specify the CMSSW release, use --release option. ' +
                 'E.g. --release CMSSW_5_3_2')
elif not options.mthreads.isdigit():
    parser.error('Bad --mthreads argument format. It has to be integer. E.g. ' +
                 '--mthreads 3')

# Drop quote and '=' characters left around the values, e.g. when the option
# was given as ``--release="CMSSW_5_3_2"``.
options.release = options.release.strip('"\'=')
options.regexp = options.regexp.strip('"\'=')
0105
0106
# Server-side directory: "RelValData" when --data was given, else "RelVal".
relvaldir = "RelValData" if options.is_from_data else "RelVal"

# Keep only the "CMSSW_<a>_<b>_" prefix of the release; the listing directory
# is that prefix followed by "x" (e.g. CMSSW_5_3_2 -> CMSSW_5_3_x).
release = re.findall(r'(CMSSW_\d*_\d*_)\d*(?:_[\w\d]*)?', options.release)
if not release:
    parser.error('No such CMSSW release found. Please check the ``--release`` commandline option value.')
releasedir = release[0] + "x"

base_url = 'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/'
filedir_url = base_url + relvaldir + '/' + releasedir + '/'
filedir_html = auth_wget(filedir_url)

# Extract the link texts from the directory listing.  The first match is
# dropped -- presumably the link to the parent directory; confirm against
# the actual listing page.
file_list_re = re.compile(r"<a href='[-./\w]*'>([-./\w]*)<")
all_files = file_list_re.findall(filedir_html)[1:]

# Anything outside 1..3 falls back to 3 worker processes.
options.mthreads = int(options.mthreads)
if options.mthreads > 3 or options.mthreads < 1:
    options.mthreads = 3

# A file is selected only if it matches every user-supplied expression and
# the release string itself (which is also used as a regular expression).
file_res = [re.compile(r) for r in options.regexp.split(',') + [options.release]]
# ``selected_files`` must stay a module-level name: auth_wget() looks file
# names up in it to number its progress messages.
selected_files = [f for f in all_files if all(r.search(f) for r in file_res)]

print('Downloading files:')
for i, name in enumerate(selected_files):
    print('%d. %s' % (i+1, name))

if not options.dry_run:
    print('\nProgress:')
    pool = Pool(options.mthreads)
    try:
        pool.map(auth_wget, [filedir_url + name for name in selected_files])
    finally:
        # map() has either finished all downloads or raised; in both cases
        # the workers are no longer needed, so release them explicitly.
        pool.terminate()
        pool.join()