File indexing completed on 2024-11-26 02:34:35
0001
'''
Fetches files whose names match the specified regular expressions from the DQM GUI.

Author:  Albertas Gimbutas,  Vilnius University (LT)
e-mail:  albertasgim@gmail.com
'''
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022 import re
0023 import sys
0024 import os
0025
0026 from multiprocessing import Pool, Queue, Process
0027 from Queue import Empty
0028 from os.path import basename, isfile
0029 from optparse import OptionParser
0030 from urllib2 import build_opener, Request
0031
0032 try:
0033 from Utilities.RelMon.authentication import X509CertOpen
0034 except ImportError:
0035 from authentication import X509CertOpen
0036
0037
def auth_wget(url, chunk_size=1048576):
    """Fetch ``url`` through an opener built with ``X509CertOpen``.

    Responses whose ``Content-Length`` is below 1 MB (1048576 bytes) are read
    in one go, written to a file named after the last path component of the
    URL (when that component is not empty) and returned to the caller.

    Larger responses are streamed to a file of that name and nothing is
    returned.  If such a file already exists in the current directory the
    download is skipped.  This branch numbers its progress messages by
    looking the file name up in the module-level ``selected_files`` list, so
    it raises ``ValueError`` when the name is not in that list.

    :param url: address of the resource to fetch.
    :param chunk_size: number of bytes requested per read while streaming a
        large response to disk.
    :returns: the response body for small responses, otherwise ``None``.
    """
    opener = build_opener(X509CertOpen())
    url_file = opener.open(Request(url))
    # try/finally rather than ``with``: the response object returned by
    # urllib2 is not a context manager, but it must still be closed on every
    # exit path (early return, skip, or error).
    try:
        size = int(url_file.headers["Content-Length"])
        filename = basename(url)

        if size < 1048576:
            readed = url_file.read()
            # A URL ending in '/' (e.g. a directory listing) has an empty
            # basename; in that case the content is only returned.
            if filename != '':
                with open(filename, 'wb') as outfile:
                    outfile.write(readed)
            return readed

        file_id = selected_files.index(filename)

        if isfile("./%s" % filename):
            print('%d. Exsits on disk. Skipping.' % (file_id + 1))
            return

        print('%d. Downloading...' % (file_id + 1))
        # The ``with`` block guarantees the output file is flushed and closed
        # even if a read from the network fails part-way through.
        with open(filename, 'wb') as outfile:
            chunk = url_file.read(chunk_size)
            while chunk:
                outfile.write(chunk)
                chunk = url_file.read(chunk_size)
        print('%d. Done.' % (file_id + 1))
    finally:
        url_file.close()
0071
0072
0073
# Command-line interface.  --data / --mc share one destination
# (``is_from_data``), which stays None when neither flag is given.
parser = OptionParser(usage='usage: %prog [options]')
parser.add_option('-d', '--data', action='store_true', dest='is_from_data',
                  help='Fetch data relvals.')
parser.add_option('-m', '--mc', action='store_false', dest='is_from_data',
                  help='Fetch Monte Carlo relvals.')
parser.add_option('-r', '--release', action='store', dest='release',
                  help='Release to fetch from. RELEASE format "CMSSW_x_x_x", e.g. CMSSW_5_3_2.')
parser.add_option('-e', '--re', '--regexp', action='store', dest='regexp', default='',
                  help='Comma separated regular expresions for file names. e.g. to fetch '
                       'files, which names contain "cos" or "jet" and does not contain "2010", use: '
                       '"cos,jet,^((?!2010).)*$".')
parser.add_option('--mthreads', action='store', default='3', dest='mthreads',
                  help='Number of threads for file download. Default is 3.')
parser.add_option('--dry', action='store_true', default=False, dest='dry_run',
                  help='Show files matched by regular expresion, but do not download them.')

(options, args) = parser.parse_args()

# Validate BEFORE touching the values: without --release, ``options.release``
# is None and calling .strip() on it would raise AttributeError instead of
# printing the usage error below.  ``parser.error`` exits the program.
if options.is_from_data is None:
    parser.error('You have to specify the directory, use --mc for "RelVal" or '
                 '--data for "RelValData"')
elif options.release is None:
    parser.error('You have to specify the CMSSW release, use --release option. '
                 'E.g. --release CMSSW_5_3_2')
elif not options.mthreads.isdigit():
    parser.error('Bad --mthreads argument format. It has to be integer. E.g. '
                 '--mthreads 3')

# Drop quote characters and '=' signs left around the values
# (e.g. from ``--release="CMSSW_5_3_2"`` style invocations).
options.release = options.release.strip('"\'=')
options.regexp = options.regexp.strip('"\'=')
0104
0105
# Pick the top-level directory on the server: data or Monte Carlo relvals.
relvaldir = "RelValData" if options.is_from_data else "RelVal"

# Keep only the "CMSSW_<major>_<minor>_" prefix of the release; the release
# directory on the server is that prefix followed by "x".
release = re.findall(r'(CMSSW_\d*_\d*_)\d*(?:_[\w\d]*)?', options.release)
if not release:
    parser.error('No such CMSSW release found. Please check the ``--release`` commandline option value.')
releasedir = release[0] + "x"

base_url = 'https://cmsweb.cern.ch/dqm/relval/data/browse/ROOT/'
filedir_url = base_url + relvaldir + '/' + releasedir + '/'
# NOTE: auth_wget only returns the body for responses smaller than 1 MB;
# the listing page is expected to fit within that limit.
filedir_html = auth_wget(filedir_url)

# Extract the link texts from the directory listing.  The first match is
# dropped -- presumably the link to the parent directory (TODO confirm).
file_list_re = re.compile(r"<a href='[-./\w]*'>([-./\w]*)<")
all_files = file_list_re.findall(filedir_html)[1:]

# Number of parallel downloads: anything outside 1..3 falls back to 3.
options.mthreads = int(options.mthreads)
if options.mthreads > 3 or options.mthreads < 1:
    options.mthreads = 3

# A file is selected when EVERY user-supplied expression and the release
# name itself match somewhere in the file name.
file_res = [re.compile(r) for r in options.regexp.split(',') + [options.release]]
selected_files = [f for f in all_files if all(r.search(f) for r in file_res)]

print('Downloading files:')
for i, name in enumerate(selected_files):
    print('%d. %s' % (i + 1, name))

if not options.dry_run:
    print('\nProgress:')
    # The downloads run in a pool of worker processes; close and join it on
    # every exit path so no workers are left behind.
    pool = Pool(options.mthreads)
    try:
        pool.map(auth_wget, [filedir_url + name for name in selected_files])
    finally:
        pool.close()
        pool.join()