Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:23:30

0001 #!/usr/bin/env python3
0002 #pylint: disable-msg=C0301,C0103
0003 
0004 """
0005 DAS command line tool
0006 """
0007 from __future__ import print_function
0008 __author__ = "Valentin Kuznetsov"
0009 
0010 import sys
0011 if  sys.version_info < (2, 6):
0012     raise Exception("DAS requires python 2.6 or greater")
0013 
import json
import re
import time
import urllib
from optparse import OptionParser, OptionGroup

try:  # Python 3: urllib2 was split into urllib.request / urllib.error / urllib.parse
    from urllib.error import HTTPError
    from urllib.parse import urlencode
    from urllib.request import HTTPHandler, HTTPSHandler, Request, build_opener
except ImportError:  # Python 2 fallback
    import urllib2
    from urllib import urlencode
    from urllib2 import HTTPError, HTTPHandler, HTTPSHandler, Request, build_opener
0020 
class DASOptionParser:
    """
    Command-line option parser for the DAS cache client.

    Wraps an optparse ``OptionParser`` (exposed as ``self.parser``) that
    carries a single "Das options" group with the verbose, query, host,
    idx, limit and format options.
    """
    def __init__(self, usage = None):
        if usage is None:
            usage = 'usage: %prog [options] --query "dataset=/HT/Run2011A-*/AOD"'
        self.parser = OptionParser(usage=usage)
        das_group = OptionGroup(
            self.parser,
            "Das options",
            "These options relate to the Das client interface.",
        )
        # (flags, option settings); every option uses action="store".
        option_table = (
            (("-v", "--verbose"),
             dict(type="int", default=0, dest="verbose",
                  help="verbose output")),
            (("--query",),
             dict(type="string", default=False, dest="query",
                  help="specify query for your request")),
            (("--host",),
             dict(type="string", default='https://cmsweb.cern.ch', dest="host",
                  help="specify host name of DAS cache server, default https://cmsweb.cern.ch")),
            (("--idx",),
             dict(type="int", default=0, dest="idx",
                  help="start index for returned result set, aka pagination, use w/ limit")),
            (("--limit",),
             dict(type="int", default=10, dest="limit",
                  help="number of returned results (results per page)")),
            (("--format",),
             dict(type="string", default="json", dest="format",
                  help="specify return data format (json or plain), default json")),
        )
        for flags, settings in option_table:
            das_group.add_option(*flags, action="store", **settings)
        self.parser.add_option_group(das_group)

    def get_opt(self):
        """
        Parse the process command line and return optparse's
        ``(options, positional_args)`` pair.
        """
        parsed = self.parser.parse_args()
        return parsed
0054 
def get_value(data, filters):
    """
    Yield one string per plain filter, taken from a single data row.

    Filters containing a comparison character ('>', '<' or '=') are skipped.
    Every other filter is read as a dotted key path: each path component
    descends into the current value when it is a dict holding that key, and
    when the current value is a list, into the first dict element holding
    that key. Whatever value is reached at the end of the path (possibly the
    untouched row itself) is yielded as ``str(value)``.
    """
    comparison_chars = ('>', '<', '=')
    for spec in filters:
        if any(char in spec for char in comparison_chars):
            continue
        node = dict(data)
        for part in spec.split('.'):
            if isinstance(node, dict) and part in node:
                node = node[part]
            if isinstance(node, list):
                # Pick the first list element that is a dict carrying the key.
                holder = next(
                    (entry for entry in node
                     if isinstance(entry, dict) and part in entry),
                    None,
                )
                if holder is not None:
                    node = holder[part]
        yield str(node)
0070 
# A DAS request id (pid) is exactly 32 lowercase alphanumeric characters.
_PID_PATTERN = re.compile(r'^[a-z0-9]{32}')


def _is_pid(data):
    """Return True when a response body is a bare 32-character request id."""
    return bool(data and isinstance(data, str)
                and len(data) == 32 and _PID_PATTERN.match(data))


def _read_response(opener, url, headers):
    """Fetch url through opener and return the body as a native str."""
    fdesc = opener.open(Request(url=url, headers=headers))
    try:
        body = fdesc.read()
    finally:
        # Close the handle even if read() fails.
        fdesc.close()
    if not isinstance(body, str):
        # Python 3 hands back bytes; the pid check and callers expect text.
        body = body.decode('utf-8')
    return body


def get_data(host, query, idx, limit, debug):
    """
    Contact DAS server and retrieve data for given DAS query.

    Parameters:
        host:  server base URL; must start with http:// or https://
        query: DAS query string, sent as the ``input`` parameter
        idx:   start index of the result set
        limit: number of results to return
        debug: truthy value turns on HTTP(S) debug output of the opener

    Returns the response body as a string. While the server answers with a
    32-character request id instead of data, the request is repeated with
    that id as ``pid``, waiting 5 seconds at first and doubling the wait up
    to 30 seconds between attempts. If a polling request fails with an
    HTTPError, the error is printed and an empty string is returned.

    Raises Exception when host lacks an http(s):// prefix. An HTTPError on
    the very first request is not caught.
    """
    if not re.match('http[s]{0,1}://', host):
        msg = 'Invalid hostname: %s' % host
        raise Exception(msg)
    params = {'input': query, 'idx': idx, 'limit': limit}
    base_url = host + '/das/cache'
    headers = {"Accept": "application/json"}
    if debug:
        # Install debug handlers for both schemes; the default host is https.
        opener = build_opener(HTTPHandler(debuglevel=1),
                              HTTPSHandler(debuglevel=1))
    else:
        opener = build_opener()

    url = base_url + '?%s' % urlencode(params, doseq=True)
    data = _read_response(opener, url, headers)

    wait = 5       # initial waiting time in seconds
    max_wait = 30  # upper bound of the waiting time in seconds
    while _is_pid(data):
        params.update({'pid': data})
        url = base_url + '?%s' % urlencode(params, doseq=True)
        try:
            data = _read_response(opener, url, headers)
        except HTTPError as err:
            print(err)
            return ""
        if not _is_pid(data):
            # Final answer arrived: return right away instead of sleeping.
            break
        time.sleep(wait)
        wait = min(wait * 2, max_wait)
    return data
0122 
def main():
    """
    Command-line entry point.

    Parses the options, requires a query, fetches the DAS response and
    prints it. With ``--format plain`` the response is decoded as JSON and,
    when its ``mongo_query`` has ``filters``, the filtered values of each
    data row are printed space-separated, one row per line; with any other
    format the raw response is printed unchanged.
    """
    options, _ = DASOptionParser().get_opt()
    if not options.query:
        raise Exception('You must provide input query')
    response = get_data(options.host, options.query, options.idx,
                        options.limit, options.verbose)

    if options.format != 'plain':
        print(response)
        return

    payload = json.loads(response)
    mongo_query = payload['mongo_query']
    if 'filters' not in mongo_query:
        # Plain output is only produced for queries that carry filters.
        return
    filters = mongo_query['filters']
    records = payload['data']
    if isinstance(records, dict):
        print(' '.join(list(get_value(records, filters))))
    elif isinstance(records, list):
        for record in records:
            print(' '.join(list(get_value(record, filters))))
    else:
        print(payload)
0152