Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:23:57

0001 from __future__ import print_function
0002 from builtins import range
0003 import FWCore.ParameterSet.Config as cms
0004 
0005 from PhysicsTools.PatAlgos.tools.ConfigToolBase import *
0006 from PhysicsTools.PatAlgos.tools.helpers import *
0007 from PhysicsTools.PatAlgos.tools.jetTools import *
0008 from Configuration.AlCa.autoCond import autoCond
0009 import Utilities.General.cmssw_das_client as das_client
0010 import os
0011 import socket
0012 
0013 
0014 ## ------------------------------------------------------
0015 ## Automatic pick-up of RelVal input files
0016 ## ------------------------------------------------------
0017 
class PickRelValInputFiles( ConfigToolBase ):
    """  Picks up RelVal input files automatically and
  returns a vector of strings with the paths to be used in [PoolSource].fileNames
    PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
    - useDAS       : switch to perform query in DAS rather than in DBS
                     optional; default: False
      !!!            The DBS query is disabled: with 'useDAS' set to False, only a warning is printed and an empty list is returned.
    - cmsswVersion : CMSSW release to pick up the RelVal files from
                     optional; default: the current release (determined automatically from environment)
    - formerVersion: use the last before the last valid CMSSW release to pick up the RelVal files from
                     applies also, if 'cmsswVersion' is set explicitly
                     optional; default: False
    - relVal       : RelVal sample to be used
                     optional; default: 'RelValTTbar'
    - dataTier     : data tier to be used
                     optional; default: 'GEN-SIM-RECO'
    - condition    : identifier of GlobalTag as defined in the 'autoCond' dictionary of Configuration.AlCa.autoCond
                     possibly overwritten, if 'globalTag' is set explicitly
                     optional; default: 'startup'
    - globalTag    : name of GlobalTag as it is used in the data path of the RelVals
                     optional; default: determined automatically as defined by 'condition' in the 'autoCond' dictionary of Configuration.AlCa.autoCond
      !!!            Determination is done for the release one runs in, not for the release the RelVals have been produced in.
      !!!            Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
                     but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
    - maxVersions  : max. versioning number of RelVal to check
                     optional; default: 3
    - skipFiles    : number of files to skip for a found RelVal sample
                     optional; default: 0
    - numberOfFiles: number of files to pick up
                     setting it to negative values, returns all found ('skipFiles' remains active though)
                     optional; default: -1
    - debug        : switch to enable enhanced messages in 'stdout'
                     optional; default: False
    """

    _label             = 'pickRelValInputFiles'
    # Class-level container: the defaults are registered here by '__init__'
    _defaultParameters = dicttypes.SortedKeysDict()

    def getDefaultParameters( self ):
        """Return the container holding the default parameters of this tool."""
        return self._defaultParameters

    @staticmethod
    def _globalTagFromCondition( condition ):
        """Return the GlobalTag name derived from the 'autoCond' entry for 'condition'.

        Shared by '__init__' and '__call__', so that both derive the default
        GlobalTag in exactly the same way.
        """
        gt = autoCond[ condition ]
        # An 'autoCond' entry can be a sequence; its first element is used then
        if isinstance( gt, ( tuple, list ) ):
            gt = gt[ 0 ]
        # NOTE(review): the last 5 characters are dropped unconditionally, presumably a
        # trailing '::All' -- confirm that current 'autoCond' entries still carry it
        return gt[ : -5 ]

    def __init__( self ):
        """Register the default value of each parameter and initialise the current parameters with them."""
        ConfigToolBase.__init__( self )
        self.addParameter( self._defaultParameters, 'useDAS'       , False                                                               , '' )
        self.addParameter( self._defaultParameters, 'cmsswVersion' , os.getenv( "CMSSW_VERSION" )                                        , 'auto from environment' )
        self.addParameter( self._defaultParameters, 'formerVersion', False                                                               , '' )
        self.addParameter( self._defaultParameters, 'relVal'       , 'RelValTTbar'                                                       , '' )
        self.addParameter( self._defaultParameters, 'dataTier'     , 'GEN-SIM-RECO'                                                      , '' )
        self.addParameter( self._defaultParameters, 'condition'    , 'startup'                                                           , '' )
        gt = self._globalTagFromCondition( self.getDefaultParameters()[ 'condition' ].value )
        self.addParameter( self._defaultParameters, 'globalTag'    , gt                                                                  , 'auto from \'condition\'' )
        self.addParameter( self._defaultParameters, 'maxVersions'  , 3                                                                   , '' )
        self.addParameter( self._defaultParameters, 'skipFiles'    , 0                                                                   , '' )
        self.addParameter( self._defaultParameters, 'numberOfFiles', -1                                                                  , 'all' )
        self.addParameter( self._defaultParameters, 'debug'        , False                                                               , '' )
        self._parameters = copy.deepcopy( self._defaultParameters )
        self._comment = ""

    def __call__( self
                , useDAS        = None
                , cmsswVersion  = None
                , formerVersion = None
                , relVal        = None
                , dataTier      = None
                , condition     = None
                , globalTag     = None
                , maxVersions   = None
                , skipFiles     = None
                , numberOfFiles = None
                , debug         = None
                ):
        """Set the parameters and return the list of file paths found by 'apply'.

        Each argument left at 'None' is replaced by its default value; a missing
        'globalTag' is derived from 'condition' (after 'condition' got its default).
        """
        defaults = self.getDefaultParameters()
        if useDAS is None:
            useDAS = defaults[ 'useDAS' ].value
        if cmsswVersion is None:
            cmsswVersion = defaults[ 'cmsswVersion' ].value
        if formerVersion is None:
            formerVersion = defaults[ 'formerVersion' ].value
        if relVal is None:
            relVal = defaults[ 'relVal' ].value
        if dataTier is None:
            dataTier = defaults[ 'dataTier' ].value
        if condition is None:
            condition = defaults[ 'condition' ].value
        if globalTag is None:
            # auto from 'condition'; handles sequence-valued 'autoCond' entries like '__init__' does
            globalTag = self._globalTagFromCondition( condition )
        if maxVersions is None:
            maxVersions = defaults[ 'maxVersions' ].value
        if skipFiles is None:
            skipFiles = defaults[ 'skipFiles' ].value
        if numberOfFiles is None:
            numberOfFiles = defaults[ 'numberOfFiles' ].value
        if debug is None:
            debug = defaults[ 'debug' ].value
        self.setParameter( 'useDAS'       , useDAS )
        self.setParameter( 'cmsswVersion' , cmsswVersion )
        self.setParameter( 'formerVersion', formerVersion )
        self.setParameter( 'relVal'       , relVal )
        self.setParameter( 'dataTier'     , dataTier )
        self.setParameter( 'condition'    , condition )
        self.setParameter( 'globalTag'    , globalTag )
        self.setParameter( 'maxVersions'  , maxVersions )
        self.setParameter( 'skipFiles'    , skipFiles )
        self.setParameter( 'numberOfFiles', numberOfFiles )
        self.setParameter( 'debug'        , debug )
        return self.apply()

    def messageEmptyList( self ):
        """Print the debug message that accompanies the return of an empty file list."""
        print('%s DEBUG: Empty file list returned'%( self._label ))
        print('    This might be overwritten by providing input files explicitly to the source module in the main configuration file.')

    def _datasetAtSite( self, dataset, domainSE, debug ):
        """Return True, if a DAS site query for 'dataset' lists the site name 'domainSE'."""
        jsontestdict    = das_client.get_data( 'site dataset=%s | grep site.name' % ( dataset ), 999 )
        mongo_testquery = jsontestdict[ 'mongo_query' ]
        testfilters     = mongo_testquery[ 'filters' ]
        testdata        = jsontestdict[ 'data' ]
        if debug:
            print('%s DEBUG: Received DAS JSON dictionary (site test):'%( self._label ))
            print('    \'%s\''%( jsontestdict ))
            print('%s DEBUG: Query in JSON dictionary (site test):'%( self._label ))
            print('    \'%s\''%( mongo_testquery ))
            print('%s DEBUG: Filters in query (site test):'%( self._label ))
            print('    \'%s\''%( testfilters ))
            print('%s DEBUG: Data in JSON dictionary (site test):'%( self._label ))
            print('    \'%s\''%( testdata ))
        for testrow in testdata:
            # Only the first value of a row is tested; a row without any value does not match
            siteName = next( iter( das_client.get_value( testrow, testfilters[ 'grep' ] ) ), None )
            if siteName == domainSE:
                return True
        return False

    def apply( self ):
        """Determine the RelVal dataset from the current parameters and return the paths of its files.

        Returns a list of strings. The list is empty, if the release cannot be
        determined, if the host is not in the 'cern.ch' or 'fnal.gov' domain,
        if the DBS query is selected (it is disabled) or if nothing is found.
        Terminates the program via 'SystemExit', if DAS reports a problem with
        the file query.
        """
        useDAS        = self._parameters[ 'useDAS'        ].value
        cmsswVersion  = self._parameters[ 'cmsswVersion'  ].value
        formerVersion = self._parameters[ 'formerVersion' ].value
        relVal        = self._parameters[ 'relVal'        ].value
        dataTier      = self._parameters[ 'dataTier'      ].value
        condition     = self._parameters[ 'condition'     ].value # only used for GT determination in initialization, if GT not explicitly given
        globalTag     = self._parameters[ 'globalTag'     ].value
        maxVersions   = self._parameters[ 'maxVersions'   ].value
        skipFiles     = self._parameters[ 'skipFiles'     ].value
        numberOfFiles = self._parameters[ 'numberOfFiles' ].value
        debug         = self._parameters[ 'debug'         ].value

        filePaths = []

        # The default is read from the environment and is 'None', if 'CMSSW_VERSION' is not set
        if cmsswVersion is None:
            print('%s INFO : Cannot determine CMSSW version (\'CMSSW_VERSION\' not set in environment?)'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths

        # Determine corresponding CMSSW version for RelVals
        preId      = '_pre'
        patchId    = '_patch'    # patch releases
        hltPatchId = '_hltpatch' # HLT patch releases
        dqmPatchId = '_dqmpatch' # DQM patch releases
        slhcId     = '_SLHC'     # SLHC releases
        rootId     = '_root'     # ROOT test releases
        ibId       = '_X_'       # IBs
        # The first identifier found (in this order) is cut off together with everything after it
        for suffixId in ( patchId, hltPatchId, dqmPatchId, rootId, slhcId ):
            if suffixId in cmsswVersion:
                cmsswVersion = cmsswVersion.split( suffixId )[ 0 ]
                break
        else:
            # NOTE(review): 'formerVersion' is evaluated only here, i.e. it has no effect for
            # the release types handled above -- confirm that this is intended
            if ibId in cmsswVersion or formerVersion:
                # Each option is a separate argument; text mode is needed to compare the output with 'str' in Python 3
                scramProcess = Popen( [ 'scram', 'l', '-c', 'CMSSW' ], stdout = PIPE, stderr = PIPE, universal_newlines = True )
                scramOut, scramErr = scramProcess.communicate()
                if len( scramErr ) != 0:
                    print('%s INFO : SCRAM error'%( self._label ))
                    if debug:
                        print('    from trying to determine last valid releases before \'%s\''%( cmsswVersion ))
                        print()
                        print(scramErr)
                        print()
                        self.messageEmptyList()
                    return filePaths
                # The release name is the second field of a line; lines without it are skipped
                releases = []
                for line in scramOut.splitlines():
                    fields = line.split()
                    if len( fields ) > 1:
                        releases.append( fields[ 1 ] )
                versions = { 'last'      :''
                           , 'lastToLast':''
                           }
                ibBase      = cmsswVersion.split( ibId )[ 0 ]
                releaseBase = cmsswVersion.rpartition( '_' )[ 0 ]
                excludedIds = ( patchId, hltPatchId, dqmPatchId, slhcId, ibId, rootId )
                for version in releases:
                    if ibBase in version or releaseBase in version:
                        if not any( excludedId in version for excludedId in excludedIds ):
                            versions[ 'lastToLast' ] = versions[ 'last' ]
                            versions[ 'last' ]       = version
                            if version == cmsswVersion:
                                break
                # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1')
                if formerVersion:
                    # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0
                    if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
                        versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ] # works only, if 'CMSSW_X_Y_0' exists ;-)
                    # Use pre-release as "former version" for CMSSW_X_Y_0
                    elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
                        versions[ 'lastToLast' ] = ''
                        for version in releases:
                            versionParts = version.partition( preId )
                            if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
                                versions[ 'lastToLast' ] = version
                            elif versions[ 'lastToLast' ] != '':
                                break
                    # Don't use CMSSW_X_Y_0 as "former version" for pre-releases
                    elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
                        versions[ 'lastToLast' ] = '' # no alternative :-(
                    cmsswVersion = versions[ 'lastToLast' ]
                else:
                    cmsswVersion = versions[ 'last' ]

        # Debugging output
        if debug:
            print('%s DEBUG: Called with...'%( self._label ))
            for key in self._parameters.keys():
                print('    %s:\t'%( key ), end=' ')
                print(self._parameters[ key ].value, end=' ')
                # Compare by value: an equal, but separately created object is a default, too
                if self._parameters[ key ].value == self.getDefaultParameters()[ key ].value:
                    print(' (default)')
                else:
                    print()
                if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value:
                    if formerVersion:
                        print('    ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ))
                    else:
                        print('    ==> modified to last valid release %s'%( cmsswVersion ))

        # Check domain
        domain = socket.getfqdn().split( '.' )
        # 'split' never returns an empty list: an empty host name results in a single empty string
        if domain == [ '' ]:
            print('%s INFO : Cannot determine domain of this computer'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths
        elif os.uname()[0] == "Darwin":
            print('%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths
        elif len( domain ) == 1:
            print('%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ))
            if debug:
                self.messageEmptyList()
            return filePaths
        # Site names searched for in DAS, keyed by the last two parts of the host name
        siteSEs = { ( 'cern', 'ch'  ): 'T2_CH_CERN'
                  , ( 'fnal', 'gov' ): 'T1_US_FNAL_MSS'
                  }
        site = ( domain[ -2 ], domain[ -1 ] )
        if site not in siteSEs:
            print('%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ))
            if debug:
                self.messageEmptyList()
            return filePaths
        domainSE = siteSEs[ site ]
        if debug:
            print('%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ))
            print('%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ))

        # Find files
        validVersion = 0
        dataset    = ''
        datasetAll = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
        if useDAS:
            if debug:
                print('%s DEBUG: Using DAS query'%( self._label ))
            # NOTE(review): a non-positive 'numberOfFiles' results in a limit of 1 here --
            # confirm against 'das_client.get_data' that this matches the documented "returns all found"
            dasLimit = numberOfFiles
            if dasLimit <= 0:
                dasLimit = 1
            # Try the highest dataset version first
            for version in range( maxVersions, 0, -1 ):
                filePaths    = []
                filePathsTmp = []
                fileCount    = 0
                dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
                dasQuery = 'file dataset=%s | grep file.name'%( dataset )
                if debug:
                    print('%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ))
                    print('    \'%s\''%( dasQuery ))
                jsondict = das_client.get_data(dasQuery,dasLimit)
                if debug:
                    print('%s DEBUG: Received DAS JSON dictionary:'%( self._label ))
                    print('    \'%s\''%( jsondict ))
                if jsondict[ 'status' ] != 'ok':
                    print('There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict))
                    # Same effect as 'exit( 1 )', but independent of the 'site' module
                    raise SystemExit( 1 )
                mongo_query = jsondict[ 'mongo_query' ]
                filters     = mongo_query[ 'filters' ]
                data        = jsondict[ 'data' ]
                if debug:
                    print('%s DEBUG: Query in JSON dictionary:'%( self._label ))
                    print('    \'%s\''%( mongo_query ))
                    print('%s DEBUG: Filters in query:'%( self._label ))
                    print('    \'%s\''%( filters ))
                    print('%s DEBUG: Data in JSON dictionary:'%( self._label ))
                    print('    \'%s\''%( data ))
                for row in data:
                    # Only the first value of a row is used; a row without any value is treated like an empty entry
                    filePath = next( iter( das_client.get_value( row, filters[ 'grep' ] ) ), '' )
                    if debug:
                        print('%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ))
                    if len( filePath ) > 0:
                        # The site test is done once per dataset version, for the first file entry found
                        if validVersion != version:
                            if not self._datasetAtSite( dataset, domainSE, debug ):
                                if debug:
                                    print('%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ))
                                break
                            validVersion = version
                            if debug:
                                print('%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ))
                        if numberOfFiles == 0:
                            break
                        # protect from double entries ( 'unique' flag in query does not work here)
                        if not filePath in filePathsTmp:
                            filePathsTmp.append( filePath )
                            if debug:
                                print('%s DEBUG: File \'%s\' found'%( self._label, filePath ))
                            fileCount += 1
                            # needed, since "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles")
                            if fileCount > skipFiles:
                                filePaths.append( filePath )
                        elif debug:
                            print('%s DEBUG: File \'%s\' found again'%( self._label, filePath ))
                if validVersion > 0:
                    if numberOfFiles == 0 and debug:
                        print('%s DEBUG: No files requested'%( self._label ))
                    break
        else:
            if debug:
                print('%s DEBUG: Using DBS query'%( self._label ))
            # The former implementation based on the 'dbs search' command line tool has been
            # removed here; no files are searched for in this branch.
            print('%s WARNING: DBS query disabled for DBS3 transition to new API'%( self._label ))

        # Check output and return
        if validVersion == 0:
            print('%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ))
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) == 0:
            print('%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ))
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) < numberOfFiles:
            print('%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ))

        if debug:
            print('%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ))
        return filePaths
0401 
# Module-level instance of the tool; being callable (s. 'PickRelValInputFiles.__call__'),
# it is used like a function that returns the list of file paths
pickRelValInputFiles = PickRelValInputFiles()