File indexing completed on 2024-04-06 12:23:57
0001 from __future__ import print_function
0002 from builtins import range
0003 import FWCore.ParameterSet.Config as cms
0004
0005 from PhysicsTools.PatAlgos.tools.ConfigToolBase import *
0006 from PhysicsTools.PatAlgos.tools.helpers import *
0007 from PhysicsTools.PatAlgos.tools.jetTools import *
0008 from Configuration.AlCa.autoCond import autoCond
0009 import Utilities.General.cmssw_das_client as das_client
0010 import os
0011 import socket
0012
0013
0014
0015
0016
0017
class PickRelValInputFiles( ConfigToolBase ):
    """ Picks up RelVal input files automatically and
    returns a list of strings with the paths to be used in [PoolSource].fileNames
    PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
    - useDAS       : switch to perform query in DAS rather than in DBS
                     optional; default: False
                     !!! The DBS branch is disabled in this implementation: without 'useDAS' only a warning is printed
                     !!! and an empty list is returned.
    - cmsswVersion : CMSSW release to pick up the RelVal files from
                     optional; default: the current release (determined automatically from environment)
    - formerVersion: use the last before the last valid CMSSW release to pick up the RelVal files from
                     applies also, if 'cmsswVersion' is set explicitly
                     optional; default: False
    - relVal       : RelVal sample to be used
                     optional; default: 'RelValTTbar'
    - dataTier     : data tier to be used
                     optional; default: 'GEN-SIM-RECO'
    - condition    : identifier of GlobalTag as defined in Configurations/PyReleaseValidation/python/autoCond.py
                     possibly overwritten, if 'globalTag' is set explicitly
                     optional; default: 'startup'
    - globalTag    : name of GlobalTag as it is used in the data path of the RelVals
                     optional; default: determined automatically as defined by 'condition' in Configurations/PyReleaseValidation/python/autoCond.py
                     !!! Determination is done for the release one runs in, not for the release the RelVals have been produced in.
                     !!! Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
                     !!! but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
    - maxVersions  : max. versioning number of RelVal to check
                     optional; default: 3
    - skipFiles    : number of files to skip for a found RelVal sample
                     optional; default: 0
    - numberOfFiles: number of files to pick up
                     setting it to negative values, returns all found ('skipFiles' remains active though)
                     optional; default: -1
    - debug        : switch to enable enhanced messages in 'stdout'
                     optional; default: False
    """

    _label = 'pickRelValInputFiles'
    # Class-level dictionary: shared by all instances and (re-)filled by every '__init__' call
    _defaultParameters = dicttypes.SortedKeysDict()

    def getDefaultParameters( self ):
        """Return the dictionary holding the default parameters of this tool."""
        return self._defaultParameters

    @staticmethod
    def _globalTagFromCondition( condition ):
        """Return the GlobalTag name used in RelVal data paths for an 'autoCond' key.

        If the 'autoCond' entry is a tuple or list, its first element is used.
        The last five characters of the entry are dropped
        (presumably a '::All'-like suffix -- TODO confirm against autoCond).
        """
        gt = autoCond[ condition ]
        if isinstance( gt, ( tuple, list ) ):
            gt = gt[ 0 ]
        return gt[ : -5 ]

    def __init__( self ):
        """Register the default parameters and initialise the current ones as a copy of them."""
        ConfigToolBase.__init__( self )
        self.addParameter( self._defaultParameters, 'useDAS'       , False                        , '' )
        self.addParameter( self._defaultParameters, 'cmsswVersion' , os.getenv( "CMSSW_VERSION" ) , 'auto from environment' )
        self.addParameter( self._defaultParameters, 'formerVersion', False                        , '' )
        self.addParameter( self._defaultParameters, 'relVal'       , 'RelValTTbar'                , '' )
        self.addParameter( self._defaultParameters, 'dataTier'     , 'GEN-SIM-RECO'               , '' )
        self.addParameter( self._defaultParameters, 'condition'    , 'startup'                    , '' )
        defaultGlobalTag = self._globalTagFromCondition( self.getDefaultParameters()[ 'condition' ].value )
        self.addParameter( self._defaultParameters, 'globalTag'    , defaultGlobalTag             , 'auto from \'condition\'' )
        self.addParameter( self._defaultParameters, 'maxVersions'  , 3                            , '' )
        self.addParameter( self._defaultParameters, 'skipFiles'    , 0                            , '' )
        self.addParameter( self._defaultParameters, 'numberOfFiles', -1                           , 'all' )
        self.addParameter( self._defaultParameters, 'debug'        , False                        , '' )
        self._parameters = copy.deepcopy( self._defaultParameters )
        self._comment = ""

    def __call__( self
                , useDAS        = None
                , cmsswVersion  = None
                , formerVersion = None
                , relVal        = None
                , dataTier      = None
                , condition     = None
                , globalTag     = None
                , maxVersions   = None
                , skipFiles     = None
                , numberOfFiles = None
                , debug         = None
                ):
        """Set the parameters and run the tool.

        Every argument left at 'None' is replaced by its default value
        (see the class documentation for the meaning of the parameters).
        A missing 'globalTag' is derived from the 'condition' actually in use,
        not taken from the stored default.
        Returns the list of file paths produced by 'apply()'.
        """
        defaults = self.getDefaultParameters()
        if useDAS is None:
            useDAS = defaults[ 'useDAS' ].value
        if cmsswVersion is None:
            cmsswVersion = defaults[ 'cmsswVersion' ].value
        if formerVersion is None:
            formerVersion = defaults[ 'formerVersion' ].value
        if relVal is None:
            relVal = defaults[ 'relVal' ].value
        if dataTier is None:
            dataTier = defaults[ 'dataTier' ].value
        if condition is None:
            condition = defaults[ 'condition' ].value
        if globalTag is None:
            # Same derivation as for the default in '__init__', incl. tuple/list entries in 'autoCond'
            globalTag = self._globalTagFromCondition( condition )
        if maxVersions is None:
            maxVersions = defaults[ 'maxVersions' ].value
        if skipFiles is None:
            skipFiles = defaults[ 'skipFiles' ].value
        if numberOfFiles is None:
            numberOfFiles = defaults[ 'numberOfFiles' ].value
        if debug is None:
            debug = defaults[ 'debug' ].value
        self.setParameter( 'useDAS'       , useDAS )
        self.setParameter( 'cmsswVersion' , cmsswVersion )
        self.setParameter( 'formerVersion', formerVersion )
        self.setParameter( 'relVal'       , relVal )
        self.setParameter( 'dataTier'     , dataTier )
        self.setParameter( 'condition'    , condition )
        self.setParameter( 'globalTag'    , globalTag )
        self.setParameter( 'maxVersions'  , maxVersions )
        self.setParameter( 'skipFiles'    , skipFiles )
        self.setParameter( 'numberOfFiles', numberOfFiles )
        self.setParameter( 'debug'        , debug )
        return self.apply()

    def messageEmptyList( self ):
        """Print the debug hint that an empty file list is returned."""
        print('%s DEBUG: Empty file list returned'%( self._label ))
        print(' This might be overwritten by providing input files explicitly to the source module in the main configuration file.')

    def apply( self ):
        """Determine the RelVal input files for the current parameters.

        Steps:
        1. Reduce 'cmsswVersion' to the release the RelVals are looked up for
           (strips patch-like suffixes; asks 'scram' for IBs or if 'formerVersion' is set).
        2. Check from the host name that the job runs at CERN or FNAL; elsewhere give up.
        3. Query DAS for dataset versions 'v<maxVersions>' down to 'v1' and take the files
           of the first version that is available at the local storage element.

        Returns the list of file paths; the list is empty whenever one of the steps fails.
        Raises SystemExit( 1 ), if DAS replies with a status different from 'ok'.
        """
        # Imported locally: this module has no explicit import of these names
        # (they may or may not be provided by one of the star imports).
        from subprocess import PIPE, Popen

        useDAS        = self._parameters[ 'useDAS'        ].value
        cmsswVersion  = self._parameters[ 'cmsswVersion'  ].value
        formerVersion = self._parameters[ 'formerVersion' ].value
        relVal        = self._parameters[ 'relVal'        ].value
        dataTier      = self._parameters[ 'dataTier'      ].value
        globalTag     = self._parameters[ 'globalTag'     ].value
        maxVersions   = self._parameters[ 'maxVersions'   ].value
        skipFiles     = self._parameters[ 'skipFiles'     ].value
        numberOfFiles = self._parameters[ 'numberOfFiles' ].value
        debug         = self._parameters[ 'debug'         ].value

        filePaths = []

        # Determine corresponding CMSSW version for RelVals
        preId      = '_pre'
        patchId    = '_patch'
        hltPatchId = '_hltpatch'
        dqmPatchId = '_dqmpatch'
        slhcId     = '_SLHC'
        rootId     = '_root'
        ibId       = '_X_'
        if patchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
        elif hltPatchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
        elif dqmPatchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
        elif rootId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
        elif slhcId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
        elif ibId in cmsswVersion or formerVersion:
            # Text mode, so that the output is 'str' also under Python 3 and can be compared to 'cmsswVersion'.
            # NOTE(review): 'l -c CMSSW' is handed to 'scram' as one single argument -- confirm this is intended.
            outputTuple = Popen( [ 'scram', 'l -c CMSSW' ], stdout = PIPE, stderr = PIPE, universal_newlines = True ).communicate()
            if len( outputTuple[ 1 ] ) != 0:
                print('%s INFO : SCRAM error'%( self._label ))
                if debug:
                    print(' from trying to determine last valid releases before \'%s\''%( cmsswVersion ))
                    print()
                    print(outputTuple[ 1 ])
                    print()
                    self.messageEmptyList()
                return filePaths
            # The second column of each output line is taken as release name;
            # lines with less than two columns (e.g. empty lines) are ignored.
            listedVersions = [ columns[ 1 ]
                               for columns in ( line.split() for line in outputTuple[ 0 ].splitlines() )
                               if len( columns ) > 1
                             ]
            versions = { 'last'      :''
                       , 'lastToLast':''
                       }
            for version in listedVersions:
                if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version:
                    # Only plain releases and pre-releases are candidates
                    if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ):
                        versions[ 'lastToLast' ] = versions[ 'last' ]
                        versions[ 'last' ]       = version
                        if version == cmsswVersion:
                            break
            if formerVersion:
                # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0
                if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
                    versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ]
                # Use the last listed pre-release of it as "former version" for CMSSW_X_Y_0
                elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
                    versions[ 'lastToLast' ] = ''
                    for version in listedVersions:
                        versionParts = version.partition( preId )
                        if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
                            versions[ 'lastToLast' ] = version
                        elif versions[ 'lastToLast' ] != '':
                            break
                # Don't use CMSSW_X_Y_0 as "former version" for pre-releases
                elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
                    versions[ 'lastToLast' ] = ''
                cmsswVersion = versions[ 'lastToLast' ]
            else:
                cmsswVersion = versions[ 'last' ]

        # Debugging output
        if debug:
            print('%s DEBUG: Called with...'%( self._label ))
            for key in self._parameters.keys():
                print(' %s:\t'%( key ), end=' ')
                print(self._parameters[ key ].value, end=' ')
                # Compare by value: object identity is not a reliable test for "equal to the default"
                if self._parameters[ key ].value == self.getDefaultParameters()[ key ].value:
                    print(' (default)')
                else:
                    print()
                if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value:
                    if formerVersion:
                        print(' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ))
                    else:
                        print(' ==> modified to last valid release %s'%( cmsswVersion ))

        # Check domain
        domain   = socket.getfqdn().split( '.' )
        domainSE = ''
        if len( domain ) == 0:
            print('%s INFO : Cannot determine domain of this computer'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths
        elif os.uname()[0] == "Darwin":
            print('%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths
        elif len( domain ) == 1:
            print('%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ))
            if debug:
                self.messageEmptyList()
            return filePaths
        if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ):
            print('%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ))
            if debug:
                self.messageEmptyList()
            return filePaths
        if domain[ -2 ] == 'cern':
            domainSE = 'T2_CH_CERN'
        elif domain[ -2 ] == 'fnal':
            domainSE = 'T1_US_FNAL_MSS'
        if debug:
            print('%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ))
            print('%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ))

        # Find files
        validVersion = 0
        dataset      = ''
        datasetAll   = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
        if useDAS:
            if debug:
                print('%s DEBUG: Using DAS query'%( self._label ))
            # NOTE(review): for 'numberOfFiles' <= 0 ("all files") the DAS limit becomes 1 -- confirm,
            # that 'das_client.get_data' does not restrict the reply to a single file then.
            dasLimit = numberOfFiles
            if dasLimit <= 0:
                dasLimit = 1
            # Try the highest dataset version first
            for version in range( maxVersions, 0, -1 ):
                filePaths    = []
                filePathsTmp = []
                fileCount    = 0
                dataset  = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
                dasQuery = 'file dataset=%s | grep file.name'%( dataset )
                if debug:
                    print('%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ))
                    print(' \'%s\''%( dasQuery ))
                jsondict = das_client.get_data(dasQuery,dasLimit)
                if debug:
                    print('%s DEBUG: Received DAS JSON dictionary:'%( self._label ))
                    print(' \'%s\''%( jsondict ))
                if jsondict[ 'status' ] != 'ok':
                    print('There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict))
                    # Same effect as the interactive helper 'exit( 1 )', but independent of the 'site' module
                    raise SystemExit( 1 )
                mongo_query = jsondict[ 'mongo_query' ]
                filters     = mongo_query[ 'filters' ]
                data        = jsondict[ 'data' ]
                if debug:
                    print('%s DEBUG: Query in JSON dictionary:'%( self._label ))
                    print(' \'%s\''%( mongo_query ))
                    print('%s DEBUG: Filters in query:'%( self._label ))
                    print(' \'%s\''%( filters ))
                    print('%s DEBUG: Data in JSON dictionary:'%( self._label ))
                    print(' \'%s\''%( data ))
                for row in data:
                    filePath = list( das_client.get_value( row, filters[ 'grep' ] ) )[ 0 ]
                    if debug:
                        print('%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ))
                    if len( filePath ) > 0:
                        if validVersion != version:
                            # First file of this dataset version: check once, that the dataset is at the local SE
                            jsontestdict    = das_client.get_data('site dataset=%s | grep site.name' % ( dataset ), 999)
                            mongo_testquery = jsontestdict[ 'mongo_query' ]
                            testfilters     = mongo_testquery[ 'filters' ]
                            testdata        = jsontestdict[ 'data' ]
                            if debug:
                                print('%s DEBUG: Received DAS JSON dictionary (site test):'%( self._label ))
                                print(' \'%s\''%( jsontestdict ))
                                print('%s DEBUG: Query in JSON dictionary (site test):'%( self._label ))
                                print(' \'%s\''%( mongo_testquery ))
                                print('%s DEBUG: Filters in query (site test):'%( self._label ))
                                print(' \'%s\''%( testfilters ))
                                print('%s DEBUG: Data in JSON dictionary (site test):'%( self._label ))
                                print(' \'%s\''%( testdata ))
                            foundSE = any( list( das_client.get_value( testrow, testfilters[ 'grep' ] ) )[ 0 ] == domainSE
                                           for testrow in testdata
                                         )
                            if not foundSE:
                                if debug:
                                    print('%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ))
                                # Give up on this dataset version; the next lower one is tried
                                break
                            validVersion = version
                            if debug:
                                print('%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ))
                        if numberOfFiles == 0:
                            break
                        # Protect from double entries
                        if not filePath in filePathsTmp:
                            filePathsTmp.append( filePath )
                            if debug:
                                print('%s DEBUG: File \'%s\' found'%( self._label, filePath ))
                            fileCount += 1
                            # The first 'skipFiles' distinct files are counted, but not returned
                            if fileCount > skipFiles:
                                filePaths.append( filePath )
                        elif debug:
                            print('%s DEBUG: File \'%s\' found again'%( self._label, filePath ))
                if validVersion > 0:
                    if numberOfFiles == 0 and debug:
                        print('%s DEBUG: No files requested'%( self._label ))
                    break
        else:
            if debug:
                print('%s DEBUG: Using DBS query'%( self._label ))
            print('%s WARNING: DBS query disabled for DBS3 transition to new API'%( self._label ))

        # Check output and return
        if validVersion == 0:
            print('%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ))
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) == 0:
            print('%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ))
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) < numberOfFiles:
            print('%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ))

        if debug:
            print('%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ))
        return filePaths
0401
# Module-level instance of the tool; it is callable through 'PickRelValInputFiles.__call__'.
pickRelValInputFiles = PickRelValInputFiles()