File indexing completed on 2024-11-27 03:17:59
0001 from builtins import range
0002 import FWCore.ParameterSet.Config as cms
0003
0004 from PhysicsTools.PatAlgos.tools.ConfigToolBase import *
0005 from PhysicsTools.PatAlgos.tools.helpers import *
0006 from PhysicsTools.PatAlgos.tools.jetTools import *
0007 from Configuration.AlCa.autoCond import autoCond
0008 import Utilities.General.cmssw_das_client as das_client
0009 import os
0010 import socket
0011
0012
0013
0014
0015
0016
class PickRelValInputFiles( ConfigToolBase ):
    """Pick up RelVal input files automatically.

    Calling an instance returns a list of strings with the file paths to be
    used in [PoolSource].fileNames. The list is empty if no files could be
    determined (e.g. SCRAM error, host outside cern.ch/fnal.gov, no dataset
    found).

    Usage:
      PickRelValInputFiles()( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )

    Parameters (all optional, pass as keywords; 'None' selects the default):
    - useDAS       : switch to perform the query in DAS rather than in DBS;
                     the DBS branch is disabled and only prints a warning,
                     so no files are found without this switch
                     default: False
    - cmsswVersion : CMSSW release to pick up the RelVal files from
                     default: the current release (environment variable CMSSW_VERSION)
    - formerVersion: use the last before the last valid CMSSW release to pick up the RelVal files from;
                     applies also, if 'cmsswVersion' is set explicitly
                     default: False
    - relVal       : RelVal sample to be used
                     default: 'RelValTTbar'
    - dataTier     : data tier to be used
                     default: 'GEN-SIM-RECO'
    - condition    : identifier of the GlobalTag as defined in Configuration.AlCa.autoCond;
                     only used to derive 'globalTag', if that is not set explicitly
                     default: 'startup'
    - globalTag    : name of the GlobalTag as it is used in the data path of the RelVals
                     default: the 'autoCond' entry for 'condition' (first element, if that
                     entry is a tuple or list) with its last five characters removed
                     !!! Determination is done for the release one runs in, not for the release the RelVals have been produced in.
                     !!! Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
                         but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
    - maxVersions  : max. versioning number ('v<N>') of the RelVal dataset to check;
                     versions are tried from 'maxVersions' down to 1
                     default: 3
    - skipFiles    : number of files to skip for a found RelVal sample
                     default: 0
    - numberOfFiles: number of files to pick up;
                     setting it to negative values, returns all found ('skipFiles' remains active though)
                     default: -1
    - debug        : switch to enable enhanced messages in 'stdout'
                     default: False
    """

    _label             = 'pickRelValInputFiles'
    # Shared at class level: all instances register their defaults in the same dictionary.
    _defaultParameters = dicttypes.SortedKeysDict()

    @staticmethod
    def _globalTagFromCondition( condition ):
        """Return the GlobalTag name derived from the 'autoCond' entry for 'condition'.

        If the entry is a tuple or list, its first element is used. The last
        five characters of the resulting string are removed.
        Raises KeyError, if 'condition' is not a key of 'autoCond'.
        """
        gt = autoCond[ condition ]
        if isinstance( gt, ( tuple, list ) ):
            gt = gt[ 0 ]
        return gt[ : -5 ]

    def getDefaultParameters( self ):
        """Return the dictionary of default parameters."""
        return self._defaultParameters

    def __init__( self ):
        """Register the default value of each parameter and start from a copy of the defaults."""
        ConfigToolBase.__init__( self )
        self.addParameter( self._defaultParameters, 'useDAS'       , False                        , '' )
        self.addParameter( self._defaultParameters, 'cmsswVersion' , os.getenv( "CMSSW_VERSION" ) , 'auto from environment' )
        self.addParameter( self._defaultParameters, 'formerVersion', False                        , '' )
        self.addParameter( self._defaultParameters, 'relVal'       , 'RelValTTbar'                , '' )
        self.addParameter( self._defaultParameters, 'dataTier'     , 'GEN-SIM-RECO'               , '' )
        self.addParameter( self._defaultParameters, 'condition'    , 'startup'                    , '' )
        defaultCondition = self.getDefaultParameters()[ 'condition' ].value
        self.addParameter( self._defaultParameters, 'globalTag'    , self._globalTagFromCondition( defaultCondition ), 'auto from \'condition\'' )
        self.addParameter( self._defaultParameters, 'maxVersions'  , 3                            , '' )
        self.addParameter( self._defaultParameters, 'skipFiles'    , 0                            , '' )
        self.addParameter( self._defaultParameters, 'numberOfFiles', -1                           , 'all' )
        self.addParameter( self._defaultParameters, 'debug'        , False                        , '' )
        self._parameters = copy.deepcopy( self._defaultParameters )
        self._comment = ""

    def __call__( self
                , useDAS        = None
                , cmsswVersion  = None
                , formerVersion = None
                , relVal        = None
                , dataTier      = None
                , condition     = None
                , globalTag     = None
                , maxVersions   = None
                , skipFiles     = None
                , numberOfFiles = None
                , debug         = None
                ):
        """Set the parameters and return the list of file paths found by 'apply'.

        Each argument left at 'None' is replaced by its registered default;
        'globalTag' is instead derived from the (possibly defaulted) 'condition'.
        See the class documentation for the meaning of the parameters.
        """
        defaults = self.getDefaultParameters()
        if useDAS is None:
            useDAS = defaults[ 'useDAS' ].value
        if cmsswVersion is None:
            cmsswVersion = defaults[ 'cmsswVersion' ].value
        if formerVersion is None:
            formerVersion = defaults[ 'formerVersion' ].value
        if relVal is None:
            relVal = defaults[ 'relVal' ].value
        if dataTier is None:
            dataTier = defaults[ 'dataTier' ].value
        if condition is None:
            condition = defaults[ 'condition' ].value
        if globalTag is None:
            # Same derivation as for the default in '__init__', including the
            # handling of tuple/list entries in 'autoCond'.
            globalTag = self._globalTagFromCondition( condition )
        if maxVersions is None:
            maxVersions = defaults[ 'maxVersions' ].value
        if skipFiles is None:
            skipFiles = defaults[ 'skipFiles' ].value
        if numberOfFiles is None:
            numberOfFiles = defaults[ 'numberOfFiles' ].value
        if debug is None:
            debug = defaults[ 'debug' ].value
        # Parameters are only stored once all of them have been resolved.
        self.setParameter( 'useDAS'       , useDAS )
        self.setParameter( 'cmsswVersion' , cmsswVersion )
        self.setParameter( 'formerVersion', formerVersion )
        self.setParameter( 'relVal'       , relVal )
        self.setParameter( 'dataTier'     , dataTier )
        self.setParameter( 'condition'    , condition )
        self.setParameter( 'globalTag'    , globalTag )
        self.setParameter( 'maxVersions'  , maxVersions )
        self.setParameter( 'skipFiles'    , skipFiles )
        self.setParameter( 'numberOfFiles', numberOfFiles )
        self.setParameter( 'debug'        , debug )
        return self.apply()

    def messageEmptyList( self ):
        """Print the debug message announcing that an empty file list is returned."""
        print('%s DEBUG: Empty file list returned'%( self._label ))
        print(' This might be overwritten by providing input files explicitly to the source module in the main configuration file.')

    def apply( self ):
        """Determine and return the list of RelVal file paths for the current parameters.

        Steps: (1) determine the CMSSW release name used in the dataset path,
        (2) check that the host is in the cern.ch or fnal.gov domain,
        (3) query DAS for the highest dataset version 'v<N>' (N from
        'maxVersions' down to 1) that has files and is listed at the site's
        storage element, (4) report problems on 'stdout'.

        Returns a (possibly empty) list of file paths.
        Raises SystemExit( 1 ), if a DAS file query does not report status 'ok'.
        """
        useDAS        = self._parameters[ 'useDAS'        ].value
        cmsswVersion  = self._parameters[ 'cmsswVersion'  ].value
        formerVersion = self._parameters[ 'formerVersion' ].value
        relVal        = self._parameters[ 'relVal'        ].value
        dataTier      = self._parameters[ 'dataTier'      ].value
        condition     = self._parameters[ 'condition'     ].value  # read for completeness; not used below
        globalTag     = self._parameters[ 'globalTag'     ].value
        maxVersions   = self._parameters[ 'maxVersions'   ].value
        skipFiles     = self._parameters[ 'skipFiles'     ].value
        numberOfFiles = self._parameters[ 'numberOfFiles' ].value
        debug         = self._parameters[ 'debug'         ].value

        filePaths = []

        # ------------------------------------------------------------------
        # Determine the CMSSW release the RelVals are taken from.
        # Special release names are reduced to the underlying release by
        # cutting at the first matching identifier.
        # ------------------------------------------------------------------
        preId      = '_pre'
        patchId    = '_patch'
        hltPatchId = '_hltpatch'
        dqmPatchId = '_dqmpatch'
        slhcId     = '_SLHC'
        rootId     = '_root'
        ibId       = '_X_'
        if patchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
        elif hltPatchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
        elif dqmPatchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
        elif rootId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
        elif slhcId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
        elif ibId in cmsswVersion or formerVersion:
            # Ask SCRAM for the list of releases. Each option is passed as its
            # own argument, and text mode is requested so that the output can
            # be compared with 'str' objects under Python 3 as well.
            scramOut, scramErr = Popen( [ 'scram', 'l', '-c', 'CMSSW' ]
                                      , stdout = PIPE
                                      , stderr = PIPE
                                      , universal_newlines = True
                                      ).communicate()
            if len( scramErr ) != 0:
                print('%s INFO : SCRAM error'%( self._label ))
                if debug:
                    print(' from trying to determine last valid releases before \'%s\''%( cmsswVersion ))
                    print()
                    print(scramErr)
                    print()
                    self.messageEmptyList()
                return filePaths
            # The release name is taken from the second column of each output
            # line; lines with fewer than two columns (e.g. blank lines) are skipped.
            releases = [ fields[ 1 ]
                         for fields in ( line.split() for line in scramOut.splitlines() )
                         if len( fields ) > 1
                       ]
            versions = { 'last'      :''
                       , 'lastToLast':''
                       }
            # A listed release is considered, if its name contains one of these prefixes ...
            cyclePrefixes = ( cmsswVersion.split( ibId )[ 0 ], cmsswVersion.rpartition( '_' )[ 0 ] )
            # ... and none of these identifiers of special releases.
            excludedIds   = ( patchId, hltPatchId, dqmPatchId, slhcId, ibId, rootId )
            for version in releases:
                if not any( prefix in version for prefix in cyclePrefixes ):
                    continue
                if any( specialId in version for specialId in excludedIds ):
                    continue
                versions[ 'lastToLast' ] = versions[ 'last' ]
                versions[ 'last' ]       = version
                if version == cmsswVersion:
                    break
            # NOTE(review): the result depends on the order of the SCRAM output; confirm the ordering is as expected.
            if formerVersion:
                # A pre-release is not used as former version of a release not ending in '_0':
                # the pre-release identifier is cut off instead.
                if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
                    versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ]
                # For a release ending in '_0', search its own pre-releases and
                # take the last one of the first contiguous run found in the list.
                elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
                    versions[ 'lastToLast' ] = ''
                    for version in releases:
                        versionParts = version.partition( preId )
                        if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
                            versions[ 'lastToLast' ] = version
                        elif versions[ 'lastToLast' ] != '':
                            break
                # A release ending in '_0' is not used as former version of a pre-release.
                elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
                    versions[ 'lastToLast' ] = ''
                cmsswVersion = versions[ 'lastToLast' ]
            else:
                cmsswVersion = versions[ 'last' ]

        # ------------------------------------------------------------------
        # Debugging output of the parameters
        # ------------------------------------------------------------------
        if debug:
            print('%s DEBUG: Called with...'%( self._label ))
            for key in self._parameters.keys():
                print(' %s:\t'%( key ), end=' ')
                print(self._parameters[ key ].value, end=' ')
                # Compare by value: equal values are not necessarily the same object.
                if self._parameters[ key ].value == self.getDefaultParameters()[ key ].value:
                    print(' (default)')
                else:
                    print()
                if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value:
                    if formerVersion:
                        print(' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ))
                    else:
                        print(' ==> modified to last valid release %s'%( cmsswVersion ))

        # ------------------------------------------------------------------
        # Check the domain of the host: only cern.ch and fnal.gov are accepted.
        # ------------------------------------------------------------------
        hostName = socket.getfqdn()
        domain   = hostName.split( '.' )
        domainSE = ''
        # 'split' never returns an empty list, so the host name itself is tested.
        if not hostName:
            print('%s INFO : Cannot determine domain of this computer'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths
        elif os.uname()[0] == "Darwin":
            print('%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths
        elif len( domain ) == 1:
            print('%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ))
            if debug:
                self.messageEmptyList()
            return filePaths
        if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ):
            print('%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ))
            if debug:
                self.messageEmptyList()
            return filePaths
        if domain[ -2 ] == 'cern':
            domainSE = 'T2_CH_CERN'
        elif domain[ -2 ] == 'fnal':
            domainSE = 'T1_US_FNAL_MSS'
        if debug:
            print('%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ))
            print('%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ))

        # ------------------------------------------------------------------
        # Find the files
        # ------------------------------------------------------------------
        validVersion = 0
        dataset      = ''
        datasetAll   = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
        if useDAS:
            if debug:
                print('%s DEBUG: Using DAS query'%( self._label ))
            # NOTE(review): a non-positive 'numberOfFiles' ("all") is turned into a
            # limit of 1 here, and the limit does not account for 'skipFiles';
            # confirm against the semantics of 'das_client.get_data'.
            dasLimit = numberOfFiles
            if dasLimit <= 0:
                dasLimit = 1
            # Try the dataset versions from the highest one downwards.
            for version in range( maxVersions, 0, -1 ):
                filePaths    = []
                filePathsTmp = []  # all distinct files seen, including skipped ones
                fileCount    = 0
                dataset  = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
                dasQuery = 'file dataset=%s | grep file.name'%( dataset )
                if debug:
                    print('%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ))
                    print(' \'%s\''%( dasQuery ))
                jsondict = das_client.get_data(dasQuery,dasLimit)
                if debug:
                    print('%s DEBUG: Received DAS JSON dictionary:'%( self._label ))
                    print(' \'%s\''%( jsondict ))
                if jsondict[ 'status' ] != 'ok':
                    print('There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict))
                    # Raised directly, since the 'exit' builtin is only provided by the 'site' module.
                    raise SystemExit( 1 )
                mongo_query = jsondict[ 'mongo_query' ]
                filters     = mongo_query[ 'filters' ]
                data        = jsondict[ 'data' ]
                if debug:
                    print('%s DEBUG: Query in JSON dictionary:'%( self._label ))
                    print(' \'%s\''%( mongo_query ))
                    print('%s DEBUG: Filters in query:'%( self._label ))
                    print(' \'%s\''%( filters ))
                    print('%s DEBUG: Data in JSON dictionary:'%( self._label ))
                    print(' \'%s\''%( data ))
                for row in data:
                    filePath = [ r for r in das_client.get_value( row, filters[ 'grep' ] ) ][ 0 ]
                    if debug:
                        print('%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ))
                    if len( filePath ) > 0:
                        if validVersion != version:
                            # First file of this dataset version: check once that the
                            # dataset is listed at the storage element of this site.
                            jsontestdict    = das_client.get_data('site dataset=%s | grep site.name' % ( dataset ), 999)
                            mongo_testquery = jsontestdict[ 'mongo_query' ]
                            testfilters     = mongo_testquery[ 'filters' ]
                            testdata        = jsontestdict[ 'data' ]
                            if debug:
                                print('%s DEBUG: Received DAS JSON dictionary (site test):'%( self._label ))
                                print(' \'%s\''%( jsontestdict ))
                                print('%s DEBUG: Query in JSON dictionary (site test):'%( self._label ))
                                print(' \'%s\''%( mongo_testquery ))
                                print('%s DEBUG: Filters in query (site test):'%( self._label ))
                                print(' \'%s\''%( testfilters ))
                                print('%s DEBUG: Data in JSON dictionary (site test):'%( self._label ))
                                print(' \'%s\''%( testdata ))
                            foundSE = False
                            for testrow in testdata:
                                siteName = [ tr for tr in das_client.get_value( testrow, testfilters[ 'grep' ] ) ][ 0 ]
                                if siteName == domainSE:
                                    foundSE = True
                                    break
                            if not foundSE:
                                if debug:
                                    print('%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ))
                                # Give up on this dataset version and try the next lower one.
                                break
                            validVersion = version
                            if debug:
                                print('%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ))
                        if numberOfFiles == 0:
                            # Only the existence of a valid version was of interest.
                            break
                        # Protect against duplicate entries in the query result.
                        if not filePath in filePathsTmp:
                            filePathsTmp.append( filePath )
                            if debug:
                                print('%s DEBUG: File \'%s\' found'%( self._label, filePath ))
                            fileCount += 1
                            # The first 'skipFiles' distinct files are counted, but not returned.
                            if fileCount > skipFiles:
                                filePaths.append( filePath )
                        elif debug:
                            print('%s DEBUG: File \'%s\' found again'%( self._label, filePath ))
                if validVersion > 0:
                    if numberOfFiles == 0 and debug:
                        print('%s DEBUG: No files requested'%( self._label ))
                    # The highest valid dataset version has been processed.
                    break
        else:
            if debug:
                print('%s DEBUG: Using DBS query'%( self._label ))
            print('%s WARNING: DBS query disabled for DBS3 transition to new API'%( self._label ))

        # ------------------------------------------------------------------
        # Check the output and return
        # ------------------------------------------------------------------
        if validVersion == 0:
            print('%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ))
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) == 0:
            print('%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ))
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) < numberOfFiles:
            print('%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ))

        if debug:
            print('%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ))
        return filePaths
0400
# Module-level instance of the tool; it is callable via PickRelValInputFiles.__call__
# and returns the list of picked-up file paths.
0401 pickRelValInputFiles = PickRelValInputFiles()